X86: Match the SSE/AVX min/max vector ops using a custom node instead of intrinsics

This is very mechanical, no functionality change. Preparation for PR14667.

llvm-svn: 170898
This commit is contained in:
Benjamin Kramer 2012-12-21 14:04:55 +00:00
parent e30a84f08f
commit 4669d18893
5 changed files with 171 additions and 97 deletions

View File

@ -10154,6 +10154,40 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
// SSE2/SSE41/AVX2 integer max/min intrinsics.
case Intrinsic::x86_sse2_pmaxu_b:
case Intrinsic::x86_sse41_pmaxuw:
case Intrinsic::x86_sse41_pmaxud:
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
return DAG.getNode(X86ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
return DAG.getNode(X86ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
return DAG.getNode(X86ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
@ -11974,6 +12008,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::UMAX: return "X86ISD::UMAX";
case X86ISD::UMIN: return "X86ISD::UMIN";
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMAXC: return "X86ISD::FMAXC";

View File

@ -197,6 +197,12 @@ namespace llvm {
/// FHSUB - Floating point horizontal sub.
FHSUB,
/// UMAX, UMIN - Unsigned integer max and min.
UMAX, UMIN,
/// SMAX, SMIN - Signed integer max and min.
SMAX, SMIN,
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,

View File

@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>]>;
// Target-specific integer min/max selection-DAG nodes. Each def binds a
// TableGen pattern name to the X86ISD opcode given in the string, using the
// SDTIntBinOp type profile. The "u" variants are the unsigned forms and the
// "s" variants the signed forms.
def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;

View File

@ -775,6 +775,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
{ X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
{ X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
{ X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
{ X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
{ X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
{ X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
{ X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@ -951,6 +959,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
{ X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
{ X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
{ X86::VPMINSBrr, X86::VPMINSBrm, TB_ALIGN_16 },
{ X86::VPMINSDrr, X86::VPMINSDrm, TB_ALIGN_16 },
{ X86::VPMINUDrr, X86::VPMINUDrm, TB_ALIGN_16 },
{ X86::VPMINUWrr, X86::VPMINUWrm, TB_ALIGN_16 },
{ X86::VPMAXSBrr, X86::VPMAXSBrm, TB_ALIGN_16 },
{ X86::VPMAXSDrr, X86::VPMAXSDrm, TB_ALIGN_16 },
{ X86::VPMAXUDrr, X86::VPMAXUDrm, TB_ALIGN_16 },
{ X86::VPMAXUWrr, X86::VPMAXUWrm, TB_ALIGN_16 },
{ X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
{ X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
@ -1092,6 +1108,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
{ X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
{ X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
{ X86::VPMINSBYrr, X86::VPMINSBYrm, TB_ALIGN_32 },
{ X86::VPMINSDYrr, X86::VPMINSDYrm, TB_ALIGN_32 },
{ X86::VPMINUDYrr, X86::VPMINUDYrm, TB_ALIGN_32 },
{ X86::VPMINUWYrr, X86::VPMINUWYrm, TB_ALIGN_32 },
{ X86::VPMAXSBYrr, X86::VPMAXSBYrm, TB_ALIGN_32 },
{ X86::VPMAXSDYrr, X86::VPMAXSDYrm, TB_ALIGN_32 },
{ X86::VPMAXUDYrr, X86::VPMAXUDYrm, TB_ALIGN_32 },
{ X86::VPMAXUWYrr, X86::VPMAXUWYrm, TB_ALIGN_32 },
{ X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
{ X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
{ X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },

View File

@ -3733,6 +3733,14 @@ defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, VR128,
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
// Packed integer min/max on VR128, selected from the X86umin/X86smin/
// X86umax/X86smax nodes rather than from intrinsics: the unsigned forms
// operate on v16i8 and the signed forms on v8i16. All four carry VEX_4V.
// NOTE(review): the trailing `1, 0` arguments are presumably IsCommutable and
// Is2Addr - confirm against the PDI_binop_rm multiclass signature.
defm VPMINUB : PDI_binop_rm<0xDA, "vpminub", X86umin, v16i8, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMINSW : PDI_binop_rm<0xEA, "vpminsw", X86smin, v8i16, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMAXUB : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v16i8, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMAXSW : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v8i16, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
// Intrinsic forms
defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
@ -3768,18 +3776,6 @@ defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
@ -3813,6 +3809,18 @@ defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256,
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
// Packed integer min/max on VR256 with a 256-bit memory operand (i256mem),
// selected from the X86umin/X86smin/X86umax/X86smax nodes: the unsigned forms
// operate on v32i8 and the signed forms on v16i16. They reuse the same
// mnemonics and opcodes as the VR128 definitions and add VEX_L to VEX_4V.
// NOTE(review): the trailing `1, 0` arguments are presumably IsCommutable and
// Is2Addr - confirm against the PDI_binop_rm multiclass signature.
defm VPMINUBY : PDI_binop_rm<0xDA, "vpminub", X86umin, v32i8,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMINSWY : PDI_binop_rm<0xEA, "vpminsw", X86smin, v16i16,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXUBY : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v32i8,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXSWY : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v16i16,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
// Intrinsic forms
defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
@ -3848,18 +3856,6 @@ defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
VR256, memopv4i64, i256mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
@ -3890,6 +3886,14 @@ defm PSUBUSW : PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P>;
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
// Packed integer min/max on VR128 without any VEX modifier, selected from the
// X86umin/X86smin/X86umax/X86smax nodes: the unsigned forms operate on v16i8
// and the signed forms on v8i16.
// NOTE(review): the single trailing `1` is presumably IsCommutable, with the
// remaining multiclass parameter left at its default - confirm against
// PDI_binop_rm.
defm PMINUB : PDI_binop_rm<0xDA, "pminub", X86umin, v16i8, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1>;
defm PMINSW : PDI_binop_rm<0xEA, "pminsw", X86smin, v8i16, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1>;
defm PMAXUB : PDI_binop_rm<0xDE, "pmaxub", X86umax, v16i8, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1>;
defm PMAXSW : PDI_binop_rm<0xEE, "pmaxsw", X86smax, v8i16, VR128, memopv2i64,
i128mem, SSE_INTALU_ITINS_P, 1>;
// Intrinsic forms
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
@ -3925,18 +3929,6 @@ defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1>;
@ -6701,67 +6693,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
0>, VEX_4V;
defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
0>, VEX_4V;
defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
0>, VEX_4V;
defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
0>, VEX_4V;
defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
0>, VEX_4V;
defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
0>, VEX_4V;
defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
0>, VEX_4V;
defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@ -6784,6 +6715,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert (memop_frag addr:$src2)))))]>, OpSize;
}
// VR128 definitions guarded by the HasAVX predicate; every entry carries
// VEX_4V. The eight min/max instructions are selected from the X86smin/
// X86umin/X86smax/X86umax nodes through SS48I_binop_rm, while VPACKUSDW and
// VPMULDQ are still matched through their int_x86_sse41_* intrinsics.
let Predicates = [HasAVX] in {
// This brace-less `let ... in` applies only to the VPACKUSDW defm that
// immediately follows it, not to the rest of the block.
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
// Node-matched min/max: signed/unsigned variants on v16i8, v8i16 and v4i32.
// NOTE(review): the trailing `0` is presumably Is2Addr - confirm against the
// SS48I_binop_rm multiclass signature.
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
// Still intrinsic-matched.
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
}
// VR256 definitions guarded by the HasAVX2 predicate; every entry carries
// VEX_4V and VEX_L. The eight min/max instructions are selected from the
// X86smin/X86umin/X86smax/X86umax nodes through SS48I_binop_rm, while
// VPACKUSDW and VPMULDQ are still matched through their int_x86_avx2_*
// intrinsics via SS41I_binop_rm_int_y.
let Predicates = [HasAVX2] in {
// This brace-less `let ... in` applies only to the VPACKUSDW defm that
// immediately follows it, not to the rest of the block.
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
// Node-matched min/max: signed/unsigned variants on v32i8, v16i16 and v8i32.
// The record names carry an explicit "Y" suffix here, unlike the two
// intrinsic-matched defms in this block.
// NOTE(review): the trailing `0` is presumably Is2Addr - confirm against the
// SS48I_binop_rm multiclass signature.
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
// Still intrinsic-matched.
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}
// VR128 definitions without any VEX modifier. The enclosing constraint ties
// $src1 to $dst for every instruction in the block. The eight min/max
// instructions are selected from the X86smin/X86umin/X86smax/X86umax nodes
// through SS48I_binop_rm, while PACKUSDW and PMULDQ are still matched through
// their int_x86_sse41_* intrinsics.
let Constraints = "$src1 = $dst" in {
// This brace-less `let ... in` applies only to the PACKUSDW defm that
// immediately follows it, not to the rest of the block.
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
// Node-matched min/max: signed/unsigned variants on v16i8, v8i16 and v4i32.
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
memopv2i64, i128mem>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
memopv2i64, i128mem>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
memopv2i64, i128mem>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
memopv2i64, i128mem>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
memopv2i64, i128mem>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
memopv2i64, i128mem>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
memopv2i64, i128mem>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
memopv2i64, i128mem>;
// Still intrinsic-matched.
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;