From 4669d18893c07cded1b9c4f4bccf258d6b0ffd01 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Dec 2012 14:04:55 +0000 Subject: [PATCH] X86: Match the SSE/AVX min/max vector ops using a custom node instead of intrinsics This is very mechanical, no functionality change. Preparation for PR14667. llvm-svn: 170898 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 38 ++++ llvm/lib/Target/X86/X86ISelLowering.h | 6 + llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 + llvm/lib/Target/X86/X86InstrInfo.cpp | 24 +++ llvm/lib/Target/X86/X86InstrSSE.td | 195 ++++++++++--------- 5 files changed, 171 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b650726b622..6f12d6cff079 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10154,6 +10154,40 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + // SSE2/SSE41/AVX2 integer max/min intrinsics. + case Intrinsic::x86_sse2_pmaxu_b: + case Intrinsic::x86_sse41_pmaxuw: + case Intrinsic::x86_sse41_pmaxud: + case Intrinsic::x86_avx2_pmaxu_b: + case Intrinsic::x86_avx2_pmaxu_w: + case Intrinsic::x86_avx2_pmaxu_d: + return DAG.getNode(X86ISD::UMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_pminu_b: + case Intrinsic::x86_sse41_pminuw: + case Intrinsic::x86_sse41_pminud: + case Intrinsic::x86_avx2_pminu_b: + case Intrinsic::x86_avx2_pminu_w: + case Intrinsic::x86_avx2_pminu_d: + return DAG.getNode(X86ISD::UMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_pmaxsb: + case Intrinsic::x86_sse2_pmaxs_w: + case Intrinsic::x86_sse41_pmaxsd: + case Intrinsic::x86_avx2_pmaxs_b: + case Intrinsic::x86_avx2_pmaxs_w: + case Intrinsic::x86_avx2_pmaxs_d: + return DAG.getNode(X86ISD::SMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_pminsb: + case Intrinsic::x86_sse2_pmins_w: + case Intrinsic::x86_sse41_pminsd: + case Intrinsic::x86_avx2_pmins_b: + case Intrinsic::x86_avx2_pmins_w: + case Intrinsic::x86_avx2_pmins_d: + return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: @@ -11974,6 +12008,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; + case X86ISD::UMAX: return "X86ISD::UMAX"; + case X86ISD::UMIN: return "X86ISD::UMIN"; + case X86ISD::SMAX: return "X86ISD::SMAX"; + case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3d234eba057b..eec1f7e7200f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -197,6 +197,12 @@ namespace llvm { /// FHSUB - Floating point horizontal sub. FHSUB, + /// UMAX, UMIN - Unsigned integer max and min. + UMAX, UMIN, + + /// SMAX, SMIN - Signed integer max and min. + SMAX, SMIN, + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 7d16d2741d98..7025e93fa15d 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>]>; +def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; +def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; +def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; +def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; + def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 81149b702db6..7bbab3862da9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -775,6 +775,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, + { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, + { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, @@ -951,6 +959,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, + { X86::VPMINSBrr, X86::VPMINSBrm, TB_ALIGN_16 }, + { X86::VPMINSDrr, X86::VPMINSDrm, TB_ALIGN_16 }, + { X86::VPMINUDrr, X86::VPMINUDrm, TB_ALIGN_16 }, + { X86::VPMINUWrr, X86::VPMINUWrm, TB_ALIGN_16 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, TB_ALIGN_16 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, TB_ALIGN_16 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, TB_ALIGN_16 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, TB_ALIGN_16 }, { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, @@ -1092,6 +1108,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 }, { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 }, { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, TB_ALIGN_32 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, TB_ALIGN_32 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, TB_ALIGN_32 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, TB_ALIGN_32 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, TB_ALIGN_32 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, TB_ALIGN_32 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, TB_ALIGN_32 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, TB_ALIGN_32 }, { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 }, { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 }, { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 }, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 521073d0a84b..69e2b4390f7b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3733,6 +3733,14 @@ defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, VR128, defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; +defm VPMINUB : PDI_binop_rm<0xDA, "vpminub", X86umin, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMINSW : PDI_binop_rm<0xEA, "vpminsw", X86smin, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMAXUB : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMAXSW : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; // Intrinsic forms defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, @@ -3768,18 +3776,6 @@ defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; @@ -3813,6 +3809,18 @@ defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256, defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMINUBY : PDI_binop_rm<0xDA, "vpminub", X86umin, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMINSWY : PDI_binop_rm<0xEA, "vpminsw", X86smin, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMAXUBY : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMAXSWY : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, @@ -3848,18 +3856,6 @@ defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, VR256, memopv4i64, i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, VR256, memopv4i64, i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; @@ -3890,6 +3886,14 @@ defm PSUBUSW : PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P>; defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; +defm PMINUB : PDI_binop_rm<0xDA, "pminub", X86umin, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMINSW : PDI_binop_rm<0xEA, "pminsw", X86smin, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMAXUB : PDI_binop_rm<0xDE, "pmaxub", X86umax, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMAXSW : PDI_binop_rm<0xEE, "pmaxsw", X86smax, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; // Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, @@ -3925,18 +3929,6 @@ defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; @@ -6701,67 +6693,6 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0>, VEX_4V; - defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, - 0>, VEX_4V; - defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, - 0>, VEX_4V; - defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, - 0>, VEX_4V; - defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, - 0>, VEX_4V; - defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, - 0>, VEX_4V; - defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, - 0>, VEX_4V; - defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, - 0>, VEX_4V; - defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, - 0>, VEX_4V; - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, - 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw>, VEX_4V, VEX_L; - defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", - int_x86_avx2_pmins_b>, VEX_4V, VEX_L; - defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", - int_x86_avx2_pmins_d>, VEX_4V, VEX_L; - defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", - int_x86_avx2_pminu_d>, VEX_4V, VEX_L; - defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", - int_x86_avx2_pminu_w>, VEX_4V, VEX_L; - defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", - int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L; - defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", - int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L; - defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", - int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L; - defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw", - int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L; - defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", - int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; - defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; - defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; - defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; - defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; - defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; - defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; - defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; -} /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, @@ -6784,6 +6715,76 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, (bitconvert (memop_frag addr:$src2)))))]>, OpSize; } +let Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", + int_x86_avx2_packusdw>, VEX_4V, VEX_L; + defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", + int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem>; + defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem>; + defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem>; + defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} + let Predicates = [HasAVX] in { defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, memopv2i64, i128mem, 0>, VEX_4V;