Custom lower phadd and phsub intrinsics to target-specific nodes. Remove the patterns that are no longer necessary.

llvm-svn: 148927
This commit is contained in:
Craig Topper 2012-01-25 05:37:32 +00:00
parent 2d6b7b91b9
commit ce4f9c5668
3 changed files with 70 additions and 63 deletions

View File

@ -9302,6 +9302,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_hsub_pd_256: case Intrinsic::x86_avx_hsub_pd_256:
return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2)); Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
case Intrinsic::x86_avx2_phsub_d:
return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256: case Intrinsic::x86_avx2_psllv_d_256:

View File

@ -719,12 +719,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
{ X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
{ X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
{ X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 }, { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
{ X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
{ X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
{ X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
{ X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
{ X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
@ -903,12 +903,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
{ X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
{ X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
{ X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
{ X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
{ X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
{ X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
{ X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
{ X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
{ X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 }, { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
@ -1047,12 +1047,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 }, { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
{ X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
{ X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 }, { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
{ X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_32 }, { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
{ X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
{ X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_32 }, { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
{ X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_32 }, { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
{ X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
{ X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_32 }, { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
{ X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },

View File

@ -5079,6 +5079,28 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions // SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
/// SS3I_binop_rm - Simple SSSE3 bin op
///
/// Emits two instruction records per instantiation: a register-register form
/// (`rr`) and a register-memory form (`rm`). Both carry a selection pattern
/// that matches the SDNode `OpNode` producing a value of type `OpVT`, so no
/// separate `def : Pat` is needed to select them from that node.
///
///   opc        - 8-bit opcode forwarded to SS38I.
///   OpcodeStr  - mnemonic; the operand string is appended to it.
///   OpNode     - SDNode matched by both selection patterns.
///   OpVT       - value type the matched node is cast to.
///   RC         - register class of $dst, $src1 and (in `rr`) $src2.
///   memop_frag - PatFrag that loads the memory operand of the `rm` form.
///   x86memop   - memory operand class of $src2 in the `rm` form.
///   Is2Addr    - 1 (default) selects the asm string that names only $src2
///                and $dst; 0 selects the one that names $src1, $src2, $dst.
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
X86MemOperand x86memop, bit Is2Addr = 1> {
// This `let` has no braces, so it covers only the `rr` def that follows.
// NOTE(review): the PHADD/PHSUB instantiations visible in this file are
// wrapped in `let isCommutable = 0 in { ... }`; presumably that outer setting
// is intended to win over this default -- confirm.
let isCommutable = 1 in
// Register-register form: both sources are RC registers.
def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
OpSize;
// Register-memory form: the second source is loaded from addr:$src2.
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
// The loaded value is bitconverted before reaching OpNode; presumably this
// is because memop_frag's type (e.g. memopv2i64 in the visible uses) need
// not equal OpVT -- confirm.
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))]>, OpSize;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Is2Addr = 1> { Intrinsic IntId128, bit Is2Addr = 1> {
@ -5118,16 +5140,16 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
let ImmT = NoImm, Predicates = [HasAVX] in { let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in { let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V; memopv2i64, i128mem, 0>, VEX_4V;
defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V; memopv2i64, i128mem, 0>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
@ -5147,16 +5169,16 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
let ImmT = NoImm, Predicates = [HasAVX2] in { let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in { let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
int_x86_avx2_phadd_w>, VEX_4V; memopv4i64, i256mem, 0>, VEX_4V;
defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
int_x86_avx2_phadd_d>, VEX_4V; memopv4i64, i256mem, 0>, VEX_4V;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V; int_x86_avx2_phadd_sw>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
int_x86_avx2_phsub_w>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
int_x86_avx2_phsub_d>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V; int_x86_avx2_phsub_sw>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
@ -5177,16 +5199,16 @@ defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
// None of these have i8 immediate fields. // None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in { let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in { let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
int_x86_ssse3_phadd_w_128>; memopv2i64, i128mem>;
defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
int_x86_ssse3_phadd_d_128>; memopv2i64, i128mem>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
memopv2i64, i128mem>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
memopv2i64, i128mem>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>; int_x86_ssse3_phadd_sw_128>;
defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
int_x86_ssse3_phsub_w_128>;
defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>; int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
@ -5216,15 +5238,6 @@ let Predicates = [HasAVX] in {
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>; (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>; (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
(VPHADDWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
(VPHADDDrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
(VPHSUBWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
(VPHSUBDrr128 VR128:$src1, VR128:$src2)>;
} }
let Predicates = [HasAVX2] in { let Predicates = [HasAVX2] in {
@ -5234,15 +5247,6 @@ let Predicates = [HasAVX2] in {
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>; (VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)),
(VPSIGNDrr256 VR256:$src1, VR256:$src2)>; (VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)),
(VPHADDWrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)),
(VPHADDDrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)),
(VPHSUBWrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)),
(VPHSUBDrr256 VR256:$src1, VR256:$src2)>;
} }
let Predicates = [HasSSSE3] in { let Predicates = [HasSSSE3] in {
@ -5257,15 +5261,6 @@ let Predicates = [HasSSSE3] in {
(PSIGNWrr128 VR128:$src1, VR128:$src2)>; (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>; (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
(PHADDWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
(PHADDDrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
(PHSUBWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
(PHSUBDrr128 VR128:$src1, VR128:$src2)>;
} }
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//