forked from OSchip/llvm-project
[X86] Use SADDSAT/SSUBSAT instead of ADDS/SUBS
Migrate the X86 backend from X86ISD opcodes ADDS and SUBS to generic ISD opcodes SADDSAT and SSUBSAT. This also improves codegen for @llvm.sadd.sat() and @llvm.ssub.sat() intrinsics. This is a followup to D55787 and part of PR40056. Differential Revision: https://reviews.llvm.org/D55833 llvm-svn: 349520
This commit is contained in:
parent
20a6db5a84
commit
f6058ff140
|
@ -830,14 +830,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
}
|
||||
|
||||
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
|
||||
// Use widening instead of promotion.
|
||||
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
|
||||
MVT::v4i16, MVT::v2i16 }) {
|
||||
setOperationAction(ISD::UADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::USUBSAT, VT, Custom);
|
||||
setOperationAction(ISD::UADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::SADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::USUBSAT, VT, Custom);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Custom);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
|
@ -1212,9 +1218,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
|
||||
|
||||
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
|
||||
|
||||
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
|
||||
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
|
||||
|
@ -1334,7 +1344,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, VT, Custom);
|
||||
setOperationAction(ISD::UADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::SADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::USUBSAT, VT, Custom);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Custom);
|
||||
|
||||
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||
|
@ -1596,7 +1608,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::MUL, VT, Custom);
|
||||
setOperationAction(ISD::VSELECT, VT, Expand);
|
||||
setOperationAction(ISD::UADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::SADDSAT, VT, Custom);
|
||||
setOperationAction(ISD::USUBSAT, VT, Custom);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Custom);
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
|
@ -1678,7 +1692,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::UMIN, VT, Legal);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::UADDSAT, VT, Legal);
|
||||
setOperationAction(ISD::SADDSAT, VT, Legal);
|
||||
setOperationAction(ISD::USUBSAT, VT, Legal);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Legal);
|
||||
|
||||
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
|
||||
// setcc all the way to isel and prefer SETGT in some isel patterns.
|
||||
|
@ -23388,15 +23404,17 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
|
|||
return split256IntArith(Op, DAG);
|
||||
}
|
||||
|
||||
static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
|
||||
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
if (VT.getScalarType() == MVT::i1) {
|
||||
SDLoc dl(Op);
|
||||
switch (Op.getOpcode()) {
|
||||
default: llvm_unreachable("Expected saturated arithmetic opcode");
|
||||
case ISD::UADDSAT:
|
||||
case ISD::SADDSAT:
|
||||
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
|
||||
case ISD::USUBSAT:
|
||||
case ISD::SSUBSAT:
|
||||
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
|
||||
DAG.getNOT(dl, Op.getOperand(1), VT));
|
||||
}
|
||||
|
@ -26194,7 +26212,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::ADD:
|
||||
case ISD::SUB: return LowerADD_SUB(Op, DAG);
|
||||
case ISD::UADDSAT:
|
||||
case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
|
||||
case ISD::SADDSAT:
|
||||
case ISD::USUBSAT:
|
||||
case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
|
||||
case ISD::SMAX:
|
||||
case ISD::SMIN:
|
||||
case ISD::UMAX:
|
||||
|
@ -26277,11 +26297,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
return;
|
||||
}
|
||||
case ISD::UADDSAT:
|
||||
case ISD::SADDSAT:
|
||||
case ISD::USUBSAT:
|
||||
case ISD::SSUBSAT:
|
||||
case X86ISD::VPMADDWD:
|
||||
case X86ISD::AVG: {
|
||||
// Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
|
||||
// by widening.
|
||||
// Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
|
||||
// X86ISD::AVG/VPMADDWD by widening.
|
||||
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -27228,8 +27250,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
|
||||
case X86ISD::SCALEF: return "X86ISD::SCALEF";
|
||||
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
|
||||
case X86ISD::ADDS: return "X86ISD::ADDS";
|
||||
case X86ISD::SUBS: return "X86ISD::SUBS";
|
||||
case X86ISD::AVG: return "X86ISD::AVG";
|
||||
case X86ISD::MULHRS: return "X86ISD::MULHRS";
|
||||
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
|
||||
|
|
|
@ -226,10 +226,6 @@ namespace llvm {
|
|||
SCALEF,
|
||||
SCALEFS,
|
||||
|
||||
// Integer add/sub with signed saturation.
|
||||
ADDS,
|
||||
SUBS,
|
||||
|
||||
// Unsigned Integer average.
|
||||
AVG,
|
||||
|
||||
|
|
|
@ -4830,9 +4830,9 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
|
|||
SchedWriteVecALU, 1>;
|
||||
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
|
||||
SchedWriteVecALU, 0>;
|
||||
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
|
||||
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
|
||||
SchedWriteVecALU, HasBWI, 1>;
|
||||
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
|
||||
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
|
||||
SchedWriteVecALU, HasBWI, 0>;
|
||||
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
|
||||
SchedWriteVecALU, HasBWI, 1>;
|
||||
|
|
|
@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
|
|||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>;
|
||||
|
||||
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
|
||||
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
|
|
|
@ -3623,9 +3623,9 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
|
|||
SchedWriteVecALU, 1, NoVLX>;
|
||||
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
|
||||
SchedWriteVecALU, 1, NoVLX>;
|
||||
defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
|
||||
defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
|
||||
defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
|
@ -3645,9 +3645,9 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
|
|||
SchedWriteVecALU, 0, NoVLX>;
|
||||
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
|
||||
SchedWriteVecALU, 0, NoVLX>;
|
||||
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
|
||||
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
|
||||
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
|
|
|
@ -319,8 +319,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
|
@ -361,8 +361,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
|
@ -920,8 +920,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
|
@ -1004,8 +1004,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
|
||||
|
@ -1168,8 +1168,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
|
||||
|
@ -1191,8 +1191,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
|
||||
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
|
||||
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
|
||||
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue