From 393b4bd1362f6634a972157e7c2f3936f51f7356 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 27 Jun 2020 19:10:13 +0100 Subject: [PATCH] [X86] SimplifyDemandedVectorEltsForTargetNode - merge shuffle/pack lower demanded elements handling. Generalize the vector operand extraction code for shuffle/pack ops - we can assume that the vector operands are the same width as the result, and any non-vector values can be reused directly in the smaller width op. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 77 +++++++------------------ 1 file changed, 20 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fd69791b80e8..bef29f0948cb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37053,18 +37053,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( ExtSizeInBits = SizeInBits / 4; switch (Opc) { - // Zero upper elements. - case X86ISD::VZEXT_MOVL: { - SDLoc DL(Op); - SDValue Ext0 = - extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = - TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); - SDValue UndefVec = TLO.DAG.getUNDEF(VT); - SDValue Insert = - insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); - return TLO.CombineTo(Op, Insert); - } // Subvector broadcast. case X86ISD::SUBV_BROADCAST: { SDLoc DL(Op); @@ -37080,11 +37068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, TLO.DAG, DL, ExtSizeInBits)); } - // Target unary shuffles by immediate: - case X86ISD::PSHUFD: - case X86ISD::PSHUFLW: - case X86ISD::PSHUFHW: - case X86ISD::VPERMILPI: // Byte shifts by immediate. case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -37122,11 +37105,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } break; } + // Zero upper elements. + case X86ISD::VZEXT_MOVL: + // Target unary shuffles by immediate: + case X86ISD::PSHUFD: + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + case X86ISD::VPERMILPI: // (Non-Lane Crossing) Target Shuffles. case X86ISD::VPERMILPV: + case X86ISD::VPERMIL2: case X86ISD::PSHUFB: case X86ISD::UNPCKL: case X86ISD::UNPCKH: + case X86ISD::BLENDI: // Saturated Packs. case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -37136,49 +37128,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( case X86ISD::FHADD: case X86ISD::FHSUB: { SDLoc DL(Op); + SmallVector Ops; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + SDValue SrcOp = Op.getOperand(i); + EVT SrcVT = SrcOp.getValueType(); + assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) && + "Unsupported vector size"); + Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL, + ExtSizeInBits) + : SrcOp); + } MVT ExtVT = VT.getSimpleVT(); ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), ExtSizeInBits / ExtVT.getScalarSizeInBits()); - SDValue Ext0 = - extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue Ext1 = - extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1); - SDValue UndefVec = TLO.DAG.getUNDEF(VT); - SDValue Insert = - insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); - return TLO.CombineTo(Op, Insert); - } - // Vector blend by immediate. - case X86ISD::BLENDI: { - SDLoc DL(Op); - MVT ExtVT = VT.getSimpleVT(); - ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), - ExtSizeInBits / ExtVT.getScalarSizeInBits()); - SDValue Ext0 = - extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue Ext1 = - extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = - TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2)); - SDValue UndefVec = TLO.DAG.getUNDEF(VT); - SDValue Insert = - insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); - return TLO.CombineTo(Op, Insert); - } - case X86ISD::VPERMIL2: { - SDLoc DL(Op); - MVT ExtVT = VT.getSimpleVT(); - ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), - ExtSizeInBits / ExtVT.getScalarSizeInBits()); - SDValue Ext0 = - extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue Ext1 = - extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue Ext2 = - extractSubVector(Op.getOperand(2), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = - TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Ext2, Op.getOperand(3)); + SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops); SDValue UndefVec = TLO.DAG.getUNDEF(VT); SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);