From e7211bb56724b66ba1011084a2293d9d658a4afe Mon Sep 17 00:00:00 2001 From: Craig Topper <craig.topper@intel.com> Date: Fri, 23 Aug 2019 17:14:58 +0000 Subject: [PATCH] [SelectionDAG][X86] Enable iX SimplifyDemandedBits to vXi1 SimplifyDemandedVectorElts simplification. Add a hack to X86 to avoid a regression Patch showing the effect of enabling bool vector oversimplification. Non-VLX builds can simplify a kshift shuffle, but VLX builds simplify: insert_subvector v8i zeroinitializer, v2i --> insert_subvector v8i undef, v2i Preventing the removal of the AND to clear the upper bits of result Differential Revision: https://reviews.llvm.org/D53022 llvm-svn: 369780 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 4 +--- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8b66cc471af3..f4438d41f653 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1819,9 +1819,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. - if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1a986bc8ce7b..306b8bc1f6ac 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35380,6 +35380,26 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && Subtarget.hasAVX512()) { + // Use zeros for the widening if we already have some zeroes. This can + // allow SimplifyDemandedBits to remove scalar ANDs that may be down + // stream of this. + // FIXME: It might make sense to detect a concat_vectors with a mix of + // zeroes and undef and turn it into insert_subvector for i1 vectors as + // a separate combine. What we can't do is canonicalize the operands of + // such a concat or we'll get into a loop with SimplifyDemandedBits. + if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); + if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { + SrcVT = LastOp.getValueType(); + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); + SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end()); + Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + N0 = DAG.getBitcast(MVT::i8, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + } + } + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); Ops[0] = N0;