forked from OSchip/llvm-project
[SelectionDAG][X86] Enable iX SimplifyDemandedBits to vXi1 SimplifyDemandedVectorElts simplification. Add a hack to X86 to avoid a regression
Patch showing the effect of enabling bool vector oversimplification.
Non-VLX builds can simplify a kshift shuffle, but VLX builds simplify `insert_subvector v8i zeroinitializer, v2i` --> `insert_subvector v8i undef, v2i`, which prevents the removal of the AND that clears the upper bits of the result.
Differential Revision: https://reviews.llvm.org/D53022
llvm-svn: 369780
This commit is contained in:
parent
ad5047d23d
commit
e7211bb567
|
@ -1819,9 +1819,7 @@ bool TargetLowering::SimplifyDemandedBits(
|
||||||
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
|
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
|
||||||
// Demand the elt/bit if any of the original elts/bits are demanded.
|
// Demand the elt/bit if any of the original elts/bits are demanded.
|
||||||
// TODO - bigendian once we have test coverage.
|
// TODO - bigendian once we have test coverage.
|
||||||
// TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
|
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
|
||||||
if (SrcVT.isVector() && NumSrcEltBits > 1 &&
|
|
||||||
(BitWidth % NumSrcEltBits) == 0 &&
|
|
||||||
TLO.DAG.getDataLayout().isLittleEndian()) {
|
TLO.DAG.getDataLayout().isLittleEndian()) {
|
||||||
unsigned Scale = BitWidth / NumSrcEltBits;
|
unsigned Scale = BitWidth / NumSrcEltBits;
|
||||||
unsigned NumSrcElts = SrcVT.getVectorNumElements();
|
unsigned NumSrcElts = SrcVT.getVectorNumElements();
|
||||||
|
|
|
@ -35380,6 +35380,26 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
||||||
// type, widen both sides to avoid a trip through memory.
|
// type, widen both sides to avoid a trip through memory.
|
||||||
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
|
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
|
||||||
Subtarget.hasAVX512()) {
|
Subtarget.hasAVX512()) {
|
||||||
|
// Use zeros for the widening if we already have some zeroes. This can
|
||||||
|
// allow SimplifyDemandedBits to remove scalar ANDs that may be
|
||||||
|
// downstream of this.
|
||||||
|
// FIXME: It might make sense to detect a concat_vectors with a mix of
|
||||||
|
// zeroes and undef and turn it into insert_subvector for i1 vectors as
|
||||||
|
// a separate combine. What we can't do is canonicalize the operands of
|
||||||
|
// such a concat or we'll get into a loop with SimplifyDemandedBits.
|
||||||
|
if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
|
||||||
|
SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1);
|
||||||
|
if (ISD::isBuildVectorAllZeros(LastOp.getNode())) {
|
||||||
|
SrcVT = LastOp.getValueType();
|
||||||
|
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
|
||||||
|
SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end());
|
||||||
|
Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT));
|
||||||
|
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
|
||||||
|
N0 = DAG.getBitcast(MVT::i8, N0);
|
||||||
|
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
|
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
|
||||||
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
|
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
|
||||||
Ops[0] = N0;
|
Ops[0] = N0;
|
||||||
|
|
Loading…
Reference in New Issue