[X86][SSE] Lower ICMP EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.

This replaces the MOVMSK combine introduced at D52121/rL342326:

(movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C))

with the more general icmp lowering, so it can pick up more cases through bitcasts. Notably, for vXi8 cases which use vXi16 shifts+masks, this patch can remove the mask and use pcmpgtb(0,x) for the sra.

Differential Revision: https://reviews.llvm.org/D60625

llvm-svn: 358651
This commit is contained in:
Simon Pilgrim 2019-04-18 09:58:59 +00:00
parent 3deff86657
commit 8f87e53462
2 changed files with 353 additions and 709 deletions

View File

@ -19873,10 +19873,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
assert((Subtarget.hasAVX512() || (VT == VTOp0)) && assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!"); "Value types for source and destination must be the same!");
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
// The result is boolean, but operands are int/float // The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) { if (VT.getVectorElementType() == MVT::i1) {
// In AVX-512 architecture setcc returns mask with i1 elements, // In AVX-512 architecture setcc returns mask with i1 elements,
@ -19930,6 +19926,27 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
} }
} }
// ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
ConstantSDNode *C1 = isConstOrConstSplat(Op1);
if (C1 && C1->getAPIntValue().isPowerOf2()) {
unsigned BitWidth = VT.getScalarSizeInBits();
unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;
SDValue Result = Op0.getOperand(0);
Result = DAG.getNode(ISD::SHL, dl, VT, Result,
DAG.getConstant(ShiftAmt, dl, VT));
Result = DAG.getNode(ISD::SRA, dl, VT, Result,
DAG.getConstant(BitWidth - 1, dl, VT));
return Result;
}
}
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
// If this is a SETNE against the signed minimum value, change it to SETGT. // If this is a SETNE against the signed minimum value, change it to SETGT.
// If this is a SETNE against the signed maximum value, change it to SETLT. // If this is a SETNE against the signed maximum value, change it to SETLT.
// which will be swapped to SETGT. // which will be swapped to SETGT.
@ -40997,39 +41014,6 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0); return SDValue(N, 0);
// Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
// Only do this when the setcc input and output types are the same and the
// setcc and the 'and' node have a single use.
// FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
APInt SplatVal;
if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
Src.getOperand(0).getValueType() == Src.getValueType() &&
cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
Src.getOperand(0).getOpcode() == ISD::AND) {
SDValue And = Src.getOperand(0);
if (And.hasOneUse() &&
ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
SplatVal.isPowerOf2()) {
MVT VT = Src.getSimpleValueType();
unsigned BitWidth = VT.getScalarSizeInBits();
unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
SDLoc DL(And);
SDValue X = And.getOperand(0);
// If the element type is i8, we need to bitcast to i16 to use a legal
// shift. If we wait until lowering we end up with an extra and to bits
// from crossing the 8-bit elements, but we don't care about that here.
if (VT.getVectorElementType() == MVT::i8) {
VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
X = DAG.getBitcast(VT, X);
}
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
DAG.getConstant(ShAmt, DL, VT));
SDValue Cast = DAG.getBitcast(SrcVT, Shl);
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
}
}
return SDValue(); return SDValue();
} }

File diff suppressed because it is too large Load Diff