forked from OSchip/llvm-project
[X86][SSE] Lower ICMP EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
This replaces the MOVMSK combine introduced at D52121/rL342326 (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)) with the more general icmp lowering so it can pick up more cases through bitcasts - notably vXi8 cases which use vXi16 shifts+masks, this patch can remove the mask and use pcmpgtb(0,x) for the sra. Differential Revision: https://reviews.llvm.org/D60625 llvm-svn: 358651
This commit is contained in:
parent
3deff86657
commit
8f87e53462
|
@ -19873,10 +19873,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
|
||||
"Value types for source and destination must be the same!");
|
||||
|
||||
// Break 256-bit integer vector compare into smaller ones.
|
||||
if (VT.is256BitVector() && !Subtarget.hasInt256())
|
||||
return Lower256IntVSETCC(Op, DAG);
|
||||
|
||||
// The result is boolean, but operands are int/float
|
||||
if (VT.getVectorElementType() == MVT::i1) {
|
||||
// In AVX-512 architecture setcc returns mask with i1 elements,
|
||||
|
@ -19930,6 +19926,27 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
}
|
||||
}
|
||||
|
||||
// ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
|
||||
if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
|
||||
Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
|
||||
ConstantSDNode *C1 = isConstOrConstSplat(Op1);
|
||||
if (C1 && C1->getAPIntValue().isPowerOf2()) {
|
||||
unsigned BitWidth = VT.getScalarSizeInBits();
|
||||
unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;
|
||||
|
||||
SDValue Result = Op0.getOperand(0);
|
||||
Result = DAG.getNode(ISD::SHL, dl, VT, Result,
|
||||
DAG.getConstant(ShiftAmt, dl, VT));
|
||||
Result = DAG.getNode(ISD::SRA, dl, VT, Result,
|
||||
DAG.getConstant(BitWidth - 1, dl, VT));
|
||||
return Result;
|
||||
}
|
||||
}
|
||||
|
||||
// Break 256-bit integer vector compare into smaller ones.
|
||||
if (VT.is256BitVector() && !Subtarget.hasInt256())
|
||||
return Lower256IntVSETCC(Op, DAG);
|
||||
|
||||
// If this is a SETNE against the signed minimum value, change it to SETGT.
|
||||
// If this is a SETNE against the signed maximum value, change it to SETLT.
|
||||
// which will be swapped to SETGT.
|
||||
|
@ -40997,39 +41014,6 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
|
|||
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
|
||||
return SDValue(N, 0);
|
||||
|
||||
// Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
|
||||
// Only do this when the setcc input and output types are the same and the
|
||||
// setcc and the 'and' node have a single use.
|
||||
// FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
|
||||
APInt SplatVal;
|
||||
if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
|
||||
Src.getOperand(0).getValueType() == Src.getValueType() &&
|
||||
cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
|
||||
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
|
||||
Src.getOperand(0).getOpcode() == ISD::AND) {
|
||||
SDValue And = Src.getOperand(0);
|
||||
if (And.hasOneUse() &&
|
||||
ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
|
||||
SplatVal.isPowerOf2()) {
|
||||
MVT VT = Src.getSimpleValueType();
|
||||
unsigned BitWidth = VT.getScalarSizeInBits();
|
||||
unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
|
||||
SDLoc DL(And);
|
||||
SDValue X = And.getOperand(0);
|
||||
// If the element type is i8, we need to bitcast to i16 to use a legal
|
||||
// shift. If we wait until lowering we end up with an extra and to bits
|
||||
// from crossing the 8-bit elements, but we don't care about that here.
|
||||
if (VT.getVectorElementType() == MVT::i8) {
|
||||
VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
|
||||
X = DAG.getBitcast(VT, X);
|
||||
}
|
||||
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
|
||||
DAG.getConstant(ShAmt, DL, VT));
|
||||
SDValue Cast = DAG.getBitcast(SrcVT, Shl);
|
||||
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue