[X86][SSE] Generalised CMP+AND1 combine to ZERO/ALLBITS+MASK

This patch generalizes combinePCMPAnd1 (which handled the SETCC + ZEXT cases) to work for any input whose elements are known to be all-zeros or all-ones, masked with an 'all low bits' mask.

Replaced the implicit assumption of shift availability with a call to SupportedVectorShiftWithImm.

Part 1 of 3.

Differential Revision: https://reviews.llvm.org/D31347

llvm-svn: 298779
This commit is contained in:
Simon Pilgrim 2017-03-25 19:50:14 +00:00
parent 1e6fedbb83
commit 5400a4d0af
1 changed file with 23 additions and 27 deletions

View File

@ -21690,11 +21690,11 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() &&
if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
bool LShift = VT.is128BitVector() ||
bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256());
bool AShift = LShift && (Subtarget.hasAVX512() ||
@ -31383,38 +31383,34 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// If this is a PCMPEQ or PCMPGT result that is bitwise-anded with 1 (this is
/// the x86 lowering of a SETCC + ZEXT), replace the 'and' with a shift-right to
/// eliminate loading the vector constant mask value. This relies on the fact
/// that a PCMP always creates an all-ones or all-zeros bitmask per element.
static SDValue combinePCMPAnd1(SDNode *N, SelectionDAG &DAG) {
/// If this is a zero/all-bits result that is bitwise-anded with a low bits
/// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
// TODO: Use AssertSext to mark any nodes that have the property of producing
// all-ones or all-zeros. Then check for that node rather than particular
// opcodes.
if (Op0.getOpcode() != X86ISD::PCMPEQ && Op0.getOpcode() != X86ISD::PCMPGT)
return SDValue();
// The existence of the PCMP node guarantees that we have the required SSE2 or
// AVX2 for a shift of this vector type, but there is no vector shift by
// immediate for a vector with byte elements (PSRLB). 512-bit vectors use the
// masked compare nodes, so they should not make it here.
EVT VT0 = Op0.getValueType();
EVT VT1 = Op1.getValueType();
unsigned EltBitWidth = VT0.getScalarSizeInBits();
if (VT0 != VT1 || EltBitWidth == 8)
if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
assert(VT0.getSizeInBits() == 128 || VT0.getSizeInBits() == 256);
APInt SplatVal;
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || SplatVal != 1)
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
!APIntOps::isMask(SplatVal))
return SDValue();
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
unsigned EltBitWidth = VT0.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
SDValue ShAmt = DAG.getConstant(EltBitWidth - 1, DL, MVT::i8);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@ -31434,7 +31430,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combinePCMPAnd1(N, DAG))
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);