[X86] Pull out combineOrShiftToFunnelShift helper. NFCI.

This commit is contained in:
Simon Pilgrim 2019-10-29 15:29:32 +00:00
parent e56ba3743b
commit 501cf25839
1 changed files with 64 additions and 51 deletions

View File

@ -39587,61 +39587,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return Ret;
}
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Expected ISD::OR node");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If this is SSE1 only convert to FOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
// Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
SrcOps.size() == 1) {
SDLoc dl(N);
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
if (Mask) {
APInt AllBits = APInt::getNullValue(NumElts);
return DAG.getSetCC(dl, MVT::i1, Mask,
DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
}
}
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
return R;
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
// Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
@ -39764,6 +39716,67 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If this is SSE1 only convert to FOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
// Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
SrcOps.size() == 1) {
SDLoc dl(N);
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
if (Mask) {
APInt AllBits = APInt::getNullValue(NumElts);
return DAG.getSetCC(dl, MVT::i1, Mask,
DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
}
}
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
return R;
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
if (SDValue R = combineOrShiftToFunnelShift(N, DAG, Subtarget))
return R;
// Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
return SDValue();
}
/// Try to turn tests against the signbit in the form of:
/// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
/// into: