forked from OSchip/llvm-project
[X86] Teach combineTruncatedArithmetic to push truncate through subtracts where only one of the inputs is free to truncate.
Fix combineSubToSubus to handle the new DAG to avoid a regression. There are still regressions in test14/test15/test16, where it looks like we were trying to set up cases we could match to umin+trunc+subus but the handling was never finished. The regression here isn't unique to sub. It's a lost opportunity for taking an AND with two truncated inputs and producing a larger AND with a single truncate. The same thing could happen with any other node we handle in combineTruncatedArithmetic since we are moving the truncate up the DAG. Differential Revision: https://reviews.llvm.org/D80483
This commit is contained in:
parent
37ef15143a
commit
51a276c759
|
@ -43588,7 +43588,8 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||||
case ISD::AND:
|
case ISD::AND:
|
||||||
case ISD::XOR:
|
case ISD::XOR:
|
||||||
case ISD::OR:
|
case ISD::OR:
|
||||||
case ISD::ADD: {
|
case ISD::ADD:
|
||||||
|
case ISD::SUB: {
|
||||||
SDValue Op0 = Src.getOperand(0);
|
SDValue Op0 = Src.getOperand(0);
|
||||||
SDValue Op1 = Src.getOperand(1);
|
SDValue Op1 = Src.getOperand(1);
|
||||||
if (TLI.isOperationLegal(SrcOpcode, VT) &&
|
if (TLI.isOperationLegal(SrcOpcode, VT) &&
|
||||||
|
@ -43596,16 +43597,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||||
return TruncateArithmetic(Op0, Op1);
|
return TruncateArithmetic(Op0, Op1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ISD::SUB: {
|
|
||||||
// TODO: ISD::SUB We are conservative and require both sides to be freely
|
|
||||||
// truncatable to avoid interfering with combineSubToSubus.
|
|
||||||
SDValue Op0 = Src.getOperand(0);
|
|
||||||
SDValue Op1 = Src.getOperand(1);
|
|
||||||
if (TLI.isOperationLegal(SrcOpcode, VT) &&
|
|
||||||
(Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
|
|
||||||
return TruncateArithmetic(Op0, Op1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
@ -46698,6 +46689,38 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
|
||||||
SubusRHS = MinLHS;
|
SubusRHS = MinLHS;
|
||||||
else
|
else
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
} else if (Op1.getOpcode() == ISD::TRUNCATE &&
|
||||||
|
Op1.getOperand(0).getOpcode() == ISD::UMIN &&
|
||||||
|
(EltVT == MVT::i8 || EltVT == MVT::i16)) {
|
||||||
|
// Special case where the UMIN has been truncated. Try to push the truncate
|
||||||
|
// further up. This is similar to the i32/i64 special processing.
|
||||||
|
SubusLHS = Op0;
|
||||||
|
SDValue MinLHS = Op1.getOperand(0).getOperand(0);
|
||||||
|
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
|
||||||
|
EVT TruncVT = Op1.getOperand(0).getValueType();
|
||||||
|
if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 ||
|
||||||
|
TruncVT == MVT::v8i64)) &&
|
||||||
|
!(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32)))
|
||||||
|
return SDValue();
|
||||||
|
SDValue OpToSaturate;
|
||||||
|
if (MinLHS.getOpcode() == ISD::ZERO_EXTEND &&
|
||||||
|
MinLHS.getOperand(0) == Op0)
|
||||||
|
OpToSaturate = MinRHS;
|
||||||
|
else if (MinRHS.getOpcode() == ISD::ZERO_EXTEND &&
|
||||||
|
MinRHS.getOperand(0) == Op0)
|
||||||
|
OpToSaturate = MinLHS;
|
||||||
|
else
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Saturate the non-extended input and then truncate it.
|
||||||
|
SDLoc DL(N);
|
||||||
|
SDValue SaturationConst =
|
||||||
|
DAG.getConstant(APInt::getLowBitsSet(TruncVT.getScalarSizeInBits(),
|
||||||
|
VT.getScalarSizeInBits()),
|
||||||
|
DL, TruncVT);
|
||||||
|
SDValue UMin = DAG.getNode(ISD::UMIN, DL, TruncVT, OpToSaturate,
|
||||||
|
SaturationConst);
|
||||||
|
SubusRHS = DAG.getNode(ISD::TRUNCATE, DL, VT, UMin);
|
||||||
} else
|
} else
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue