[x86] add special-case lowering for usubsat for AVX512

This is a small extension of D112095 to avoid another regression
seen with D112085.
In this case, we allow the same conversion from usubsat to ALU
ops (xor, sra, and) if the target supports vpternlog, even when
UMAX is legal for the type.

The resulting logic ops will get converted to vpternlog later in
X86DAGToDAGISel::tryVPTERNLOG().
This seems better than putting a magic immediate constant directly in
this code to create the exact vpternlog that we need. It's possible that
there are other special cases along these lines, so we should try to
keep all of the vpternlog magic in one place.

Differential Revision: https://reviews.llvm.org/D112138
This commit is contained in:
Sanjay Patel 2021-10-20 16:09:15 -04:00
parent 048688fd80
commit 40163f1df8
2 changed files with 27 additions and 22 deletions

View File

@ -28139,26 +28139,32 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned BitWidth = VT.getScalarSizeInBits();
if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
// Handle a special-case with a bit-hack instead of cmp+select:
// usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
ConstantSDNode *C = isConstOrConstSplat(Y, true);
if (C && C->getAPIntValue().isSignMask()) {
SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
if (Opcode == ISD::USUBSAT) {
if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
// Handle a special-case with a bit-hack instead of cmp+select:
// usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
// If the target can use VPTERNLOG, DAGToDAG will match this as
// "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
// "broadcast" constant load.
ConstantSDNode *C = isConstOrConstSplat(Y, true);
if (C && C->getAPIntValue().isSignMask()) {
SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
}
}
if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
// usubsat X, Y --> (X >u Y) ? X - Y : 0
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
// TODO: Move this to DAGCombiner?
if (SetCCResultType == VT &&
DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
}
// usubsat X, Y --> (X >u Y) ? X - Y : 0
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
// TODO: Move this to DAGCombiner?
if (SetCCResultType == VT &&
DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
}
if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&

View File

@ -162,9 +162,8 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
;
; AVX512-LABEL: usubsat_custom:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX512-NEXT: vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512-NEXT: retq
%res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
ret <4 x i32> %res