forked from OSchip/llvm-project
[x86] add special-case lowering for usubsat for AVX512
This is a small extension of D112095 to avoid another regression seen with D112085. In this case, we allow the same conversion from usubsat to ALU ops if the target supports vpternlog. The resulting xor/and logic ops will get converted to a single vpternlog later in X86DAGToDAGISel::tryVPTERNLOG(). This seems better than putting a magic immediate constant directly in this code to create the exact vpternlog that we need. It's possible that there are other special cases along these lines, so we should try to keep all of the vpternlog magic in one place. Differential Revision: https://reviews.llvm.org/D112138
This commit is contained in:
parent
048688fd80
commit
40163f1df8
|
@ -28139,26 +28139,32 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
|
|||
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
|
||||
|
||||
unsigned BitWidth = VT.getScalarSizeInBits();
|
||||
if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
|
||||
// Handle a special-case with a bit-hack instead of cmp+select:
|
||||
// usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
|
||||
ConstantSDNode *C = isConstOrConstSplat(Y, true);
|
||||
if (C && C->getAPIntValue().isSignMask()) {
|
||||
SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
|
||||
SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
|
||||
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
|
||||
if (Opcode == ISD::USUBSAT) {
|
||||
if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
|
||||
// Handle a special-case with a bit-hack instead of cmp+select:
|
||||
// usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
|
||||
// If the target can use VPTERNLOG, DAGToDAG will match this as
|
||||
// "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
|
||||
// "broadcast" constant load.
|
||||
ConstantSDNode *C = isConstOrConstSplat(Y, true);
|
||||
if (C && C->getAPIntValue().isSignMask()) {
|
||||
SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
|
||||
SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
|
||||
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
|
||||
}
|
||||
}
|
||||
if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
|
||||
// usubsat X, Y --> (X >u Y) ? X - Y : 0
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
|
||||
SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
|
||||
// TODO: Move this to DAGCombiner?
|
||||
if (SetCCResultType == VT &&
|
||||
DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
|
||||
return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
|
||||
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
|
||||
}
|
||||
|
||||
// usubsat X, Y --> (X >u Y) ? X - Y : 0
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
|
||||
SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
|
||||
// TODO: Move this to DAGCombiner?
|
||||
if (SetCCResultType == VT &&
|
||||
DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
|
||||
return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
|
||||
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
|
||||
}
|
||||
|
||||
if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
|
||||
|
|
|
@ -162,9 +162,8 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
|
|||
;
|
||||
; AVX512-LABEL: usubsat_custom:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
|
||||
ret <4 x i32> %res
|
||||
|
|
Loading…
Reference in New Issue