[SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported.

Rather than using SELECT instructions, use SRA, UADDO/ADDCARRY and
XORs to expand ABS. This is the multi-part version of the sequence
we use in LegalizeDAG.

It's also the same sequence that the X86 Custom lowering uses for i64 on
32-bit targets and i128 on 64-bit targets, so we can remove the X86 customization.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D87215
This commit is contained in:
Craig Topper 2020-09-07 12:23:15 -07:00
parent 8b30067919
commit da79b1eecc
5 changed files with 85 additions and 141 deletions

View File

@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
// If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
// use in LegalizeDAG. The ADD part of the expansion is based on
// ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
// ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
// if needed. Shift expansion has a special case for filling with sign bits
// so that we will only end up with one SRA.
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
return;
}
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);

View File

@ -193,10 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
setOperationAction(ISD::ABS , MVT::i64 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS , MVT::i128 , Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
@ -29720,31 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
case ISD::ABS: {
assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) &&
"Unexpected type (!= i64) on ABS.");
assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) &&
"Unexpected type (!= i128) on ABS.");
MVT VT = N->getSimpleValueType(0);
MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32;
SDValue Lo, Hi, Tmp;
SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
DAG.getConstant(0, dl, HalfT));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
DAG.getConstant(1, dl, HalfT));
Tmp = DAG.getNode(
ISD::SRA, dl, HalfT, Hi,
DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl));
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
SDValue(Lo.getNode(), 1));
Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi));
return;
}
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:

View File

@ -40,33 +40,24 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: abs_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: rsbs.w lr, r1, #0
; CHECK-NEXT: sbc.w r2, r12, r0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r3, mi
; CHECK-NEXT: ands r3, r3, #1
; CHECK-NEXT: csel r1, lr, r1, ne
; CHECK-NEXT: csel r0, r2, r0, ne
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
; CHECK-NEXT: adc.w r2, r0, r0, asr #31
; CHECK-NEXT: eor.w r2, r2, r0, asr #31
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: rsbs r2, r1, #0
; CHECK-NEXT: sbc.w r12, r12, r0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r3, mi
; CHECK-NEXT: ands r3, r3, #1
; CHECK-NEXT: csel r1, r2, r1, ne
; CHECK-NEXT: csel r0, r12, r0, ne
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov.32 q1[1], r2
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: adc.w r1, r0, r0, asr #31
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <2 x i64> %s1, zeroinitializer
%1 = sub nsw <2 x i64> zeroinitializer, %s1

View File

@ -144,35 +144,31 @@ define i128 @test_i128(i128 %a) nounwind {
;
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: negl %edi
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnsl %eax, %esi
; X86-NEXT: cmovnsl %ecx, %ebp
; X86-NEXT: cmovnsl %edx, %ebx
; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %ebp, 8(%eax)
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: adcl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %edx, %ebx
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: xorl %edx, %esi
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%r = call i128 @llvm.abs.i128(i128 %a, i1 false)
ret i128 %r

View File

@ -121,73 +121,34 @@ define i64 @test_i64(i64 %a) nounwind {
}
define i128 @test_i128(i128 %a) nounwind {
; X86-NO-CMOV-LABEL: test_i128:
; X86-NO-CMOV: # %bb.0:
; X86-NO-CMOV-NEXT: pushl %ebp
; X86-NO-CMOV-NEXT: pushl %ebx
; X86-NO-CMOV-NEXT: pushl %edi
; X86-NO-CMOV-NEXT: pushl %esi
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-CMOV-NEXT: xorl %ecx, %ecx
; X86-NO-CMOV-NEXT: negl %ebp
; X86-NO-CMOV-NEXT: movl $0, %ebx
; X86-NO-CMOV-NEXT: sbbl %edx, %ebx
; X86-NO-CMOV-NEXT: movl $0, %edi
; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NO-CMOV-NEXT: sbbl %esi, %ecx
; X86-NO-CMOV-NEXT: testl %esi, %esi
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: js .LBB4_2
; X86-NO-CMOV-NEXT: # %bb.1:
; X86-NO-CMOV-NEXT: movl %esi, %ecx
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NO-CMOV-NEXT: movl %edx, %ebx
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NO-CMOV-NEXT: .LBB4_2:
; X86-NO-CMOV-NEXT: movl %ebp, (%eax)
; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax)
; X86-NO-CMOV-NEXT: movl %edi, 8(%eax)
; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax)
; X86-NO-CMOV-NEXT: popl %esi
; X86-NO-CMOV-NEXT: popl %edi
; X86-NO-CMOV-NEXT: popl %ebx
; X86-NO-CMOV-NEXT: popl %ebp
; X86-NO-CMOV-NEXT: retl $4
;
; X86-CMOV-LABEL: test_i128:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %ebp
; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: pushl %edi
; X86-CMOV-NEXT: pushl %esi
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-CMOV-NEXT: xorl %esi, %esi
; X86-CMOV-NEXT: negl %edi
; X86-CMOV-NEXT: movl $0, %ebx
; X86-CMOV-NEXT: sbbl %edx, %ebx
; X86-CMOV-NEXT: movl $0, %ebp
; X86-CMOV-NEXT: sbbl %ecx, %ebp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: sbbl %eax, %esi
; X86-CMOV-NEXT: testl %eax, %eax
; X86-CMOV-NEXT: cmovnsl %eax, %esi
; X86-CMOV-NEXT: cmovnsl %ecx, %ebp
; X86-CMOV-NEXT: cmovnsl %edx, %ebx
; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl %edi, (%eax)
; X86-CMOV-NEXT: movl %ebx, 4(%eax)
; X86-CMOV-NEXT: movl %ebp, 8(%eax)
; X86-CMOV-NEXT: movl %esi, 12(%eax)
; X86-CMOV-NEXT: popl %esi
; X86-CMOV-NEXT: popl %edi
; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: popl %ebp
; X86-CMOV-NEXT: retl $4
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %edx, %ebx
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: xorl %edx, %esi
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
; X64-LABEL: test_i128:
; X64: # %bb.0: