forked from OSchip/llvm-project
[SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported.
Rather than using SELECT instructions, use SRA, UADDO/ADDCARRY and XORs to expand ABS. This is the multi-part version of the sequence we use in LegalizeDAG. It is also the same sequence that the X86 Custom lowering uses for i64 on 32-bit targets and i128 on 64-bit targets, so we can remove the X86 customization. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D87215
This commit is contained in:
parent
8b30067919
commit
da79b1eecc
|
@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
|
|||
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
|
||||
SDLoc dl(N);
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
GetExpandedInteger(N0, Lo, Hi);
|
||||
EVT NVT = Lo.getValueType();
|
||||
|
||||
// If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
|
||||
// use in LegalizeDAG. The ADD part of the expansion is based on
|
||||
// ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
|
||||
// ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
|
||||
// if needed. Shift expansion has a special case for filling with sign bits
|
||||
// so that we will only end up with one SRA.
|
||||
bool HasAddCarry = TLI.isOperationLegalOrCustom(
|
||||
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
|
||||
if (HasAddCarry) {
|
||||
EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
|
||||
SDValue Sign =
|
||||
DAG.getNode(ISD::SRA, dl, NVT, Hi,
|
||||
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
|
||||
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
|
||||
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
|
||||
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
|
||||
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
|
||||
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
|
||||
return;
|
||||
}
|
||||
|
||||
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
|
||||
DAG.getConstant(0, dl, VT), N0);
|
||||
SDValue NegLo, NegHi;
|
||||
SplitInteger(Neg, NegLo, NegHi);
|
||||
|
||||
GetExpandedInteger(N0, Lo, Hi);
|
||||
EVT NVT = Lo.getValueType();
|
||||
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
|
||||
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
|
||||
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
|
||||
|
|
|
@ -193,10 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
if (Subtarget.hasCMov()) {
|
||||
setOperationAction(ISD::ABS , MVT::i16 , Custom);
|
||||
setOperationAction(ISD::ABS , MVT::i32 , Custom);
|
||||
if (Subtarget.is64Bit())
|
||||
setOperationAction(ISD::ABS , MVT::i64 , Custom);
|
||||
}
|
||||
setOperationAction(ISD::ABS , MVT::i64 , Custom);
|
||||
if (Subtarget.is64Bit())
|
||||
setOperationAction(ISD::ABS , MVT::i128 , Custom);
|
||||
|
||||
// Funnel shifts.
|
||||
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
|
||||
|
@ -29720,31 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
case ISD::ABS: {
|
||||
assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) &&
|
||||
"Unexpected type (!= i64) on ABS.");
|
||||
assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) &&
|
||||
"Unexpected type (!= i128) on ABS.");
|
||||
MVT VT = N->getSimpleValueType(0);
|
||||
MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32;
|
||||
SDValue Lo, Hi, Tmp;
|
||||
SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
|
||||
|
||||
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
|
||||
DAG.getConstant(0, dl, HalfT));
|
||||
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
|
||||
DAG.getConstant(1, dl, HalfT));
|
||||
Tmp = DAG.getNode(
|
||||
ISD::SRA, dl, HalfT, Hi,
|
||||
DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl));
|
||||
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
|
||||
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
|
||||
SDValue(Lo.getNode(), 1));
|
||||
Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
|
||||
Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
|
||||
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi));
|
||||
return;
|
||||
}
|
||||
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
|
||||
case X86ISD::FMINC:
|
||||
case X86ISD::FMIN:
|
||||
|
|
|
@ -40,33 +40,24 @@ entry:
|
|||
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
|
||||
; CHECK-LABEL: abs_v2i64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vmov r0, s1
|
||||
; CHECK-NEXT: rsbs.w lr, r1, #0
|
||||
; CHECK-NEXT: sbc.w r2, r12, r0
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r3, mi
|
||||
; CHECK-NEXT: ands r3, r3, #1
|
||||
; CHECK-NEXT: csel r1, lr, r1, ne
|
||||
; CHECK-NEXT: csel r0, r2, r0, ne
|
||||
; CHECK-NEXT: vmov.32 q1[0], r1
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r0
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: adc.w r2, r0, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r2, r2, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov r0, s3
|
||||
; CHECK-NEXT: rsbs r2, r1, #0
|
||||
; CHECK-NEXT: sbc.w r12, r12, r0
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: cset r3, mi
|
||||
; CHECK-NEXT: ands r3, r3, #1
|
||||
; CHECK-NEXT: csel r1, r2, r1, ne
|
||||
; CHECK-NEXT: csel r0, r12, r0, ne
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov.32 q1[1], r2
|
||||
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[2], r1
|
||||
; CHECK-NEXT: adc.w r1, r0, r0, asr #31
|
||||
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.32 q1[3], r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp slt <2 x i64> %s1, zeroinitializer
|
||||
%1 = sub nsw <2 x i64> zeroinitializer, %s1
|
||||
|
|
|
@ -144,35 +144,31 @@ define i128 @test_i128(i128 %a) nounwind {
|
|||
;
|
||||
; X86-LABEL: test_i128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: sarl $31, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: addl %edx, %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: xorl %esi, %esi
|
||||
; X86-NEXT: negl %edi
|
||||
; X86-NEXT: movl $0, %ebx
|
||||
; X86-NEXT: sbbl %edx, %ebx
|
||||
; X86-NEXT: movl $0, %ebp
|
||||
; X86-NEXT: sbbl %ecx, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: sbbl %eax, %esi
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: cmovnsl %eax, %esi
|
||||
; X86-NEXT: cmovnsl %ecx, %ebp
|
||||
; X86-NEXT: cmovnsl %edx, %ebx
|
||||
; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %edi, (%eax)
|
||||
; X86-NEXT: movl %ebx, 4(%eax)
|
||||
; X86-NEXT: movl %ebp, 8(%eax)
|
||||
; X86-NEXT: movl %esi, 12(%eax)
|
||||
; X86-NEXT: adcl %edx, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: adcl %edx, %ebx
|
||||
; X86-NEXT: adcl %edx, %ecx
|
||||
; X86-NEXT: xorl %edx, %ecx
|
||||
; X86-NEXT: xorl %edx, %ebx
|
||||
; X86-NEXT: xorl %edx, %edi
|
||||
; X86-NEXT: xorl %edx, %esi
|
||||
; X86-NEXT: movl %esi, (%eax)
|
||||
; X86-NEXT: movl %edi, 4(%eax)
|
||||
; X86-NEXT: movl %ebx, 8(%eax)
|
||||
; X86-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl $4
|
||||
%r = call i128 @llvm.abs.i128(i128 %a, i1 false)
|
||||
ret i128 %r
|
||||
|
|
|
@ -121,73 +121,34 @@ define i64 @test_i64(i64 %a) nounwind {
|
|||
}
|
||||
|
||||
define i128 @test_i128(i128 %a) nounwind {
|
||||
; X86-NO-CMOV-LABEL: test_i128:
|
||||
; X86-NO-CMOV: # %bb.0:
|
||||
; X86-NO-CMOV-NEXT: pushl %ebp
|
||||
; X86-NO-CMOV-NEXT: pushl %ebx
|
||||
; X86-NO-CMOV-NEXT: pushl %edi
|
||||
; X86-NO-CMOV-NEXT: pushl %esi
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NO-CMOV-NEXT: xorl %ecx, %ecx
|
||||
; X86-NO-CMOV-NEXT: negl %ebp
|
||||
; X86-NO-CMOV-NEXT: movl $0, %ebx
|
||||
; X86-NO-CMOV-NEXT: sbbl %edx, %ebx
|
||||
; X86-NO-CMOV-NEXT: movl $0, %edi
|
||||
; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NO-CMOV-NEXT: sbbl %esi, %ecx
|
||||
; X86-NO-CMOV-NEXT: testl %esi, %esi
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NO-CMOV-NEXT: js .LBB4_2
|
||||
; X86-NO-CMOV-NEXT: # %bb.1:
|
||||
; X86-NO-CMOV-NEXT: movl %esi, %ecx
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NO-CMOV-NEXT: movl %edx, %ebx
|
||||
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NO-CMOV-NEXT: .LBB4_2:
|
||||
; X86-NO-CMOV-NEXT: movl %ebp, (%eax)
|
||||
; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax)
|
||||
; X86-NO-CMOV-NEXT: movl %edi, 8(%eax)
|
||||
; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-NO-CMOV-NEXT: popl %esi
|
||||
; X86-NO-CMOV-NEXT: popl %edi
|
||||
; X86-NO-CMOV-NEXT: popl %ebx
|
||||
; X86-NO-CMOV-NEXT: popl %ebp
|
||||
; X86-NO-CMOV-NEXT: retl $4
|
||||
;
|
||||
; X86-CMOV-LABEL: test_i128:
|
||||
; X86-CMOV: # %bb.0:
|
||||
; X86-CMOV-NEXT: pushl %ebp
|
||||
; X86-CMOV-NEXT: pushl %ebx
|
||||
; X86-CMOV-NEXT: pushl %edi
|
||||
; X86-CMOV-NEXT: pushl %esi
|
||||
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-CMOV-NEXT: xorl %esi, %esi
|
||||
; X86-CMOV-NEXT: negl %edi
|
||||
; X86-CMOV-NEXT: movl $0, %ebx
|
||||
; X86-CMOV-NEXT: sbbl %edx, %ebx
|
||||
; X86-CMOV-NEXT: movl $0, %ebp
|
||||
; X86-CMOV-NEXT: sbbl %ecx, %ebp
|
||||
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-CMOV-NEXT: sbbl %eax, %esi
|
||||
; X86-CMOV-NEXT: testl %eax, %eax
|
||||
; X86-CMOV-NEXT: cmovnsl %eax, %esi
|
||||
; X86-CMOV-NEXT: cmovnsl %ecx, %ebp
|
||||
; X86-CMOV-NEXT: cmovnsl %edx, %ebx
|
||||
; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
|
||||
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-CMOV-NEXT: movl %edi, (%eax)
|
||||
; X86-CMOV-NEXT: movl %ebx, 4(%eax)
|
||||
; X86-CMOV-NEXT: movl %ebp, 8(%eax)
|
||||
; X86-CMOV-NEXT: movl %esi, 12(%eax)
|
||||
; X86-CMOV-NEXT: popl %esi
|
||||
; X86-CMOV-NEXT: popl %edi
|
||||
; X86-CMOV-NEXT: popl %ebx
|
||||
; X86-CMOV-NEXT: popl %ebp
|
||||
; X86-CMOV-NEXT: retl $4
|
||||
; X86-LABEL: test_i128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: sarl $31, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: addl %edx, %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: adcl %edx, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: adcl %edx, %ebx
|
||||
; X86-NEXT: adcl %edx, %ecx
|
||||
; X86-NEXT: xorl %edx, %ecx
|
||||
; X86-NEXT: xorl %edx, %ebx
|
||||
; X86-NEXT: xorl %edx, %edi
|
||||
; X86-NEXT: xorl %edx, %esi
|
||||
; X86-NEXT: movl %esi, (%eax)
|
||||
; X86-NEXT: movl %edi, 4(%eax)
|
||||
; X86-NEXT: movl %ebx, 8(%eax)
|
||||
; X86-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: retl $4
|
||||
;
|
||||
; X64-LABEL: test_i128:
|
||||
; X64: # %bb.0:
|
||||
|
|
Loading…
Reference in New Issue