[SDAG] Optimize unordered comparison in soft-float mode (patch by Anton Nadolskiy)

The current implementation handles unordered comparisons poorly in soft-float mode.
Consider (a ULE b), which is true when a <= b or when the operands are unordered. In general it is lowered to (__ledf2(a, b) <= 0 || __unorddf2(a, b) != 0). We can do a better job by lowering it to (__gtdf2(a, b) <= 0).
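The same replacement is valid for the other unordered comparisons (ult, ugt, uge). In general, we call the libcall of the inverse ordered comparison (ult -> oge, ule -> ogt, ugt -> ole, uge -> olt) and invert the condition code used to compare its result against zero.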
Differential Revision: http://reviews.llvm.org/D10804

llvm-svn: 242280
This commit is contained in:
Alexey Bataev 2015-07-15 08:39:35 +00:00
parent 5d36b230b5
commit b9288601a3
6 changed files with 189 additions and 74 deletions

View File

@ -115,7 +115,6 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
return LowerCallTo(CLI); return LowerCallTo(CLI);
} }
/// SoftenSetCCOperands - Soften the operands of a comparison. This code is /// SoftenSetCCOperands - Soften the operands of a comparison. This code is
/// shared among BR_CC, SELECT_CC, and SETCC handlers. /// shared among BR_CC, SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
@ -127,6 +126,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Expand into one or more soft-fp libcall(s). // Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
bool ShouldInvertCC = false;
switch (CCCode) { switch (CCCode) {
case ISD::SETEQ: case ISD::SETEQ:
case ISD::SETOEQ: case ISD::SETOEQ:
@ -166,34 +166,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
(VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
break; break;
default: case ISD::SETONE:
// SETONE = SETOLT | SETOGT
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
break;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
break;
default:
// Invert CC for unordered comparisons
ShouldInvertCC = true;
switch (CCCode) { switch (CCCode) {
case ISD::SETONE: case ISD::SETULT:
// SETONE = SETOLT | SETOGT LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
// Fallthrough
case ISD::SETUGT:
LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
break;
case ISD::SETUGE:
LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
break; break;
case ISD::SETULT:
LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
break;
case ISD::SETULE: case ISD::SETULE:
LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
break; break;
case ISD::SETUEQ: case ISD::SETUGE:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
break; break;
default: llvm_unreachable("Do not know how to soften this setcc!"); default: llvm_unreachable("Do not know how to soften this setcc!");
} }
@ -203,16 +207,20 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
EVT RetVT = getCmpLibcallReturnType(); EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = { NewLHS, NewRHS }; SDValue Ops[2] = { NewLHS, NewRHS };
NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
dl).first; dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT); NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1); CCCode = getCmpLibcallCC(LC1);
if (ShouldInvertCC)
CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
if (LC2 != RTLIB::UNKNOWN_LIBCALL) { if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode( SDValue Tmp = DAG.getNode(
ISD::SETCC, dl, ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode)); NewLHS, NewRHS, DAG.getCondCode(CCCode));
NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
dl).first; dl).first;
NewLHS = DAG.getNode( NewLHS = DAG.getNode(
ISD::SETCC, dl, ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),

View File

@ -148,14 +148,9 @@ define i1 @test_setcc2() {
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
%val = fcmp ugt fp128 %lhs, %rhs %val = fcmp ugt fp128 %lhs, %rhs
; CHECK: bl __gttf2 ; CHECK: bl __letf2
; CHECK: cmp w0, #0 ; CHECK: cmp w0, #0
; CHECK: cset [[GT:w[0-9]+]], gt ; CHECK: cset w0, gt
; CHECK: bl __unordtf2
; CHECK: cmp w0, #0
; CHECK: cset [[UNORDERED:w[0-9]+]], ne
; CHECK: orr w0, [[UNORDERED]], [[GT]]
ret i1 %val ret i1 %val
; CHECK: ret ; CHECK: ret
@ -169,31 +164,21 @@ define i32 @test_br_cc() {
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs] ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
; olt == !uge, which LLVM unfortunately "optimizes" this to. ; olt == !uge, which LLVM optimizes this to.
%cond = fcmp olt fp128 %lhs, %rhs %cond = fcmp olt fp128 %lhs, %rhs
; CHECK: bl __getf2 ; CHECK: bl __lttf2
; CHECK: cmp w0, #0 ; CHECK-NEXT: cmp w0, #0
; CHECK: cset [[OGE:w[0-9]+]], ge ; CHECK-NEXT: b.ge {{.LBB[0-9]+_[0-9]+}}
; CHECK: bl __unordtf2
; CHECK: cmp w0, #0
; CHECK: cset [[UNORDERED:w[0-9]+]], ne
; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
br i1 %cond, label %iftrue, label %iffalse br i1 %cond, label %iftrue, label %iffalse
iftrue: iftrue:
ret i32 42 ret i32 42
; CHECK-NEXT: BB# ; CHECK-NEXT: BB#
; CHECK-NEXT: movz w0, #0x2a ; CHECK-NEXT: movz w0, #0x2a
; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] ; CHECK: ret
iffalse: iffalse:
ret i32 29 ret i32 29
; CHECK: [[RET29]]: ; CHECK: movz w0, #0x1d
; CHECK-NEXT: movz w0, #0x1d
; CHECK-NEXT: [[REALRET]]:
; CHECK: ret ; CHECK: ret
} }

View File

@ -297,7 +297,7 @@ entry:
%and2 = and i1 %lnot, %cmp1 %and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32 %and = zext i1 %and2 to i32
store i32 %and, i32* @ltsf2_result, align 4 store i32 %and, i32* @ltsf2_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_unordsf2)(${{[0-9]+}}) ;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}})
;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}}) ;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}})
ret void ret void
} }
@ -313,7 +313,7 @@ entry:
%and2 = and i1 %lnot, %cmp1 %and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32 %and = zext i1 %and2 to i32
store i32 %and, i32* @ltdf2_result, align 4 store i32 %and, i32* @ltdf2_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_unorddf2)(${{[0-9]+}}) ;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}})
;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}}) ;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}})
ret void ret void
} }

View File

@ -81,8 +81,9 @@ define i1 @cmp_f_ord(float %a, float %b) {
} }
define i1 @cmp_f_ugt(float %a, float %b) { define i1 @cmp_f_ugt(float %a, float %b) {
; CHECK-LABEL: cmp_f_ugt: ; CHECK-LABEL: cmp_f_ugt:
; NONE: bl __aeabi_fcmpgt ; NONE: bl __aeabi_fcmple
; NONE: bl __aeabi_fcmpun ; NONE: cmp r0, #0
; NONE-NEXT: it eq
; HARD: vcmpe.f32 ; HARD: vcmpe.f32
; HARD: movhi r0, #1 ; HARD: movhi r0, #1
%1 = fcmp ugt float %a, %b %1 = fcmp ugt float %a, %b
@ -90,8 +91,9 @@ define i1 @cmp_f_ugt(float %a, float %b) {
} }
define i1 @cmp_f_uge(float %a, float %b) { define i1 @cmp_f_uge(float %a, float %b) {
; CHECK-LABEL: cmp_f_uge: ; CHECK-LABEL: cmp_f_uge:
; NONE: bl __aeabi_fcmpge ; NONE: bl __aeabi_fcmplt
; NONE: bl __aeabi_fcmpun ; NONE: cmp r0, #0
; NONE-NEXT: it eq
; HARD: vcmpe.f32 ; HARD: vcmpe.f32
; HARD: movpl r0, #1 ; HARD: movpl r0, #1
%1 = fcmp uge float %a, %b %1 = fcmp uge float %a, %b
@ -99,8 +101,9 @@ define i1 @cmp_f_uge(float %a, float %b) {
} }
define i1 @cmp_f_ult(float %a, float %b) { define i1 @cmp_f_ult(float %a, float %b) {
; CHECK-LABEL: cmp_f_ult: ; CHECK-LABEL: cmp_f_ult:
; NONE: bl __aeabi_fcmplt ; NONE: bl __aeabi_fcmpge
; NONE: bl __aeabi_fcmpun ; NONE: cmp r0, #0
; NONE-NEXT: it eq
; HARD: vcmpe.f32 ; HARD: vcmpe.f32
; HARD: movlt r0, #1 ; HARD: movlt r0, #1
%1 = fcmp ult float %a, %b %1 = fcmp ult float %a, %b
@ -108,8 +111,9 @@ define i1 @cmp_f_ult(float %a, float %b) {
} }
define i1 @cmp_f_ule(float %a, float %b) { define i1 @cmp_f_ule(float %a, float %b) {
; CHECK-LABEL: cmp_f_ule: ; CHECK-LABEL: cmp_f_ule:
; NONE: bl __aeabi_fcmple ; NONE: bl __aeabi_fcmpgt
; NONE: bl __aeabi_fcmpun ; NONE: cmp r0, #0
; NONE-NEXT: it eq
; HARD: vcmpe.f32 ; HARD: vcmpe.f32
; HARD: movle r0, #1 ; HARD: movle r0, #1
%1 = fcmp ule float %a, %b %1 = fcmp ule float %a, %b
@ -214,10 +218,8 @@ define i1 @cmp_d_ord(double %a, double %b) {
} }
define i1 @cmp_d_ugt(double %a, double %b) { define i1 @cmp_d_ugt(double %a, double %b) {
; CHECK-LABEL: cmp_d_ugt: ; CHECK-LABEL: cmp_d_ugt:
; NONE: bl __aeabi_dcmpgt ; NONE: bl __aeabi_dcmple
; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmple
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64 ; DP: vcmpe.f64
; DP: movhi r0, #1 ; DP: movhi r0, #1
%1 = fcmp ugt double %a, %b %1 = fcmp ugt double %a, %b
@ -226,10 +228,8 @@ define i1 @cmp_d_ugt(double %a, double %b) {
define i1 @cmp_d_ult(double %a, double %b) { define i1 @cmp_d_ult(double %a, double %b) {
; CHECK-LABEL: cmp_d_ult: ; CHECK-LABEL: cmp_d_ult:
; NONE: bl __aeabi_dcmplt ; NONE: bl __aeabi_dcmpge
; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmpge
; SP: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64 ; DP: vcmpe.f64
; DP: movlt r0, #1 ; DP: movlt r0, #1
%1 = fcmp ult double %a, %b %1 = fcmp ult double %a, %b
@ -268,10 +268,8 @@ define i1 @cmp_d_ueq(double %a, double %b) {
define i1 @cmp_d_uge(double %a, double %b) { define i1 @cmp_d_uge(double %a, double %b) {
; CHECK-LABEL: cmp_d_uge: ; CHECK-LABEL: cmp_d_uge:
; NONE: bl __aeabi_dcmpge ; NONE: bl __aeabi_dcmplt
; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmpge
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64 ; DP: vcmpe.f64
; DP: movpl r0, #1 ; DP: movpl r0, #1
%1 = fcmp uge double %a, %b %1 = fcmp uge double %a, %b
@ -280,10 +278,8 @@ define i1 @cmp_d_uge(double %a, double %b) {
define i1 @cmp_d_ule(double %a, double %b) { define i1 @cmp_d_ule(double %a, double %b) {
; CHECK-LABEL: cmp_d_ule: ; CHECK-LABEL: cmp_d_ule:
; NONE: bl __aeabi_dcmple ; NONE: bl __aeabi_dcmpgt
; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmple
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64 ; DP: vcmpe.f64
; DP: movle r0, #1 ; DP: movle r0, #1
%1 = fcmp ule double %a, %b %1 = fcmp ule double %a, %b

View File

@ -109,9 +109,8 @@ declare double @llvm.fabs.f64(double %Val)
define double @abs_d(double %a) { define double @abs_d(double %a) {
; CHECK-LABEL: abs_d: ; CHECK-LABEL: abs_d:
; NONE: bic r1, r1, #-2147483648 ; NONE: bic r1, r1, #-2147483648
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmpun
; SP: bl __aeabi_dsub ; SP: bl __aeabi_dsub
; SP: bl __aeabi_dcmple
; DP: vabs.f64 d0, d0 ; DP: vabs.f64 d0, d0
%1 = call double @llvm.fabs.f64(double %a) %1 = call double @llvm.fabs.f64(double %a)
ret double %1 ret double %1

View File

@ -0,0 +1,127 @@
; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s
; test1: soft-float lowering of `fcmp ule` (unordered or less-than-or-equal)
; of a double against 0.0. The FileCheck lines below expect a call to
; __gtdf2 followed by setle, i.e. the inverted ordered-greater-than
; comparison (__gtdf2(a, b) <= 0).
define i1 @test1(double %d) #0 {
entry:
%cmp = fcmp ule double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test1:
; CHECK: calll __gtdf2
; CHECK: setle
; CHECK: retl
; test2: soft-float lowering of `fcmp ult` (unordered or less-than) of a
; double against 0.0. The FileCheck lines below expect a call to __gedf2
; followed by sets, i.e. the inverted ordered-greater-or-equal comparison
; (result of __gedf2 is negative).
define i1 @test2(double %d) #0 {
entry:
%cmp = fcmp ult double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test2:
; CHECK: calll __gedf2
; CHECK: sets
; CHECK: retl
; test3: soft-float lowering of `fcmp ugt` (unordered or greater-than) of a
; double against 0.0. The FileCheck lines below expect a call to __ledf2
; followed by setg, i.e. the inverted ordered-less-or-equal comparison
; (__ledf2(a, b) > 0).
define i1 @test3(double %d) #0 {
entry:
%cmp = fcmp ugt double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test3:
; CHECK: calll __ledf2
; CHECK: setg
; CHECK: retl
; test4: soft-float lowering of `fcmp uge` (unordered or greater-or-equal)
; of a double against 0.0. The FileCheck lines below expect a call to
; __ltdf2 followed by setns, i.e. the inverted ordered-less-than comparison
; (result of __ltdf2 is non-negative).
define i1 @test4(double %d) #0 {
entry:
%cmp = fcmp uge double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test4:
; CHECK: calll __ltdf2
; CHECK: setns
; CHECK: retl
; test5: soft-float lowering of the ordered predicate `fcmp ole` of a double
; against 0.0. The FileCheck lines below expect the matching libcall,
; __ledf2, followed by setle (no condition-code inversion).
define i1 @test5(double %d) #0 {
entry:
%cmp = fcmp ole double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test5:
; CHECK: calll __ledf2
; CHECK: setle
; CHECK: retl
; test6: soft-float lowering of the ordered predicate `fcmp olt` of a double
; against 0.0. The FileCheck lines below expect the matching libcall,
; __ltdf2, followed by sets (result is negative).
define i1 @test6(double %d) #0 {
entry:
%cmp = fcmp olt double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test6:
; CHECK: calll __ltdf2
; CHECK: sets
; CHECK: retl
; test7: soft-float lowering of the ordered predicate `fcmp ogt` of a double
; against 0.0. The FileCheck lines below expect the matching libcall,
; __gtdf2, followed by setg.
define i1 @test7(double %d) #0 {
entry:
%cmp = fcmp ogt double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test7:
; CHECK: calll __gtdf2
; CHECK: setg
; CHECK: retl
; test8: soft-float lowering of the ordered predicate `fcmp oge` of a double
; against 0.0. The FileCheck lines below expect the matching libcall,
; __gedf2, followed by setns (result is non-negative).
define i1 @test8(double %d) #0 {
entry:
%cmp = fcmp oge double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test8:
; CHECK: calll __gedf2
; CHECK: setns
; CHECK: retl
; test9: soft-float lowering of the ordered predicate `fcmp oeq` of a double
; against 0.0. The FileCheck lines below expect a call to __eqdf2 followed
; by sete.
define i1 @test9(double %d) #0 {
entry:
%cmp = fcmp oeq double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test9:
; CHECK: calll __eqdf2
; CHECK: sete
; CHECK: retl
; test10: soft-float lowering of `fcmp ueq` (unordered or equal) of a double
; against 0.0. This predicate still needs two libcalls: the FileCheck lines
; below expect __eqdf2 with sete and __unorddf2 with setne.
define i1 @test10(double %d) #0 {
entry:
%cmp = fcmp ueq double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test10:
; CHECK: calll __eqdf2
; CHECK: sete
; CHECK: calll __unorddf2
; CHECK: setne
; CHECK: retl
; test11: soft-float lowering of `fcmp one` (ordered and not equal) of a
; double against 0.0. This predicate is expanded as olt | ogt, so the
; FileCheck lines below expect two libcalls: __gtdf2 with setg and __ltdf2
; with sets.
define i1 @test11(double %d) #0 {
entry:
%cmp = fcmp one double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test11:
; CHECK: calll __gtdf2
; CHECK: setg
; CHECK: calll __ltdf2
; CHECK: sets
; CHECK: retl
; test12: soft-float lowering of `fcmp une` (unordered or not equal) of a
; double against 0.0. The FileCheck lines below expect a call to __nedf2
; followed by setne.
define i1 @test12(double %d) #0 {
entry:
%cmp = fcmp une double %d, 0.000000e+00
ret i1 %cmp
}
; CHECK-LABEL: test12:
; CHECK: calll __nedf2
; CHECK: setne
; CHECK: retl
attributes #0 = { "use-soft-float"="true" }