forked from OSchip/llvm-project
Neuter the optimization I implemented with r107852 and r108258, which turns some
floating-point equality comparisons into integer ones with -ffast-math. The issue is that the optimization causes +0.0 != -0.0, because the two zeros have different bit patterns. Now the optimization is only done when one side is known to be 0.0, and the other side's sign bit is masked off for the comparison. rdar://10964603 llvm-svn: 151861
This commit is contained in:
parent
396260c484
commit
d12af5dc69
|
@ -2978,12 +2978,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue Dest = Op.getOperand(4);
|
SDValue Dest = Op.getOperand(4);
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
|
|
||||||
bool SeenZero = false;
|
bool LHSSeenZero = false;
|
||||||
if (canChangeToInt(LHS, SeenZero, Subtarget) &&
|
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
|
||||||
canChangeToInt(RHS, SeenZero, Subtarget) &&
|
bool RHSSeenZero = false;
|
||||||
// If one of the operands is zero, it's safe to ignore the NaN case since
|
bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
|
||||||
// we only care about equality comparisons.
|
if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
|
||||||
(SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
|
|
||||||
// If unsafe fp math optimization is enabled and there are no other uses of
|
// If unsafe fp math optimization is enabled and there are no other uses of
|
||||||
// the CMP operands, and the condition code is EQ or NE, we can optimize it
|
// the CMP operands, and the condition code is EQ or NE, we can optimize it
|
||||||
// to an integer comparison.
|
// to an integer comparison.
|
||||||
|
@ -2992,10 +2991,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
|
||||||
else if (CC == ISD::SETUNE)
|
else if (CC == ISD::SETUNE)
|
||||||
CC = ISD::SETNE;
|
CC = ISD::SETNE;
|
||||||
|
|
||||||
|
SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
|
||||||
SDValue ARMcc;
|
SDValue ARMcc;
|
||||||
if (LHS.getValueType() == MVT::f32) {
|
if (LHS.getValueType() == MVT::f32) {
|
||||||
LHS = bitcastf32Toi32(LHS, DAG);
|
LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||||
RHS = bitcastf32Toi32(RHS, DAG);
|
bitcastf32Toi32(LHS, DAG), Mask);
|
||||||
|
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||||
|
bitcastf32Toi32(RHS, DAG), Mask);
|
||||||
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
|
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
|
||||||
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
||||||
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
|
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
|
||||||
|
@ -3006,6 +3008,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue RHS1, RHS2;
|
SDValue RHS1, RHS2;
|
||||||
expandf64Toi32(LHS, DAG, LHS1, LHS2);
|
expandf64Toi32(LHS, DAG, LHS1, LHS2);
|
||||||
expandf64Toi32(RHS, DAG, RHS1, RHS2);
|
expandf64Toi32(RHS, DAG, RHS1, RHS2);
|
||||||
|
LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
|
||||||
|
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
|
||||||
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
|
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
|
||||||
ARMcc = DAG.getConstant(CondCode, MVT::i32);
|
ARMcc = DAG.getConstant(CondCode, MVT::i32);
|
||||||
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
|
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||||
|
|
|
@ -1,24 +1,16 @@
|
||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
|
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
|
||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
|
|
||||||
; rdar://7461510
|
; rdar://7461510
|
||||||
|
; rdar://10964603
|
||||||
|
|
||||||
|
; Disable this optimization unless we know one of them is zero.
|
||||||
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
|
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; FINITE: t1:
|
; CHECK: t1:
|
||||||
; FINITE-NOT: vldr
|
; CHECK: vldr s0,
|
||||||
; FINITE: ldr
|
; CHECK: vldr s1,
|
||||||
; FINITE: ldr
|
; CHECK: vcmpe.f32 s1, s0
|
||||||
; FINITE: cmp r0, r1
|
; CHECK: vmrs apsr_nzcv, fpscr
|
||||||
; FINITE-NOT: vcmpe.f32
|
; CHECK: beq
|
||||||
; FINITE-NOT: vmrs
|
|
||||||
; FINITE: beq
|
|
||||||
|
|
||||||
; NAN: t1:
|
|
||||||
; NAN: vldr s0,
|
|
||||||
; NAN: vldr s1,
|
|
||||||
; NAN: vcmpe.f32 s1, s0
|
|
||||||
; NAN: vmrs apsr_nzcv, fpscr
|
|
||||||
; NAN: beq
|
|
||||||
%0 = load float* %a
|
%0 = load float* %a
|
||||||
%1 = load float* %b
|
%1 = load float* %b
|
||||||
%2 = fcmp une float %0, %1
|
%2 = fcmp une float %0, %1
|
||||||
|
@ -33,17 +25,21 @@ bb2:
|
||||||
ret i32 %4
|
ret i32 %4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; If one side is zero, the other side's sign bit is masked off to allow
|
||||||
|
; +0.0 == -0.0
|
||||||
define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
|
define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; FINITE: t2:
|
; CHECK: t2:
|
||||||
; FINITE-NOT: vldr
|
; CHECK-NOT: vldr
|
||||||
; FINITE: ldrd r0, r1, [r0]
|
; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
|
||||||
; FINITE-NOT: b LBB
|
; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
|
||||||
; FINITE: cmp r0, #0
|
; CHECK-NOT: b LBB
|
||||||
; FINITE: cmpeq r1, #0
|
; CHECK: cmp [[REG1]], #0
|
||||||
; FINITE-NOT: vcmpe.f32
|
; CHECK: bfc [[REG2]], #31, #1
|
||||||
; FINITE-NOT: vmrs
|
; CHECK: cmpeq [[REG2]], #0
|
||||||
; FINITE: bne
|
; CHECK-NOT: vcmpe.f32
|
||||||
|
; CHECK-NOT: vmrs
|
||||||
|
; CHECK: bne
|
||||||
%0 = load double* %a
|
%0 = load double* %a
|
||||||
%1 = fcmp oeq double %0, 0.000000e+00
|
%1 = fcmp oeq double %0, 0.000000e+00
|
||||||
br i1 %1, label %bb1, label %bb2
|
br i1 %1, label %bb1, label %bb2
|
||||||
|
@ -59,13 +55,14 @@ bb2:
|
||||||
|
|
||||||
define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
|
define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; FINITE: t3:
|
; CHECK: t3:
|
||||||
; FINITE-NOT: vldr
|
; CHECK-NOT: vldr
|
||||||
; FINITE: ldr r0, [r0]
|
; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
|
||||||
; FINITE: cmp r0, #0
|
; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
|
||||||
; FINITE-NOT: vcmpe.f32
|
; CHECK: tst [[REG3]], [[REG4]]
|
||||||
; FINITE-NOT: vmrs
|
; CHECK-NOT: vcmpe.f32
|
||||||
; FINITE: bne
|
; CHECK-NOT: vmrs
|
||||||
|
; CHECK: bne
|
||||||
%0 = load float* %a
|
%0 = load float* %a
|
||||||
%1 = fcmp oeq float %0, 0.000000e+00
|
%1 = fcmp oeq float %0, 0.000000e+00
|
||||||
br i1 %1, label %bb1, label %bb2
|
br i1 %1, label %bb1, label %bb2
|
||||||
|
|
Loading…
Reference in New Issue