Improve sqrt estimate algorithm (fast-math)

This patch changes the fast-math implementation for calculating sqrt(x) from:
y = 1 / (1 / sqrt(x))
to:
y = x * (1 / sqrt(x))

This has two benefits: the generated code is smaller and faster, and there is
one fewer estimate instruction that may lose precision.

The only target that will be affected (until http://reviews.llvm.org/D5658 is approved)
is PPC. The difference in codegen for PPC is 2 fewer flops for a single-precision sqrtf
or vector sqrtf, and 4 fewer flops for a double-precision sqrt.
We also eliminate a constant load and the extra register usage that came with it.

Differential Revision: http://reviews.llvm.org/D5682

llvm-svn: 219445
This commit is contained in:
Sanjay Patel 2014-10-09 21:26:35 +00:00
parent 6d28da10e5
commit 3d497cd778
2 changed files with 18 additions and 26 deletions

View File

@ -7088,26 +7088,25 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath) { if (DAG.getTarget().Options.UnsafeFPMath) {
// Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt. // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
AddToWorklist(RV.getNode()); AddToWorklist(RV.getNode());
RV = BuildReciprocalEstimate(RV); EVT VT = RV.getValueType();
if (RV.getNode()) { RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
// Unfortunately, RV is now NaN if the input was exactly 0. AddToWorklist(RV.getNode());
// Select out this case and force the answer to 0.
EVT VT = RV.getValueType();
SDValue Zero = DAG.getConstantFP(0.0, VT);
SDValue ZeroCmp =
DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, // Unfortunately, RV is now NaN if the input was exactly 0.
SDLoc(N), VT, ZeroCmp, Zero, RV); // Select out this case and force the answer to 0.
return RV; SDValue Zero = DAG.getConstantFP(0.0, VT);
} SDValue ZeroCmp =
DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
SDLoc(N), VT, ZeroCmp, Zero, RV);
return RV;
} }
} }
return SDValue(); return SDValue();

View File

@ -197,11 +197,7 @@ define double @foo3(double %a) nounwind {
; CHECK-NEXT: fmul ; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd ; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul ; CHECK-NEXT: fmul
; CHECK-NEXT: fre ; CHECK-NEXT: fmul
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK: blr ; CHECK: blr
; CHECK-SAFE: @foo3 ; CHECK-SAFE: @foo3
@ -220,9 +216,7 @@ define float @goo3(float %a) nounwind {
; CHECK: fmuls ; CHECK: fmuls
; CHECK-NEXT: fmadds ; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls
; CHECK-NEXT: fres ; CHECK-NEXT: fmuls
; CHECK-NEXT: fnmsubs
; CHECK-NEXT: fmadds
; CHECK: blr ; CHECK: blr
; CHECK-SAFE: @goo3 ; CHECK-SAFE: @goo3
@ -236,7 +230,6 @@ define <4 x float> @hoo3(<4 x float> %a) nounwind {
; CHECK: @hoo3 ; CHECK: @hoo3
; CHECK: vrsqrtefp ; CHECK: vrsqrtefp
; CHECK-DAG: vrefp
; CHECK-DAG: vcmpeqfp ; CHECK-DAG: vcmpeqfp
; CHECK-SAFE: @hoo3 ; CHECK-SAFE: @hoo3