Refactor reciprocal and reciprocal square root estimate into target-independent functions (part 2).

This is purely refactoring. No functional changes intended. PowerPC is the only target
that is currently using this interface.

The ultimate goal is to allow targets other than PowerPC (certainly X86 and AArch64) to turn this:

z = y / sqrt(x)

into:

z = y * rsqrte(x)

And:

z = y / x

into:

z = y * rcpe(x)

using whatever HW magic they can use. See http://llvm.org/bugs/show_bug.cgi?id=20900 .

There is one hook in TargetLowering to get the target-specific opcode for an estimate instruction
along with the number of refinement steps needed to make the estimate usable.

Differential Revision: http://reviews.llvm.org/D5484

llvm-svn: 218553
This commit is contained in:
Sanjay Patel 2014-09-26 23:01:47 +00:00
parent d213aab71d
commit bdf1e38856
5 changed files with 214 additions and 240 deletions

View File

@ -2624,10 +2624,21 @@ public:
return SDValue();
}
virtual SDValue BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const {
/// Hook for building estimates in place of, for example, slower divisions
/// and square roots. This is not a builder function itself; it only supplies
/// the target-specific pieces needed for building the estimate algorithm.
///
/// Return an estimate value for the input opcode and input operand.
///
/// \param Opcode The operation being estimated. The DAG combiner callers
///        pass ISD::FDIV when they want a reciprocal estimate and ISD::FSQRT
///        when they want a reciprocal square root estimate.
/// \param Operand The value whose estimate is requested.
/// \param DCI Combiner state made available to the target implementation.
/// \param [out] RefinementSteps The number of refinement iterations
///        required to generate a sufficient (though not necessarily IEEE-754
///        compliant) estimate for the value type.
/// \returns The estimate value; an empty SDValue return means no estimate
///          sequence can be created.
///
/// This default implementation always returns an empty SDValue and does not
/// write RefinementSteps, so callers must not read RefinementSteps unless
/// the returned value is non-empty.
virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const {
return SDValue();
}
//===--------------------------------------------------------------------===//
// Legalization utility functions
//

View File

@ -276,6 +276,7 @@ namespace {
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
@ -326,7 +327,8 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildRSQRTE(SDNode *N);
SDValue BuildReciprocalEstimate(SDValue Op);
SDValue BuildRsqrtEstimate(SDValue Op);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@ -1307,6 +1309,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
@ -6976,6 +6979,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
@ -7007,10 +7011,37 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
DAG.getConstantFP(Recip, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (SDValue SqrtOp = BuildRSQRTE(N))
return SqrtOp;
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
if (SDValue RV = BuildReciprocalEstimate(N1)) {
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
@ -7042,6 +7073,33 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return SDValue();
}
/// Try to replace an ISD::FSQRT node with an estimate-based sequence.
///
/// Only attempted when the target options enable UnsafeFPMath. sqrt(X) is
/// formed as the reciprocal estimate of the reciprocal square root estimate
/// of X, wrapped in a select that yields 0 when X compares equal to 0.
/// Returns an empty SDValue when UnsafeFPMath is off or when either of the
/// two estimates cannot be built.
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath) {
// Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt.
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
AddToWorklist(RV.getNode());
// Feed the rsqrt estimate into the reciprocal estimate. RV becomes empty
// here if no reciprocal estimate is available, in which case we fall
// through and report "no change".
RV = BuildReciprocalEstimate(RV);
if (RV.getNode()) {
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
EVT VT = RV.getValueType();
SDValue Zero = DAG.getConstantFP(0.0, VT);
// Compare the original sqrt operand (not the estimate) against zero.
SDValue ZeroCmp =
DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
// Vector results need VSELECT; scalar results use SELECT.
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
SDLoc(N), VT, ZeroCmp, Zero, RV);
return RV;
}
}
}
// No estimate sequence was produced; leave the FSQRT node unchanged.
return SDValue();
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -11702,36 +11760,92 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
/// Given an ISD::FDIV node with either a direct or indirect ISD::FSQRT operand,
/// generate a DAG expression using a reciprocal square root estimate op.
SDValue DAGCombiner::BuildRSQRTE(SDNode *N) {
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
if (Level >= AfterLegalizeDAG)
return SDValue();
// Expose the DAG combiner to the target combiner implementations.
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0), DCI)) {
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
unsigned Iterations;
if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
// F(X) = A X - 1 [which has a zero at X = 1/A]
// =>
// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
// does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, VT);
AddToWorklist(Est.getNode());
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
AddToWorklist(Est.getNode());
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI)) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI)) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
return Est;
}
return SDValue();
}
SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
if (Level >= AfterLegalizeDAG)
return SDValue();
// Expose the DAG combiner to the target combiner implementations.
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
unsigned Iterations;
if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
// =>
// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
// As a result, we precompute A/2 prior to the iteration loop.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT);
AddToWorklist(Est.getNode());
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op);
AddToWorklist(HalfArg.getNode());
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op);
AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, NewEst);
AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
AddToWorklist(Est.getNode());
}
return Est;
}
return SDValue();

View File

@ -7458,138 +7458,34 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
DAGCombinerInfo &DCI) const {
if (DCI.isAfterLegalizeVectorOps())
return SDValue();
EVT VT = Op.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
// F(X) = A X - 1 [which has a zero at X = 1/A]
// =>
// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
// does not require additional intermediate precision]
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(Op);
SDValue FPOne =
DAG.getConstantFP(1.0, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 &&
"Unknown vector type");
FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
FPOne, FPOne, FPOne, FPOne);
}
SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
DCI.AddToWorklist(Est.getNode());
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
DCI.AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
DCI.AddToWorklist(Est.getNode());
}
return Est;
SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const {
EVT VT = Operand.getValueType();
SDValue RV;
if (Opcode == ISD::FSQRT) {
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()))
RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
} else if (Opcode == ISD::FDIV) {
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()))
RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue PPCTargetLowering::BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const {
if (DCI.isAfterLegalizeVectorOps())
return SDValue();
EVT VT = Op.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
// =>
// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
// As a result, we precompute A/2 prior to the iteration loop.
if (RV.getNode()) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
// 2^-14. IEEE float has 23 digits and double has 52 digits.
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(Op);
SDValue FPThreeHalves =
DAG.getConstantFP(1.5, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 &&
"Unknown vector type");
FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
FPThreeHalves, FPThreeHalves,
FPThreeHalves, FPThreeHalves);
}
SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
DCI.AddToWorklist(Est.getNode());
// We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
DCI.AddToWorklist(HalfArg.getNode());
HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
DCI.AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
DCI.AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
DCI.AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
DCI.AddToWorklist(Est.getNode());
}
return Est;
++RefinementSteps;
}
return SDValue();
return RV;
}
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
@ -8316,55 +8212,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC:
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
}
}
break;
case ISD::FSQRT: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
SDValue RV = BuildRSQRTE(N->getOperand(0), DCI);
if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode()) {
// Unfortunately, RV is now NaN if the input was exactly 0. Select out
// this case and force the answer to 0.
EVT VT = RV.getValueType();
SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
}
SDValue ZeroCmp =
DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
DCI.AddToWorklist(ZeroCmp.getNode());
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
ZeroCmp, Zero, RV);
return RV;
}
}
}
break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {

View File

@ -700,8 +700,10 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue getEstimate(unsigned Opcode, SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};

View File

@ -16,12 +16,12 @@ define double @foo(double %a, double %b) nounwind {
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK: fmadd
; CHECK: fmul
; CHECK: fmul
; CHECK: fmadd
; CHECK: fmul
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr
; CHECK-SAFE: @foo
@ -85,10 +85,10 @@ define float @goo(float %a, float %b) nounwind {
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK: fmadds
; CHECK: fmuls
; CHECK: fmuls
; CHECK: blr
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @goo
; CHECK-SAFE: fsqrts
@ -117,10 +117,10 @@ define double @foo2(double %a, double %b) nounwind {
; CHECK-DAG: fre
; CHECK-DAG: fnmsub
; CHECK: fmadd
; CHECK: fnmsub
; CHECK: fmadd
; CHECK: fmul
; CHECK: blr
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: blr
; CHECK-SAFE: @foo2
; CHECK-SAFE: fdiv
@ -135,8 +135,8 @@ define float @goo2(float %a, float %b) nounwind {
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK: fmadds
; CHECK: fmuls
; CHECK: blr
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @goo2
; CHECK-SAFE: fdivs
@ -164,16 +164,16 @@ define double @foo3(double %a) nounwind {
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK: fmadd
; CHECK: fmul
; CHECK: fmul
; CHECK: fmadd
; CHECK: fmul
; CHECK: fre
; CHECK: fnmsub
; CHECK: fmadd
; CHECK: fnmsub
; CHECK: fmadd
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fre
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK: blr
; CHECK-SAFE: @foo3
@ -190,11 +190,11 @@ define float @goo3(float %a) nounwind {
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK: fmadds
; CHECK: fmuls
; CHECK: fres
; CHECK: fnmsubs
; CHECK: fmadds
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fres
; CHECK-NEXT: fnmsubs
; CHECK-NEXT: fmadds
; CHECK: blr
; CHECK-SAFE: @goo3