Reassoc FMF should not optimize FMA(a, 0, b) to (b)

Optimizing (a * 0 + b) to (b) requires assuming that a is finite and not NaN. DAGCombiner currently performs this optimization whenever the reassoc fast-math flag is set, which is incorrect because reassoc alone does not imply those assumptions. Change DAGCombiner to perform this optimization only when Options.UnsafeFPMath is set. Differential Revision: https://reviews.llvm.org/D130232 Co-authored-by: Andrea Faulds <andrea.faulds@arm.com>
2022-07-26 09:39:12 +01:00 · 2022-07-26 09:39:12 +01:00 · c8d91b07bb
parent 1bc7b06ffd
commit c8d91b07bb
2 changed files with 19 additions and 4 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -15002,7 +15002,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
  // FMA nodes have flags that propagate to the created nodes.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

-  bool UnsafeFPMath =
+  bool CanReassociate =
      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();

  // Constant fold FMA.
@ -15026,7 +15026,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);

-  if (UnsafeFPMath) {
+  // FIXME: use fast math flags instead of Options.UnsafeFPMath
+  if (Options.UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
@ -15043,7 +15044,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
     !DAG.isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

-  if (UnsafeFPMath) {
+  if (CanReassociate) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
@ -15084,7 +15085,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
    }
  }

-  if (UnsafeFPMath) {
+  if (CanReassociate) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(
--- a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
+++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
@ -54,3 +54,17 @@ define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C)
 	%tmp2 = fsub <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
+
+; Regression test: contract FMF allows folding (A * 0 + B) to FMA(A, 0, B), but
+; reassoc FMF must not allow further folding to just (B) without additional
+; FMFs (ninf, nnan)
+define float @fma_zero(float %A, float %B) {
+; CHECK-LABEL: fma_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    fmadd s0, s0, s2, s1
+; CHECK-NEXT:    ret
+	%tmp1 = fmul contract reassoc float %A, 0.0e+0;
+	%tmp2 = fadd contract reassoc float %B, %tmp1;
+	ret float %tmp2
+}