[ISel] Fix DAG divergence after new FMA combine

D132837 introduced a new DAG combine that used MorphNodeTo to morph an FMUL into an FMA. It turns out that MorphNodeTo does not properly update the divergence bit for users of the morphed node, causing an assertion failure on the new test case: llc: SelectionDAG.cpp:10486: void llvm::SelectionDAG::VerifyDAGDivergence(): Assertion `calculateDivergence(N) == N->isDivergent() && "Divergence bit inconsistency detected"' failed. Fixing MorphNodeTo to propagate the divergence bit is tricky because of the way it is used to select machine instructions, so use getNode and ReplaceAllUsesOfValueWith instead. Differential Revision: https://reviews.llvm.org/D134810
2022-09-28 15:20:08 +01:00 · 2022-09-28 15:20:08 +01:00 · 2c12a04bba
parent 2ad41f97f8
commit 2c12a04bba
2 changed files with 18 additions and 4 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -14305,10 +14305,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
      if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
        SDValue C = FMul.getOperand(0);
        SDValue D = FMul.getOperand(1);
-
-        DAG.MorphNodeTo(FMul.getNode(), PreferredFusedOpcode, FMul->getVTList(),
-                        {C, D, E});
-
+        SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+        DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
        return FMA;
      }

--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@ -221,6 +221,22 @@ define float @fmac_sequence_innermost_fmul_swapped_operands(float %a, float %b,
  ret float %t5
 }

+define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) #0 {
+; GCN-LABEL: fmac_sequence_innermost_fmul_sgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_mac_f32_e64 v0, s2, s3
+; GCN-NEXT:    v_fmac_f32_e64 v0, s0, s1
+; GCN-NEXT:    v_fmac_f32_e64 v0, s4, s5
+; GCN-NEXT:    ; return to shader part epilog
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %c, %d
+  %t2 = fadd fast float %t0, %t1
+  %t3 = fmul fast float %e, %f
+  %t4 = fadd fast float %t2, %t3
+  %t5 = fadd fast float %t4, %g
+  ret float %t5
+}
+
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
 declare float @llvm.maxnum.f32(float, float) #1