forked from OSchip/llvm-project
[ISel] Fix DAG divergence after new FMA combine
D132837 introduced a new DAG combine that used MorphNodeTo to morph an FMUL into an FMA. It turns out that MorphNodeTo does not properly update the divergence bit for users of the morphed node, causing an assertion failure on the new test case:

llc: SelectionDAG.cpp:10486: void llvm::SelectionDAG::VerifyDAGDivergence(): Assertion `calculateDivergence(N) == N->isDivergent() && "Divergence bit inconsistency detected"' failed.

Fixing MorphNodeTo to propagate the divergence bit is tricky because of the way it is used to select machine instructions, so use getNode and ReplaceAllUsesOfValueWith instead.

Differential Revision: https://reviews.llvm.org/D134810
This commit is contained in:
parent
2ad41f97f8
commit
2c12a04bba
|
@ -14305,10 +14305,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
|||
if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
|
||||
SDValue C = FMul.getOperand(0);
|
||||
SDValue D = FMul.getOperand(1);
|
||||
|
||||
DAG.MorphNodeTo(FMul.getNode(), PreferredFusedOpcode, FMul->getVTList(),
|
||||
{C, D, E});
|
||||
|
||||
SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
|
||||
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
|
||||
return FMA;
|
||||
}
|
||||
|
||||
|
|
|
@ -221,6 +221,22 @@ define float @fmac_sequence_innermost_fmul_swapped_operands(float %a, float %b,
|
|||
ret float %t5
|
||||
}
|
||||
|
||||
; Test case for a chain of fast-math multiply-adds: three `fmul fast` products
; (%a*%b, %c*%d, %e*%f) are summed with `fadd fast`, and the running sum is then
; added to %g. The six multiplicands are `inreg` arguments while %g is a plain
; argument. The check lines below expect the whole chain to be emitted as one
; v_mac_f32 followed by two v_fmac_f32, all accumulating into v0 from s0-s5.
; NOTE(review): presumably the inreg arguments are uniform (SGPR) values and %g
; is the only divergent input, which is what exercises the divergence-bit
; bookkeeping in the FMA combine -- confirm against the AMDGPU calling convention.
define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) #0 {
|
||||
; GCN-LABEL: fmac_sequence_innermost_fmul_sgpr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mac_f32_e64 v0, s2, s3
|
||||
; GCN-NEXT: v_fmac_f32_e64 v0, s0, s1
|
||||
; GCN-NEXT: v_fmac_f32_e64 v0, s4, s5
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%t0 = fmul fast float %a, %b
|
||||
%t1 = fmul fast float %c, %d
|
||||
%t2 = fadd fast float %t0, %t1
|
||||
%t3 = fmul fast float %e, %f
|
||||
%t4 = fadd fast float %t2, %t3
|
||||
%t5 = fadd fast float %t4, %g
|
||||
ret float %t5
|
||||
}
|
||||
|
||||
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
|
||||
|
|
Loading…
Reference in New Issue