From 7acc81b74428660efa5156815eebaee8a2ebe075 Mon Sep 17 00:00:00 2001 From: Michael Berg Date: Fri, 4 May 2018 18:48:20 +0000 Subject: [PATCH] Fast Math Flag mapping into SDNode Summary: Adding support for Fast flags in the SDNode to leverage fast math sub flag usage. Reviewers: spatel, arsenm, jbhateja, hfinkel, escha, qcolombet, echristo, wristow, javed.absar Reviewed By: spatel Subscribers: llvm-commits, rampitec, nhaehnle, tstellar, FarhanaAleen, nemanjai, javed.absar, jbhateja, hfinkel, wdng Differential Revision: https://reviews.llvm.org/D45710 llvm-svn: 331547 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 31 +++++++++++++------ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 3 +- .../SelectionDAG/SelectionDAGDumper.cpp | 9 ++++-- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 ++--- llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 21 ++++++------- llvm/test/CodeGen/X86/fmf-propagation.ll | 8 ++--- 8 files changed, 53 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5a6df4372be9..f9dd35745ab7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -359,21 +359,22 @@ private: bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; - bool UnsafeAlgebra : 1; bool NoNaNs : 1; bool NoInfs : 1; bool NoSignedZeros : 1; bool AllowReciprocal : 1; bool VectorReduction : 1; bool AllowContract : 1; + bool ApproximateFuncs : 1; + bool AllowReassociation : 1; public: /// Default constructor turns off all optimization flags. SDNodeFlags() : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), + Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), - AllowContract(false) {} + AllowContract(false), ApproximateFuncs(false), AllowReassociation(false) {} /// Sets the state of the flags to the defined state. void setDefined() { AnyDefined = true; } @@ -393,10 +394,6 @@ public: setDefined(); Exact = b; } - void setUnsafeAlgebra(bool b) { - setDefined(); - UnsafeAlgebra = b; - } void setNoNaNs(bool b) { setDefined(); NoNaNs = b; @@ -421,18 +418,32 @@ public: setDefined(); AllowContract = b; } + void setApproximateFuncs(bool b) { + setDefined(); + ApproximateFuncs = b; + } + void setAllowReassociation(bool b) { + setDefined(); + AllowReassociation = b; + } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebra; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } bool hasNoSignedZeros() const { return NoSignedZeros; } bool hasAllowReciprocal() const { return AllowReciprocal; } bool hasVectorReduction() const { return VectorReduction; } bool hasAllowContract() const { return AllowContract; } + bool hasApproximateFuncs() const { return ApproximateFuncs; } + bool hasAllowReassociation() const { return AllowReassociation; } + + bool isFast() const { + return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && + AllowContract && ApproximateFuncs && AllowReassociation; + } /// Clear any flags in this flag set that aren't also set in Flags. /// If the given Flags are undefined then don't do anything. @@ -442,13 +453,14 @@ public: NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; - UnsafeAlgebra &= Flags.UnsafeAlgebra; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; NoSignedZeros &= Flags.NoSignedZeros; AllowReciprocal &= Flags.AllowReciprocal; VectorReduction &= Flags.VectorReduction; AllowContract &= Flags.AllowContract; + ApproximateFuncs &= Flags.ApproximateFuncs; + AllowReassociation &= Flags.AllowReassociation; } }; @@ -923,6 +935,7 @@ public: const SDNodeFlags getFlags() const { return Flags; } void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } + bool isFast() { return Flags.isFast(); } /// Clear any flags in this node that aren't also set in Flags. /// If Flags is not in a defined state then this has no effect. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d54c61973565..a6a020e83613 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9542,7 +9542,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasUnsafeAlgebra(); + return F.hasAllowContract() || F.hasAllowReassociation(); } /// Try to perform FMA combining on a given FADD node. @@ -10567,9 +10567,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // TODO: FMA nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. + // For now, create a Flags object for use with reassociation math transforms. SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); if (Options.UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) @@ -10841,9 +10841,9 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. + // For now, create a Flags object for use with reassociation math transforms. SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); return buildSqrtEstimate(N0, Flags); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index bc08e9503613..fe4cbfc4bf0f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2776,7 +2776,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.isFast()); + Flags.setApproximateFuncs(FMF.approxFunc()); + Flags.setAllowReassociation(FMF.allowReassoc()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9862bc45b17b..cfe8ed016145 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -479,9 +479,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; - if (getFlags().hasUnsafeAlgebra()) - OS << " unsafe"; - if (getFlags().hasNoNaNs()) OS << " nnan"; @@ -497,6 +494,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasAllowContract()) OS << " contract"; + if (getFlags().hasApproximateFuncs()) + OS << " afn"; + + if (getFlags().hasAllowReassociation()) + OS << " reassoc"; + if (getFlags().hasVectorReduction()) OS << " vector-reduction"; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 703cccb3dbfa..1d5683cf3cb2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5145,7 +5145,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, EVT VT = Operand.getValueType(); SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) @@ -5184,7 +5184,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, EVT VT = Operand.getValueType(); SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); // Newton reciprocal iteration: E * (2 - X * E) // AArch64 reciprocal iteration instruction: (2 - M * N) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d98c2a272c90..82318263ea6e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5347,8 +5347,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); const SDNodeFlags Flags = Op->getFlags(); - bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || - Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal(); if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) return SDValue(); @@ -6698,8 +6697,8 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, const TargetOptions &Options = DAG.getTarget().Options; if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || - (N0->getFlags().hasUnsafeAlgebra() && - N1->getFlags().hasUnsafeAlgebra())) && + (N0->getFlags().hasAllowContract() && + N1->getFlags().hasAllowContract())) && isFMAFasterThanFMulAndFAdd(VT)) { return ISD::FMA; } diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 03b6a0ba9508..3550b09bbeb1 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -63,7 +63,7 @@ define float @fmul_fadd_contract2(float %x, float %y, float %z) { ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:' ; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}} -; FMFDEBUG: fadd {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:' define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { @@ -86,15 +86,14 @@ define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:' -; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}} -; FMFDEBUG: fadd {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fma {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:' define float @fmul_fadd_reassoc2(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: xsmulsp 0, 1, 2 -; FMF-NEXT: xsaddsp 1, 0, 3 +; FMF-NEXT: xsmaddasp 3, 1, 2 +; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc2: @@ -161,7 +160,7 @@ define float @fmul_fadd_fast2(float %x, float %y, float %z) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' define float @fmul_fma_reassoc1(float %x) { @@ -197,7 +196,7 @@ define float @fmul_fma_reassoc1(float %x) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' define float @fmul_fma_reassoc2(float %x) { @@ -233,7 +232,7 @@ define float @fmul_fma_reassoc2(float %x) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:' define float @fmul_fma_fast1(float %x) { @@ -269,7 +268,7 @@ define float @fmul_fma_fast1(float %x) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:' define float @fmul_fma_fast2(float %x) { @@ -305,7 +304,7 @@ define float @fmul_fma_fast2(float %x) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' define float @sqrt_afn(float %x) { @@ -345,7 +344,7 @@ define float @sqrt_afn(float %x) { ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; GLOBALDEBUG: fmul unsafe {{t[0-9]+}} +; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' define float @sqrt_fast(float %x) { diff --git a/llvm/test/CodeGen/X86/fmf-propagation.ll b/llvm/test/CodeGen/X86/fmf-propagation.ll index 294a2952c76e..56e813f371c1 100644 --- a/llvm/test/CodeGen/X86/fmf-propagation.ll +++ b/llvm/test/CodeGen/X86/fmf-propagation.ll @@ -3,8 +3,6 @@ ; This tests the propagation of fast-math-flags from IR instructions to SDNodeFlags. -; FIXME: 'afn' and 'reassoc' were dropped. With 'fast', 'reassoc' got renamed to 'unsafe'. - ; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_transfer:' ; CHECK: t5: f32 = fadd nsz t2, t4 @@ -12,9 +10,9 @@ ; CHECK-NEXT: t7: f32 = fadd nnan t6, t4 ; CHECK-NEXT: t8: f32 = fadd ninf t7, t4 ; CHECK-NEXT: t9: f32 = fadd contract t8, t4 -; CHECK-NEXT: t10: f32 = fadd t9, t4 -; CHECK-NEXT: t11: f32 = fadd t10, t4 -; CHECK-NEXT: t12: f32 = fadd unsafe nnan ninf nsz arcp contract t11, t4 +; CHECK-NEXT: t10: f32 = fadd afn t9, t4 +; CHECK-NEXT: t11: f32 = fadd reassoc t10, t4 +; CHECK-NEXT: t12: f32 = fadd nnan ninf nsz arcp contract afn reassoc t11, t4 ; CHECK: Optimized lowered selection DAG: %bb.0 'fmf_transfer:'