[FPEnv] Allow fneg + strict_fadd -> strict_fsub in DAGCombiner

This is the first of a set of DAGCombiner changes enabling strictfp
optimizations. I want to test the waters with this to make sure changes
like these are acceptable for the strictfp case: this particular change
should preserve exception ordering and result precision perfectly, and
many other possible changes appear to be able to do so as well.

Copied from the regular fadd combines but modified to preserve ordering via
the chain, this change allows strict_fadd x, (fneg y) to become
strict_fsub x, y and strict_fadd (fneg x), y to become strict_fsub y, x.
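As an illustration (a minimal LLVM IR sketch written for this summary, not
text taken from the patch itself), the pattern the combine targets is the
one exercised by the new X86 test:

  %neg = fneg float %y
  %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")

After the combine the DAG holds a single strict_fsub of %x and %y, so the
backend can emit one subtract instead of an explicit negation followed by an
add, while the chain operand of the strict node is carried over unchanged.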

Differential Revision: https://reviews.llvm.org/D85548
Drew Wock 2020-08-26 15:17:17 -04:00 committed by Kevin P. Neal
parent df8f3bf626
commit 0ec098e22b
3 changed files with 67 additions and 1 deletion


@@ -464,6 +464,7 @@ namespace {
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
@@ -1650,6 +1651,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
@@ -12833,6 +12835,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
  return SDValue();
}

SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N0 = N->getOperand(1);
  SDValue N1 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT ChainVT = N->getValueType(1);
  SDLoc DL(N);
  const SDNodeFlags Flags = N->getFlags();
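  // Note: Chain is threaded through to the strict_fsub nodes created below so
  // that the ordering of FP exceptions is preserved.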
  // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
            N1, DAG, LegalOperations, ForCodeSize)) {
      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
                         {Chain, N0, NegN1}, Flags);
    }

  // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
            N0, DAG, LegalOperations, ForCodeSize)) {
      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
                         {Chain, N1, NegN0}, Flags);
    }
  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);


@@ -112,7 +112,7 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_f32_e64 v0, -|v0|, v1
+; GCN-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GCN-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%neg.fabs.x = fneg float %fabs.x


@@ -0,0 +1,37 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) {
; CHECK: subss %{{.*}}, %{{.*}}
; CHECK-NEXT: retq
  %neg = fneg float %y
  %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret float %add
}

define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) {
; CHECK: subss %{{.*}}, %{{.*}}
; CHECK-NEXT: retq
  %neg = fneg float %y
  %add = call float @llvm.experimental.constrained.fadd.f32(float %neg, float %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret float %add
}

define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) {
; CHECK: subsd %{{.*}}, %{{.*}}
; CHECK-NEXT: retq
  %neg = fneg double %y
  %add = call double @llvm.experimental.constrained.fadd.f64(double %x, double %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %add
}

define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y) {
; CHECK: subsd %{{.*}}, %{{.*}}
; CHECK-NEXT: retq
  %neg = fneg double %y
  %add = call double @llvm.experimental.constrained.fadd.f64(double %neg, double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %add
}

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)