From 0ec098e22ba9c882ea2a9885d5d5f5dc4e4073ed Mon Sep 17 00:00:00 2001
From: Drew Wock
Date: Wed, 26 Aug 2020 15:17:17 -0400
Subject: [PATCH] [FPEnv] Allow fneg + strict_fadd -> strict_fsub in DAGCombiner

This is the first of a set of DAGCombiner changes enabling strictfp
optimizations. I want to test the waters with this to make sure changes
like these are acceptable for the strictfp case: this particular change
should preserve exception ordering and result precision perfectly, and
many other possible changes appear to be able to do so as well.

Copied from the regular fadd combines but modified to preserve ordering
via the chain, this change allows strict_fadd x, (fneg y) to become
strict_fsub x, y, and strict_fadd (fneg x), y to become strict_fsub y, x.
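For example, given IR like the following (an illustrative function in the
spirit of the new X86 test; the function name is arbitrary and the exact
instructions chosen depend on the target):

  define float @fneg_fadd_example(float %x, float %y) {
    ; The fneg feeding the constrained fadd is folded into a strict fsub node.
    %neg = fneg float %y
    %add = call float @llvm.experimental.constrained.fadd.f32(
               float %x, float %neg,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
    ret float %add
  }

  declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

On x86-64 this now selects a single subss, instead of negating explicitly
and then adding, which is what the new strict-fadd-combines.ll test
checks for.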
Differential Revision: https://reviews.llvm.org/D85548
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 29 +++++++++++++++
 llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll   |  2 +-
 llvm/test/CodeGen/X86/strict-fadd-combines.ll | 37 +++++++++++++++++++
 3 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/strict-fadd-combines.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 445e2bff6c05..59edd03b7ec8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -464,6 +464,7 @@ namespace {
     SDValue visitFREEZE(SDNode *N);
     SDValue visitBUILD_PAIR(SDNode *N);
     SDValue visitFADD(SDNode *N);
+    SDValue visitSTRICT_FADD(SDNode *N);
     SDValue visitFSUB(SDNode *N);
     SDValue visitFMUL(SDNode *N);
     SDValue visitFMA(SDNode *N);
@@ -1650,6 +1651,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::BITCAST: return visitBITCAST(N);
   case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
   case ISD::FADD: return visitFADD(N);
+  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
   case ISD::FSUB: return visitFSUB(N);
   case ISD::FMUL: return visitFMUL(N);
   case ISD::FMA: return visitFMA(N);
@@ -12833,6 +12835,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N0 = N->getOperand(1);
+  SDValue N1 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  EVT ChainVT = N->getValueType(1);
+  SDLoc DL(N);
+  const SDNodeFlags Flags = N->getFlags();
+
+  // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+            N1, DAG, LegalOperations, ForCodeSize)) {
+      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+                         {Chain, N0, NegN1}, Flags);
+    }
+
+  // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+            N0, DAG, LegalOperations, ForCodeSize)) {
+      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+                         {Chain, N1, NegN0}, Flags);
+    }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
index e8240d57816c..a7a6ec3a6fc9 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
@@ -112,7 +112,7 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
 ; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_f32_e64 v0, -|v0|, v1
+; GCN-NEXT: v_sub_f32_e64 v0, v1, |v0|
 ; GCN-NEXT: s_setpc_b64 s[30:31]
   %fabs.x = call float @llvm.fabs.f32(float %x)
   %neg.fabs.x = fneg float %fabs.x
diff --git a/llvm/test/CodeGen/X86/strict-fadd-combines.ll b/llvm/test/CodeGen/X86/strict-fadd-combines.ll
new file mode 100644
index 000000000000..8560e1bb5bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strict-fadd-combines.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) {
+  ; CHECK: subss %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg float %y
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) {
+  ; CHECK: subss %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg float %y
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %neg, float %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) {
+  ; CHECK: subsd %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg double %y
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %x, double %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %add
+}
+
+define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y) {
+  ; CHECK: subsd %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg double %y
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %neg, double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %add
+}
+
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)