[FPEnv] Lower STRICT_FP_EXTEND and STRICT_FP_ROUND nodes in preprocess phase of ISelLowering to mirror non-strict nodes on x86.

I recently discovered a bug on the x86 platform: The fp80 type was not handled well by x86 for constrained floating point nodes, as their regular counterparts are replaced by extending loads and truncating stores during the preprocess phase. Normally, platforms don't have this issue, as they don't typically attempt to perform such legalizations during instruction selection preprocessing. Before this change, strict_fp nodes survived until they were mutated to normal nodes, which happened shortly after preprocessing on other platforms. This modification lowers these nodes at the same phase while properly utilizing the chain.5 Submitted by: Drew Wock <drew.wock@sas.com> Reviewed by: Craig Topper, Kevin P. Neal Approved by: Craig Topper Differential Revision: https://reviews.llvm.org/D63271 llvm-svn: 363417
2019-06-14 16:28:55 +00:00 · 2019-06-14 16:28:55 +00:00 · fece7c6c83
parent cdf339266b
commit fece7c6c83
2 changed files with 171 additions and 45 deletions
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -931,9 +931,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
-    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
-      continue;
-
+    switch (N->getOpcode()) {
+    default: continue;
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+    {
      MVT SrcVT = N->getOperand(0).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

@ -972,8 +974,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      SDLoc dl(N);

      // FIXME: optimize the case where the src/dest is a load or store?
-    SDValue Store =
-        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+
+      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT);
      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                          MachinePointerInfo(), MemVT);
@ -984,6 +986,69 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      // To avoid invalidating 'I', back it up to the convert node.
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      break;
+    }
+
+    //The sequence of events for lowering STRICT_FP versions of these nodes requires
+    //dealing with the chain differently, as there is already a preexisting chain.
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(1).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
+        continue;
+
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
+        continue;
+
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(2))
+          continue;
+      }
+
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      //Since the operation is StrictFP, use the preexisting chain.
+      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+                                MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+      break;
+    }
+    }
+

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
--- a/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
+++ b/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=x86_64-gnu-linux < %s | FileCheck %s
+
+define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) {
+; CHECK-LABEL: constrained_fpext_f32_as_fp80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
+            float %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret float %trunc
+}
+
+define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) {
+; CHECK-LABEL: constrained_fpext_f64_to_f80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
+            double %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    retq
+  %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret double %trunc
+}
+
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80, metadata, metadata)