[X86] Promote f16 STRICT_FROUND to f32 and call libc.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D113817
2021-11-12 21:36:56 -08:00 · 2021-11-12 21:36:56 -08:00 · 82bc6a094e
parent 2272ec1c63
commit 82bc6a094e
3 changed files with 36 additions and 6 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4756,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
    break;
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FROUND:
  case ISD::STRICT_FSIN:
  case ISD::STRICT_FCOS:
  case ISD::STRICT_FLOG:
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1961,7 +1961,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::STRICT_FSETCC,        MVT::f16, Custom);
    setOperationAction(ISD::STRICT_FSETCCS,       MVT::f16, Custom);
    setOperationAction(ISD::FROUND,               MVT::f16, Custom);
-    setOperationAction(ISD::STRICT_FROUND,        MVT::f16, Custom);
+    setOperationAction(ISD::STRICT_FROUND,        MVT::f16, Promote);
    setOperationAction(ISD::FROUNDEVEN,           MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FROUNDEVEN,    MVT::f16, Legal);
    setOperationAction(ISD::FP_ROUND,             MVT::f16, Custom);
@@ -22442,10 +22442,6 @@ SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
 /// compiling with trapping math, we can emulate this with
 /// floor(X + copysign(nextafter(0.5, 0.0), X)).
 static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
-  if (Op.getOpcode() == ISD::STRICT_FROUND &&
-      Op.getSimpleValueType() == MVT::f16)
-    report_fatal_error("For now cannot emit strict round(fp16) at backend for "
-                       "lacking library support.");
  SDValue N0 = Op.getOperand(0);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
@@ -31244,7 +31240,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
  case ISD::FADD:
  case ISD::FSUB:               return lowerFaddFsub(Op, DAG);
-  case ISD::STRICT_FROUND:
  case ISD::FROUND:             return LowerFROUND(Op, DAG);
  case ISD::FABS:
  case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll
@@ -8,6 +8,7 @@ declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
 declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
+declare half @llvm.experimental.constrained.round.f16(half, metadata)

 define half @fceil32(half %f) #0 {
 ; X86-LABEL: fceil32:
@@ -102,4 +103,37 @@ define half @froundeven16(half %f) #0 {
  ret half %res
 }

+define half @fround16(half %f) #0 {
+; X86-LABEL: fround16:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, (%esp)
+; X86-NEXT:    calll roundf
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    wait
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: fround16:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT:    callq roundf@PLT
+; X64-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+
+  %res = call half @llvm.experimental.constrained.round.f16(
+                        half %f, metadata !"fpexcept.strict") #0
+  ret half %res
+}
+
 attributes #0 = { strictfp }