diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3fca3b457571..8b1b6bbce30f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9019,6 +9019,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + + // Skip this folding if it results in an fp_round from f80 to f16. + // + // f80 to f16 always generates an expensive (and as yet unimplemented) + // libcall to __truncxfhf2 instead of selecting native f16 conversion + // instructions from f32 or f64. Moreover, the first (value-preserving) + // fp_round from f80 to either f32 or f64 may become a NOP on platforms like + // x86. + if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) + return SDValue(); + // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. 
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 531891f9cae1..43e6aa869932 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false \ +; RUN: | FileCheck %s -check-prefix=CHECK-I686 define void @test_load_store(half* %in, half* %out) { ; CHECK-LABEL: test_load_store: @@ -260,4 +262,17 @@ define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) { ret void } +declare float @test_floatret(); + +; On i686, if SSE2 is available, the return value from test_floatret is loaded +; to f80 and then rounded to f32. The DAG combiner should not combine this +; fp_round and the subsequent fptrunc from float to half. +define half @test_f80trunc_nodagcombine() #0 { +; CHECK-LABEL: test_f80trunc_nodagcombine: +; CHECK-I686-NOT: calll __truncxfhf2 + %1 = call float @test_floatret() + %2 = fptrunc float %1 to half + ret half %2 +} + attributes #0 = { nounwind }