forked from OSchip/llvm-project
Don't combine fp_round (fp_round x) if f80 to f16 is generated
Summary: This patch skips the DAG combine of fp_round (fp_round x) if it results in an fp_round from f80 to f16. fp_round from f80 to f16 always generates an expensive (and as-yet unimplemented) libcall to __truncxfhf2. This prevents selection of native f16 conversion instructions from f32 or f64. Moreover, the first (value-preserving) fp_round from f80 to either f32 or f64 may become a NOP on platforms like x86. Reviewers: ab Subscribers: srhines, llvm-commits Differential Revision: http://reviews.llvm.org/D17221 llvm-svn: 260769
This commit is contained in:
parent
2a8fa2a888
commit
7476bc89e9
|
@ -9019,6 +9019,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
|
||||||
if (N0.getOpcode() == ISD::FP_ROUND) {
|
if (N0.getOpcode() == ISD::FP_ROUND) {
|
||||||
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
|
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
|
||||||
const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
|
const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
|
||||||
|
|
||||||
|
// Skip this folding if it results in an fp_round from f80 to f16.
|
||||||
|
//
|
||||||
|
// f80 to f16 always generates an expensive (and as-yet unimplemented)
|
||||||
|
// libcall to __truncxfhf2 instead of selecting native f16 conversion
|
||||||
|
// instructions from f32 or f64. Moreover, the first (value-preserving)
|
||||||
|
// fp_round from f80 to either f32 or f64 may become a NOP on platforms like
|
||||||
|
// x86.
|
||||||
|
if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// If the first fp_round isn't a value preserving truncation, it might
|
// If the first fp_round isn't a value preserving truncation, it might
|
||||||
// introduce a tie in the second fp_round, that wouldn't occur in the
|
// introduce a tie in the second fp_round, that wouldn't occur in the
|
||||||
// single-step fp_round we want to fold to.
|
// single-step fp_round we want to fold to.
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
|
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
|
||||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
|
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
|
||||||
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
|
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
|
||||||
|
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false \
|
||||||
|
; RUN: | FileCheck %s -check-prefix=CHECK-I686
|
||||||
|
|
||||||
define void @test_load_store(half* %in, half* %out) {
|
define void @test_load_store(half* %in, half* %out) {
|
||||||
; CHECK-LABEL: test_load_store:
|
; CHECK-LABEL: test_load_store:
|
||||||
|
@ -260,4 +262,17 @@ define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare float @test_floatret();
|
||||||
|
|
||||||
|
; On i686, if SSE2 is available, the return value from test_floatret is loaded
|
||||||
|
; to f80 and then rounded to f32. The DAG combiner should not combine this
|
||||||
|
; fp_round and the subsequent fptrunc from float to half.
|
||||||
|
define half @test_f80trunc_nodagcombine() #0 {
|
||||||
|
; CHECK-LABEL: test_f80trunc_nodagcombine:
|
||||||
|
; CHECK-I686-NOT: calll __truncxfhf2
|
||||||
|
%1 = call float @test_floatret()
|
||||||
|
%2 = fptrunc float %1 to half
|
||||||
|
ret half %2
|
||||||
|
}
|
||||||
|
|
||||||
attributes #0 = { nounwind }
|
attributes #0 = { nounwind }
|
||||||
|
|
Loading…
Reference in New Issue