2013-07-22 20:18:04 +08:00
|
|
|
; Test driver: compile this file with llc for the nvptx target (sm_20) and
; pipe the generated output into FileCheck, which verifies it against the
; check directives written next to each function.
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
|
|
|
|
2017-01-31 13:58:22 +08:00
|
|
|
; Single-precision square-root intrinsic called by the f32 sqrt/rsqrt tests
; in this file.
declare float @llvm.sqrt.f32(float)
|
|
|
|
; Double-precision square-root intrinsic called by sqrt_div_fast_ftz_f64.
declare double @llvm.sqrt.f64(double)
|
2013-07-22 20:18:04 +08:00
|
|
|
|
2017-01-31 13:58:22 +08:00
|
|
|
; Baseline case: no attribute group on the function, so both the square root
; and the divide are expected in their round-to-nearest (.rn) forms.
; CHECK-LABEL: sqrt_div(
; CHECK: sqrt.rn.f32
; CHECK: div.rn.f32
define float @sqrt_div(float %a, float %b) {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %quot = fdiv float %root, %b
  ret float %quot
}
|
|
|
|
|
2017-01-31 13:58:22 +08:00
|
|
|
; Same computation as sqrt_div, but tagged with attribute group #0; the
; checks expect the approximate forms of both instructions.
; CHECK-LABEL: sqrt_div_fast(
; CHECK: sqrt.approx.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast(float %a, float %b) #0 {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %quot = fdiv float %root, %b
  ret float %quot
}
|
|
|
|
|
2017-01-31 13:58:22 +08:00
|
|
|
; Same computation as sqrt_div, but tagged with attribute group #1; the
; checks expect the .rn instructions to carry the .ftz modifier.
; CHECK-LABEL: sqrt_div_ftz(
; CHECK: sqrt.rn.ftz.f32
; CHECK: div.rn.ftz.f32
define float @sqrt_div_ftz(float %a, float %b) #1 {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %quot = fdiv float %root, %b
  ret float %quot
}
|
|
|
|
|
|
|
|
; Both attribute groups (#0 and #1) applied together; the checks expect the
; approximate instructions with the .ftz modifier.
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: sqrt.approx.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %quot = fdiv float %root, %b
  ret float %quot
}
|
|
|
|
|
|
|
|
; f64 has no fast-math or ftz variants of sqrt and div. sqrt(x) is therefore
; expected to be formed as reciprocal(rsqrt(x)), and the division is expected
; to be emitted as an ordinary (vanilla) divide.
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
  %root = tail call double @llvm.sqrt.f64(double %a)
  %quot = fdiv double %root, %b
  ret double %quot
}
|
|
|
|
|
|
|
|
; 1.0 / sqrt(a) without any attribute group: the checks require a precise
; sqrt.rn.f32 and forbid rsqrt.approx both before and after it.
; CHECK-LABEL: rsqrt(
; CHECK-NOT: rsqrt.approx
; CHECK: sqrt.rn.f32
; CHECK-NOT: rsqrt.approx
define float @rsqrt(float %a) {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %recip = fdiv float 1.0, %root
  ret float %recip
}
|
|
|
|
|
|
|
|
; 1.0 / sqrt(a) with attribute group #0: the checks expect a single
; rsqrt.approx.f32 and forbid any separate div or sqrt around it.
; CHECK-LABEL: rsqrt_fast(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast(float %a) #0 {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %recip = fdiv float 1.0, %root
  ret float %recip
}
|
|
|
|
|
|
|
|
; 1.0 / sqrt(a) with attribute groups #0 and #1: the checks expect a single
; rsqrt.approx.ftz.f32 and forbid any separate div or sqrt around it.
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.ftz.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast_ftz(float %a) #0 #1 {
  %root = tail call float @llvm.sqrt.f32(float %a)
  %recip = fdiv float 1.0, %root
  ret float %recip
}
|
|
|
|
|
2017-01-11 07:42:46 +08:00
|
|
|
; Plain fadd in a function without any fast-math attribute. The add must be
; emitted as add.rn.f32 (the .rn form prevents fma combining) rather than a
; plain add.f32. This also guards against fast-math settings leaking from an
; earlier function that enabled them into a later one that did not
; (TargetMachine::resetTargetOptions fix, r291618 / D28507).
;
; The label ends in '(' so it matches only the function header line and not
; the fadd_param_N operands or the fadd_ftz function.
; CHECK-LABEL: fadd(
; CHECK: add.rn.f32
define float @fadd(float %a, float %b) {
  %t1 = fadd float %a, %b
  ret float %t1
}
|
|
|
|
|
2017-01-11 07:42:46 +08:00
|
|
|
; Plain fadd in a function tagged only with attribute group #1 (no fast-math
; attribute). The add must be emitted as add.rn.ftz.f32: the .rn form
; prevents fma combining, and fast-math settings must not leak in from an
; earlier function that enabled them (TargetMachine::resetTargetOptions fix,
; r291618 / D28507).
;
; The label ends in '(' so it matches only the function header line and not
; the fadd_ftz_param_N operands.
; CHECK-LABEL: fadd_ftz(
; CHECK: add.rn.ftz.f32
define float @fadd_ftz(float %a, float %b) #1 {
  %t1 = fadd float %a, %b
  ret float %t1
}
|
|
|
|
|
2017-01-14 02:48:13 +08:00
|
|
|
; Single-precision sine intrinsic called by fsin_approx.
declare float @llvm.sin.f32(float)
|
|
|
|
; Single-precision cosine intrinsic called by fcos_approx.
declare float @llvm.cos.f32(float)
|
|
|
|
|
|
|
|
; llvm.sin.f32 in a function tagged with attribute group #0: the check
; expects the call to be lowered to sin.approx.f32.
;
; The label ends in '(' so it matches only the function header line and not
; the fsin_approx_param_N operands.
; CHECK-LABEL: fsin_approx(
; CHECK: sin.approx.f32
define float @fsin_approx(float %a) #0 {
  %r = tail call float @llvm.sin.f32(float %a)
  ret float %r
}
|
|
|
|
|
|
|
|
; llvm.cos.f32 in a function tagged with attribute group #0: the check
; expects the call to be lowered to cos.approx.f32.
;
; The label ends in '(' so it matches only the function header line and not
; the fcos_approx_param_N operands.
; CHECK-LABEL: fcos_approx(
; CHECK: cos.approx.f32
define float @fcos_approx(float %a) #0 {
  %r = tail call float @llvm.cos.f32(float %a)
  ret float %r
}
|
|
|
|
|
2017-02-03 23:13:50 +08:00
|
|
|
; Two divisions by the same %divisor, each carrying the arcp fast-math flag,
; in a function with no attribute group. The checks expect one precise
; reciprocal followed by two precise multiplies.
;
; The label ends in '(' so it matches only this function's header line and
; not repeated_div_recip_allowed_ftz or the *_param_N operands.
; CHECK-LABEL: repeated_div_recip_allowed(
define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
; CHECK: rcp.rn.f32
; CHECK: mul.rn.f32
; CHECK: mul.rn.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  ; Selecting between the two quotients keeps both divisions live.
  %z = select i1 %pred, float %x, float %y
  ret float %z
}
|
|
|
|
|
|
|
|
; Two divisions by the same %divisor, each carrying the arcp fast-math flag,
; in a function tagged with attribute group #1. The checks expect one
; reciprocal followed by two multiplies, all in their .rn.ftz forms.
;
; The label ends in '(' so it matches only the function header line and not
; the *_param_N operands.
; CHECK-LABEL: repeated_div_recip_allowed_ftz(
define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK: rcp.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  ; Selecting between the two quotients keeps both divisions live.
  %z = select i1 %pred, float %x, float %y
  ret float %z
}
|
|
|
|
|
|
|
|
; Two divisions by the same %divisor with no per-instruction fast-math flags,
; in a function tagged with attribute group #0. The checks expect one
; approximate reciprocal followed by two multiplies.
;
; The label ends in '(' so it matches only this function's header line and
; not repeated_div_fast_ftz or the *_param_N operands.
; CHECK-LABEL: repeated_div_fast(
define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
; CHECK: rcp.approx.f32
; CHECK: mul.f32
; CHECK: mul.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  ; Selecting between the two quotients keeps both divisions live.
  %z = select i1 %pred, float %x, float %y
  ret float %z
}
|
|
|
|
|
|
|
|
; Two divisions by the same %divisor with no per-instruction fast-math flags,
; in a function tagged with attribute groups #0 and #1. The checks expect one
; approximate reciprocal followed by two multiplies, all with .ftz.
;
; The label ends in '(' so it matches only the function header line and not
; the *_param_N operands.
; CHECK-LABEL: repeated_div_fast_ftz(
define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
; CHECK: rcp.approx.ftz.f32
; CHECK: mul.ftz.f32
; CHECK: mul.ftz.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  ; Selecting between the two quotients keeps both divisions live.
  %z = select i1 %pred, float %x, float %y
  ret float %z
}
|
|
|
|
|
2013-07-22 20:18:04 +08:00
|
|
|
; Attribute group #0 sets "unsafe-fp-math" to "true". The f32 tests tagged
; with #0 in this file expect .approx instructions in the generated output.
attributes #0 = { "unsafe-fp-math" = "true" }
|
|
|
|
; Attribute group #1 sets "nvptx-f32ftz" to "true". The f32 tests tagged with
; #1 in this file expect the .ftz variants of the generated instructions.
attributes #1 = { "nvptx-f32ftz" = "true" }
|