forked from OSchip/llvm-project
DAG: Recognize no-signed-zeros-fp-math attribute
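Clang already emits this attribute for -cl-no-signed-zeros, but codegen does not do anything with it. Treat it like the other fast-math function attributes (add a TargetOptions flag, an -enable-no-signed-zeros-fp-math command-line option, and a per-function reset in resetTargetOptions), and change one place, the fneg (fsub A, B) check in isNegatibleForFree, to use it. llvm-svn: 293024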
This commit is contained in:
parent
4844573eb1
commit
732a531506
|
@ -143,6 +143,12 @@ EnableNoNaNsFPMath("enable-no-nans-fp-math",
|
|||
cl::desc("Enable FP math optimizations that assume no NaNs"),
|
||||
cl::init(false));
|
||||
|
||||
cl::opt<bool>
|
||||
EnableNoSignedZerosFPMath("enable-no-signed-zeros-fp-math",
|
||||
cl::desc("Enable FP math optimizations that assume "
|
||||
"the sign of 0 is insignificant"),
|
||||
cl::init(false));
|
||||
|
||||
cl::opt<bool>
|
||||
EnableNoTrappingFPMath("enable-no-trapping-fp-math",
|
||||
cl::desc("Enable setting the FP exceptions build "
|
||||
|
@ -282,6 +288,7 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
|
|||
Options.UnsafeFPMath = EnableUnsafeFPMath;
|
||||
Options.NoInfsFPMath = EnableNoInfsFPMath;
|
||||
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
|
||||
Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath;
|
||||
Options.NoTrappingFPMath = EnableNoTrappingFPMath;
|
||||
Options.FPDenormalMode = DenormalMode;
|
||||
Options.HonorSignDependentRoundingFPMathOption =
|
||||
|
|
|
@ -153,11 +153,17 @@ namespace llvm {
|
|||
/// assume the FP arithmetic arguments and results are never NaNs.
|
||||
unsigned NoNaNsFPMath : 1;
|
||||
|
||||
/// NoTrappingFPMath - This flag is enabled when the
|
||||
/// -enable-no-trapping-fp-math is specified on the command line. This
|
||||
/// NoTrappingFPMath - This flag is enabled when the
|
||||
/// -enable-no-trapping-fp-math is specified on the command line. This
|
||||
/// specifies that there are no trap handlers to handle exceptions.
|
||||
unsigned NoTrappingFPMath : 1;
|
||||
|
||||
/// NoSignedZerosFPMath - This flag is enabled when the
|
||||
/// -enable-no-signed-zeros-fp-math is specified on the command line. This
|
||||
/// specifies that optimizations are allowed to treat the sign of a zero
|
||||
/// argument or result as insignificant.
|
||||
unsigned NoSignedZerosFPMath : 1;
|
||||
|
||||
/// HonorSignDependentRoundingFPMath - This returns true when the
|
||||
/// -enable-sign-dependent-rounding-fp-math is specified. If this returns
|
||||
/// false (the default), the code generator is allowed to assume that the
|
||||
|
|
|
@ -635,7 +635,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
|
|||
Depth + 1);
|
||||
case ISD::FSUB:
|
||||
// We can't turn -(A-B) into B-A when we honor signed zeros.
|
||||
if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
|
||||
if (!Options->NoSignedZerosFPMath &&
|
||||
!Op.getNode()->getFlags()->hasNoSignedZeros())
|
||||
return 0;
|
||||
|
||||
// fold (fneg (fsub A, B)) -> (fsub B, A)
|
||||
|
|
|
@ -84,6 +84,7 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
|
|||
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
|
||||
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
|
||||
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
|
||||
RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
|
||||
RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
|
||||
|
||||
StringRef Denormal =
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
|
||||
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
|
||||
; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
|
||||
|
||||
; Test that the -enable-no-signed-zeros-fp-math flag allows the fneg of an fsub to be folded away (no sign-bit xor is emitted)
|
||||
|
||||
; GCN-LABEL: {{^}}fneg_fsub_f32:
|
||||
; GCN: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
|
||||
|
||||
; GCN-UNSAFE-NOT: xor
|
||||
define void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
|
||||
%a = load float, float addrspace(1)* %in, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub float %a, %b
|
||||
%neg.result = fsub float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -69,3 +69,61 @@ define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f
|
|||
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
|
||||
; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
|
||||
define void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
|
||||
%a = load float, float addrspace(1)* %in, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub float %a, %b
|
||||
%neg.result = fsub float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
|
||||
; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-NOT: xor
|
||||
define void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
|
||||
%a = load float, float addrspace(1)* %in, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub nsz float %a, %b
|
||||
%neg.result = fsub float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
|
||||
; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-NOT: xor
|
||||
define void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
|
||||
%a = load float, float addrspace(1)* %in, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub float %a, %b
|
||||
%neg.result = fsub float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; The attribute's value is a string, either "true" or "false", so
|
||||
; make sure it is disabled and the fneg is not folded if it is not
|
||||
; "true".
|
||||
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
|
||||
; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
|
||||
define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
|
||||
%a = load float, float addrspace(1)* %in, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub float %a, %b
|
||||
%neg.result = fsub float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
|
||||
attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
|
||||
|
|
|
@ -101,5 +101,5 @@ define double @fn_attr(double %e) nounwind #0 {
|
|||
ret double %h
|
||||
}
|
||||
|
||||
attributes #0 = { "unsafe-fp-math"="true" }
|
||||
attributes #0 = { "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
|
||||
|
||||
|
|
Loading…
Reference in New Issue