forked from OSchip/llvm-project
[AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is set
Differential Revision: https://reviews.llvm.org/D31482
llvm-svn: 300306
This commit is contained in:
parent
c9a4fc0750
commit
e668b1cd1e
|
@ -2112,9 +2112,12 @@ class AMDGPUTargetInfo final : public TargetInfo {
|
||||||
bool hasFP64:1;
|
bool hasFP64:1;
|
||||||
bool hasFMAF:1;
|
bool hasFMAF:1;
|
||||||
bool hasLDEXPF:1;
|
bool hasLDEXPF:1;
|
||||||
bool hasFullSpeedFP32Denorms:1;
|
|
||||||
const AddrSpace AS;
|
const AddrSpace AS;
|
||||||
|
|
||||||
|
static bool hasFullSpeedFMAF32(StringRef GPUName) {
|
||||||
|
return parseAMDGCNName(GPUName) >= GK_GFX9;
|
||||||
|
}
|
||||||
|
|
||||||
static bool isAMDGCN(const llvm::Triple &TT) {
|
static bool isAMDGCN(const llvm::Triple &TT) {
|
||||||
return TT.getArch() == llvm::Triple::amdgcn;
|
return TT.getArch() == llvm::Triple::amdgcn;
|
||||||
}
|
}
|
||||||
|
@ -2130,7 +2133,6 @@ public:
|
||||||
hasFP64(false),
|
hasFP64(false),
|
||||||
hasFMAF(false),
|
hasFMAF(false),
|
||||||
hasLDEXPF(false),
|
hasLDEXPF(false),
|
||||||
hasFullSpeedFP32Denorms(false),
|
|
||||||
AS(isGenericZero(Triple)){
|
AS(isGenericZero(Triple)){
|
||||||
if (getTriple().getArch() == llvm::Triple::amdgcn) {
|
if (getTriple().getArch() == llvm::Triple::amdgcn) {
|
||||||
hasFP64 = true;
|
hasFP64 = true;
|
||||||
|
@ -2200,7 +2202,8 @@ public:
|
||||||
hasFP64Denormals = true;
|
hasFP64Denormals = true;
|
||||||
}
|
}
|
||||||
if (!hasFP32Denormals)
|
if (!hasFP32Denormals)
|
||||||
TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms &&
|
TargetOpts.Features.push_back(
|
||||||
|
(Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
|
||||||
!CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
|
!CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
|
||||||
// Always do not flush fp64 or fp16 denorms.
|
// Always do not flush fp64 or fp16 denorms.
|
||||||
if (!hasFP64Denormals && hasFP64)
|
if (!hasFP64Denormals && hasFP64)
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
// REQUIRES: amdgpu-registered-target
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_ON %s
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_OFF %s
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s | FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s
|
||||||
|
|
||||||
|
// DEFAULT: +fp32-denormals
|
||||||
|
// FEATURE_FP32_DENORMALS_ON: +fp32-denormals
|
||||||
|
// FEATURE_FP32_DENORMALS_OFF: -fp32-denormals
|
||||||
|
// OPT_DENORMS_ARE_ZERO: -fp32-denormals
|
||||||
|
|
||||||
|
kernel void gfx9_fp32_denorms() {}
|
Loading…
Reference in New Issue