forked from OSchip/llvm-project
[OpenCL][AMDGPU] Add support for -cl-denorms-are-zero
Adjust target features for the amdgcn target when -cl-denorms-are-zero is set. Denormal support is controlled by the feature strings fp32-denormals and fp64-denormals in the amdgcn target. If -cl-denorms-are-zero is not set and the command line does not set the fp32/64-denormals feature strings, +fp32-denormals and +fp64-denormals will be on for GPUs that support them. A new virtual function, virtual void TargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const, is introduced to allow adjusting target options based on codegen options. Differential Revision: https://reviews.llvm.org/D22815 llvm-svn: 278151
This commit is contained in:
parent
a814d89b61
commit
2c17e82bc7
|
@ -40,6 +40,7 @@ struct fltSemantics;
|
|||
namespace clang {
|
||||
class DiagnosticsEngine;
|
||||
class LangOptions;
|
||||
class CodeGenOptions;
|
||||
class MacroBuilder;
|
||||
class SourceLocation;
|
||||
class SourceManager;
|
||||
|
@ -797,6 +798,10 @@ public:
|
|||
/// language options which change the target configuration.
|
||||
virtual void adjust(const LangOptions &Opts);
|
||||
|
||||
/// \brief Adjust target options based on codegen options.
|
||||
virtual void adjustTargetOptions(const CodeGenOptions &CGOpts,
|
||||
TargetOptions &TargetOpts) const {}
|
||||
|
||||
/// \brief Initialize the map with the default set of target features for the
|
||||
/// CPU; this should include all legal feature strings on the target.
|
||||
///
|
||||
|
|
|
@ -110,6 +110,7 @@ CODEGENOPT(ReciprocalMath , 1, 0) ///< Allow FP divisions to be reassociated.
|
|||
CODEGENOPT(NoInline , 1, 0) ///< Set when -fno-inline is enabled.
|
||||
///< Disables use of the inline keyword.
|
||||
CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN.
|
||||
CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
|
||||
CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
|
||||
/// \brief Method of Objective-C dispatch to use.
|
||||
ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Basic/TargetOptions.h"
|
||||
#include "clang/Basic/Version.h"
|
||||
#include "clang/Frontend/CodeGenOptions.h"
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
|
@ -1960,23 +1961,27 @@ class AMDGPUTargetInfo final : public TargetInfo {
|
|||
bool hasFP64:1;
|
||||
bool hasFMAF:1;
|
||||
bool hasLDEXPF:1;
|
||||
bool hasDenormSupport:1;
|
||||
|
||||
/// Returns true when the architecture of \p TT is amdgcn.
static bool isAMDGCN(const llvm::Triple &TT) {
  const bool IsGCN = (llvm::Triple::amdgcn == TT.getArch());
  return IsGCN;
}
|
||||
|
||||
public:
|
||||
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
|
||||
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
|
||||
: TargetInfo(Triple) ,
|
||||
GPU(isAMDGCN(Triple) ? GK_SOUTHERN_ISLANDS : GK_R600),
|
||||
hasFP64(false),
|
||||
hasFMAF(false),
|
||||
hasLDEXPF(false) {
|
||||
hasLDEXPF(false),
|
||||
hasDenormSupport(false){
|
||||
if (getTriple().getArch() == llvm::Triple::amdgcn) {
|
||||
hasFP64 = true;
|
||||
hasFMAF = true;
|
||||
hasLDEXPF = true;
|
||||
}
|
||||
if (Opts.CPU == "fiji")
|
||||
hasDenormSupport = true;
|
||||
|
||||
resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
|
||||
DataLayoutStringSI : DataLayoutStringR600);
|
||||
|
@ -2025,6 +2030,26 @@ public:
|
|||
DiagnosticsEngine &Diags, StringRef CPU,
|
||||
const std::vector<std::string> &FeatureVec) const override;
|
||||
|
||||
void adjustTargetOptions(const CodeGenOptions &CGOpts,
|
||||
TargetOptions &TargetOpts) const override {
|
||||
if (!hasDenormSupport)
|
||||
return;
|
||||
bool hasFP32Denormals = false;
|
||||
bool hasFP64Denormals = false;
|
||||
for (auto &I : TargetOpts.FeaturesAsWritten) {
|
||||
if (I == "+fp32-denormals" || I == "-fp32-denormals")
|
||||
hasFP32Denormals = true;
|
||||
if (I == "+fp64-denormals" || I == "-fp64-denormals")
|
||||
hasFP64Denormals = true;
|
||||
}
|
||||
if (!hasFP32Denormals)
|
||||
TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
|
||||
Twine("fp32-denormals")).str());
|
||||
if (!hasFP64Denormals && hasFP64)
|
||||
TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
|
||||
Twine("fp64-denormals")).str());
|
||||
}
|
||||
|
||||
ArrayRef<Builtin::Info> getTargetBuiltins() const override {
|
||||
return llvm::makeArrayRef(BuiltinInfo,
|
||||
clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
|
||||
|
|
|
@ -842,6 +842,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
|
|||
// created. This complexity should be lifted elsewhere.
|
||||
getTarget().adjust(getLangOpts());
|
||||
|
||||
// Adjust target options based on codegen options.
|
||||
getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts());
|
||||
|
||||
// rewriter project will change target built-in bool type from its default.
|
||||
if (getFrontendOpts().ProgramAction == frontend::RewriteObjC)
|
||||
getTarget().noSignedCharForObjCBool();
|
||||
|
|
|
@ -571,6 +571,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
|||
Args.hasArg(OPT_cl_fast_relaxed_math));
|
||||
Opts.NoSignedZeros = (Args.hasArg(OPT_fno_signed_zeros) ||
|
||||
Args.hasArg(OPT_cl_no_signed_zeros));
|
||||
Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
|
||||
Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
|
||||
Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
|
||||
Opts.BackendOptions = Args.getAllArgValues(OPT_backend_option);
|
||||
|
|
|
@ -1,5 +1,13 @@
|
|||
// RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
|
||||
// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=CHECK-DENORM
|
||||
|
||||
// This test just checks that the -cl-denorms-are-zero argument is accepted
|
||||
// For non-amdgcn targets, this test just checks that the -cl-denorms-are-zero argument is accepted
|
||||
// by clang. This option is currently a no-op, which is allowed by the
|
||||
// OpenCL specification.
|
||||
|
||||
// CHECK-DENORM-LABEL: define void @f()
|
||||
// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
|
||||
// CHECK-LABEL: define void @f()
|
||||
// CHECK-NOT: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
|
||||
void f() {}
|
||||
|
|
Loading…
Reference in New Issue