diff --git a/llvm/lib/Target/R600/AMDGPU.td b/llvm/lib/Target/R600/AMDGPU.td index 89992c202ea6..6a23d361eb85 100644 --- a/llvm/lib/Target/R600/AMDGPU.td +++ b/llvm/lib/Target/R600/AMDGPU.td @@ -42,6 +42,20 @@ def FeatureFP64 : SubtargetFeature<"fp64", "true", "Enable double precision operations">; +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", + "FP64Denormals", + "true", + "Enable double precision denormal handling", + [FeatureFP64]>; + +// Some instructions do not support denormals despite this flag. Using +// fp32 denormals also causes instructions to run at the double +// precision rate for the device. +def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", + "FP32Denormals", + "true", + "Enable single precision denormal handling">; + def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", diff --git a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index b2b7bf9af443..aaef1405157e 100644 --- a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -55,11 +55,20 @@ using namespace llvm; // We want to use these instructions, and using fp32 denormals also causes // instructions to run at the double precision rate for the device so it's // probably best to just report no single precision denormals. -static uint32_t getFPMode(const MachineFunction &) { +static uint32_t getFPMode(const MachineFunction &F) { + const AMDGPUSubtarget& ST = F.getTarget().getSubtarget(); + // TODO: Is there any real use for the flush in only / flush out only modes? + + uint32_t FP32Denormals = + ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + + uint32_t FP64Denormals = + ST.hasFP64Denormals() ? 
FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | - FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) | - FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); + FP_DENORM_MODE_SP(FP32Denormals) | + FP_DENORM_MODE_DP(FP64Denormals); } static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, diff --git a/llvm/lib/Target/R600/AMDGPUInstructions.td b/llvm/lib/Target/R600/AMDGPUInstructions.td index b86b7818fc1a..38620705784b 100644 --- a/llvm/lib/Target/R600/AMDGPUInstructions.td +++ b/llvm/lib/Target/R600/AMDGPUInstructions.td @@ -34,6 +34,9 @@ class AMDGPUShaderInst pattern> } +def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">; +def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">; + def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index d5203611756f..e3c2a50ab828 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -15,6 +15,7 @@ #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" #include "SIInstrInfo.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallString.h" @@ -37,6 +38,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) : TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), + FP64Denormals(false), + FP32Denormals(false), CaymanISA(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), @@ -45,14 +48,27 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) : CFALUBug(false), LocalMemorySize(0), InstrItins(getInstrItineraryForCPU(GPU)) { + // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be + // enabled, but some instructions do not respect them and they run at the + // double precision rate, so don't enable by default. 
+ // + // We want to be able to turn these off, but making this a subtarget feature + // for SI has the unhelpful behavior that it unsets everything else if you + // disable it. - SmallString<256> FullFS("+promote-alloca,"); + SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { InstrInfo.reset(new R600InstrInfo(*this)); + + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere if + // someone tries to enable these? + FP32Denormals = false; + FP64Denormals = false; } else { InstrInfo.reset(new SIInstrInfo(*this)); } diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index 68634ea883b1..abe4a2cec498 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -50,6 +50,8 @@ private: short TexVTXClauseSize; Generation Gen; bool FP64; + bool FP64Denormals; + bool FP32Denormals; bool CaymanISA; bool EnableIRStructurizer; bool EnablePromoteAlloca; @@ -97,6 +99,14 @@ public: return CaymanISA; } + bool hasFP32Denormals() const { + return FP32Denormals; + } + + bool hasFP64Denormals() const { + return FP64Denormals; + } + bool hasBFE() const { return (getGeneration() >= EVERGREEN); } diff --git a/llvm/test/CodeGen/R600/default-fp-mode.ll b/llvm/test/CodeGen/R600/default-fp-mode.ll index 4488bdb7f445..b24a7a246fda 100644 --- a/llvm/test/CodeGen/R600/default-fp-mode.ll +++ b/llvm/test/CodeGen/R600/default-fp-mode.ll @@ -1,8 +1,27 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s +; RUN: llc
-march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; SI-LABEL: @test_kernel -; SI: FloatMode: 192 -; SI: IeeeMode: 0 +; FUNC-LABEL: @test_kernel + +; DEFAULT: FloatMode: 192 +; DEFAULT: IeeeMode: 0 + +; FP64-DENORMAL: FloatMode: 192 +; FP64-DENORMAL: IeeeMode: 0 + +; FP32-DENORMAL: FloatMode: 48 +; FP32-DENORMAL: IeeeMode: 0 + +; BOTH-DENORMAL: FloatMode: 240 +; BOTH-DENORMAL: IeeeMode: 0 + +; NO-DENORMAL: FloatMode: 0 +; NO-DENORMAL: IeeeMode: 0 define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1