forked from OSchip/llvm-project
R600: Add denormal handling subtarget features.
llvm-svn: 213018
This commit is contained in:
parent
c6ae7b4763
commit
f171cf23b8
|
@ -42,6 +42,20 @@ def FeatureFP64 : SubtargetFeature<"fp64",
|
|||
"true",
|
||||
"Enable double precision operations">;
|
||||
|
||||
// Subtarget feature "fp64-denormals": sets the FP64Denormals subtarget field
// to true. The trailing list names FeatureFP64, so this feature is declared
// together with double precision support.
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
|
||||
"FP64Denormals",
|
||||
"true",
|
||||
"Enable double precision denormal handling",
|
||||
[FeatureFP64]>;
|
||||
|
||||
// Some instructions do not support denormals despite this flag. Using
|
||||
// fp32 denormals also causes instructions to run at the double
|
||||
// precision rate for the device.
|
||||
// Subtarget feature "fp32-denormals": sets the FP32Denormals subtarget field
// to true. No other features are listed alongside it.
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
|
||||
"FP32Denormals",
|
||||
"true",
|
||||
"Enable single precision denormal handling">;
|
||||
|
||||
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
||||
"Is64bit",
|
||||
"true",
|
||||
|
|
|
@ -55,11 +55,20 @@ using namespace llvm;
|
|||
// We want to use these instructions, and using fp32 denormals also causes
|
||||
// instructions to run at the double precision rate for the device so it's
|
||||
// probably best to just report no single precision denormals.
|
||||
static uint32_t getFPMode(const MachineFunction &) {
|
||||
static uint32_t getFPMode(const MachineFunction &F) {
|
||||
const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
|
||||
// TODO: Is there any real use for the flush in only / flush out only modes?
|
||||
|
||||
uint32_t FP32Denormals =
|
||||
ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||
|
||||
uint32_t FP64Denormals =
|
||||
ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||
|
||||
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
|
||||
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
|
||||
FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
|
||||
FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
|
||||
FP_DENORM_MODE_SP(FP32Denormals) |
|
||||
FP_DENORM_MODE_DP(FP64Denormals);
|
||||
}
|
||||
|
||||
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
|
||||
|
|
|
@ -34,6 +34,9 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
|||
|
||||
}
|
||||
|
||||
// Predicate that evaluates Subtarget.hasFP32Denormals(), i.e. whether single
// precision denormal handling is enabled on the subtarget.
def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
|
||||
// Predicate that evaluates Subtarget.hasFP64Denormals(), i.e. whether double
// precision denormal handling is enabled on the subtarget.
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
|
||||
|
||||
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
|
||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "AMDGPUSubtarget.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
|
||||
|
@ -37,6 +38,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
|
|||
TexVTXClauseSize(0),
|
||||
Gen(AMDGPUSubtarget::R600),
|
||||
FP64(false),
|
||||
FP64Denormals(false),
|
||||
FP32Denormals(false),
|
||||
CaymanISA(false),
|
||||
EnableIRStructurizer(true),
|
||||
EnablePromoteAlloca(false),
|
||||
|
@ -45,14 +48,27 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
|
|||
CFALUBug(false),
|
||||
LocalMemorySize(0),
|
||||
InstrItins(getInstrItineraryForCPU(GPU)) {
|
||||
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
|
||||
// enabled, but some instructions do not respect them and they run at the
|
||||
// double precision rate, so don't enable by default.
|
||||
//
|
||||
// We want to be able to turn these off, but making this a subtarget feature
|
||||
// for SI has the unhelpful behavior that it unsets everything else if you
|
||||
// disable it.
|
||||
|
||||
SmallString<256> FullFS("+promote-alloca,");
|
||||
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
|
||||
FullFS += FS;
|
||||
|
||||
ParseSubtargetFeatures(GPU, FullFS);
|
||||
|
||||
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
InstrInfo.reset(new R600InstrInfo(*this));
|
||||
|
||||
// FIXME: I don't think Evergreen has any useful support for
|
||||
// denormals, but should be checked. Should we issue a warning somewhere if
|
||||
// someone tries to enable these?
|
||||
FP32Denormals = false;
|
||||
FP64Denormals = false;
|
||||
} else {
|
||||
InstrInfo.reset(new SIInstrInfo(*this));
|
||||
}
|
||||
|
|
|
@ -50,6 +50,8 @@ private:
|
|||
short TexVTXClauseSize;
|
||||
Generation Gen;
|
||||
bool FP64;
|
||||
bool FP64Denormals;
|
||||
bool FP32Denormals;
|
||||
bool CaymanISA;
|
||||
bool EnableIRStructurizer;
|
||||
bool EnablePromoteAlloca;
|
||||
|
@ -97,6 +99,14 @@ public:
|
|||
return CaymanISA;
|
||||
}
|
||||
|
||||
/// Returns the FP32Denormals flag: true when single precision denormal
/// handling is enabled for this subtarget.
bool hasFP32Denormals() const {
|
||||
return FP32Denormals;
|
||||
}
|
||||
|
||||
/// Returns the FP64Denormals flag: true when double precision denormal
/// handling is enabled for this subtarget.
bool hasFP64Denormals() const {
|
||||
return FP64Denormals;
|
||||
}
|
||||
|
||||
bool hasBFE() const {
|
||||
return (getGeneration() >= EVERGREEN);
|
||||
}
|
||||
|
|
|
@ -1,8 +1,27 @@
|
|||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
|
||||
|
||||
; SI-LABEL: @test_kernel
|
||||
; SI: FloatMode: 192
|
||||
; SI: IeeeMode: 0
|
||||
; FUNC-LABEL: @test_kernel
|
||||
|
||||
; DEFAULT: FloatMode: 192
|
||||
; DEFAULT: IeeeMode: 0
|
||||
|
||||
; FP64-DENORMAL: FloatMode: 192
|
||||
; FP64-DENORMAL: IeeeMode: 0
|
||||
|
||||
; FP32-DENORMAL: FloatMode: 48
|
||||
; FP32-DENORMAL: IeeeMode: 0
|
||||
|
||||
; BOTH-DENORMAL: FloatMode: 240
|
||||
; BOTH-DENORMAL: IeeeMode: 0
|
||||
|
||||
; NO-DENORMAL: FloatMode: 0
|
||||
; NO-DENORMAL: IeeeMode: 0
|
||||
define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
|
||||
store float 0.0, float addrspace(1)* %out0
|
||||
store double 0.0, double addrspace(1)* %out1
|
||||
|
|
Loading…
Reference in New Issue