forked from OSchip/llvm-project
AMDGPU: Split denormal mode tracking bits
Prepare to accurately track the future denormal-fp-math attribute changes. The way to actually set these separately is not wired in yet. This is just a mechanical change, and mostly still assumes the input and output modes match. This should be refined for some cases. For example, fcanonicalize lowering should use the flushing variant if either input or output flushing is enabled.
This commit is contained in:
parent
047c041277
commit
1024b73ef5
|
@ -49,9 +49,9 @@ using namespace llvm;
|
||||||
using namespace llvm::AMDGPU;
|
using namespace llvm::AMDGPU;
|
||||||
using namespace llvm::AMDGPU::HSAMD;
|
using namespace llvm::AMDGPU::HSAMD;
|
||||||
|
|
||||||
// TODO: This should get the default rounding mode from the kernel. We just set
|
// This should get the default rounding mode from the kernel. We just set the
|
||||||
// the default here, but this could change if the OpenCL rounding mode pragmas
|
// default here, but this could change if the OpenCL rounding mode pragmas are
|
||||||
// are used.
|
// used.
|
||||||
//
|
//
|
||||||
// The denormal mode here should match what is reported by the OpenCL runtime
|
// The denormal mode here should match what is reported by the OpenCL runtime
|
||||||
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
|
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
|
||||||
|
@ -70,18 +70,10 @@ using namespace llvm::AMDGPU::HSAMD;
|
||||||
// instructions to run at the double precision rate for the device so it's
|
// instructions to run at the double precision rate for the device so it's
|
||||||
// probably best to just report no single precision denormals.
|
// probably best to just report no single precision denormals.
|
||||||
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
|
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
|
||||||
|
|
||||||
// TODO: Is there any real use for the flush in only / flush out only modes?
|
|
||||||
uint32_t FP32Denormals =
|
|
||||||
Mode.FP32Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
|
||||||
|
|
||||||
uint32_t FP64Denormals =
|
|
||||||
Mode.FP64FP16Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
|
||||||
|
|
||||||
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
|
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
|
||||||
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
|
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
|
||||||
FP_DENORM_MODE_SP(FP32Denormals) |
|
FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
|
||||||
FP_DENORM_MODE_DP(FP64Denormals);
|
FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
static AsmPrinter *
|
static AsmPrinter *
|
||||||
|
|
|
@ -2136,7 +2136,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
|
||||||
bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
|
bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
|
||||||
bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
|
bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
|
||||||
|
|
||||||
assert((IsFMA || !Mode.FP32Denormals) &&
|
assert((IsFMA || !Mode.allFP32Denormals()) &&
|
||||||
"fmad selected with denormals enabled");
|
"fmad selected with denormals enabled");
|
||||||
// TODO: We can select this with f32 denormals enabled if all the sources are
|
// TODO: We can select this with f32 denormals enabled if all the sources are
|
||||||
// converted from f16 (in which case fmad isn't legal).
|
// converted from f16 (in which case fmad isn't legal).
|
||||||
|
|
|
@ -1587,7 +1587,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
|
||||||
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
||||||
|
|
||||||
// float fr = mad(fqneg, fb, fa);
|
// float fr = mad(fqneg, fb, fa);
|
||||||
unsigned OpCode = MFI->getMode().FP32Denormals ?
|
unsigned OpCode = MFI->getMode().allFP32Denormals() ?
|
||||||
(unsigned)AMDGPUISD::FMAD_FTZ :
|
(unsigned)AMDGPUISD::FMAD_FTZ :
|
||||||
(unsigned)ISD::FMAD;
|
(unsigned)ISD::FMAD;
|
||||||
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
|
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
|
||||||
|
@ -1672,7 +1672,7 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
|
||||||
// Compute denominator reciprocal.
|
// Compute denominator reciprocal.
|
||||||
unsigned FMAD = MFI->getMode().FP32Denormals ?
|
unsigned FMAD = MFI->getMode().allFP32Denormals() ?
|
||||||
(unsigned)AMDGPUISD::FMAD_FTZ :
|
(unsigned)AMDGPUISD::FMAD_FTZ :
|
||||||
(unsigned)ISD::FMAD;
|
(unsigned)ISD::FMAD;
|
||||||
|
|
||||||
|
|
|
@ -101,12 +101,12 @@ class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
|
||||||
PredicateControl;
|
PredicateControl;
|
||||||
|
|
||||||
let RecomputePerFunction = 1 in {
|
let RecomputePerFunction = 1 in {
|
||||||
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
|
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
|
||||||
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
|
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
|
||||||
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
|
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
|
||||||
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
|
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
|
||||||
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
|
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
|
||||||
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
|
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
|
||||||
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1874,12 +1874,12 @@ bool AMDGPULegalizerInfo::legalizeFMad(
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
|
||||||
// TODO: Always legal with future ftz flag.
|
// TODO: Always legal with future ftz flag.
|
||||||
if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals)
|
// FIXME: Do we need just output?
|
||||||
|
if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals())
|
||||||
return true;
|
return true;
|
||||||
if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals)
|
if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
|
||||||
MachineIRBuilder HelperBuilder(MI);
|
MachineIRBuilder HelperBuilder(MI);
|
||||||
GISelObserverWrapper DummyObserver;
|
GISelObserverWrapper DummyObserver;
|
||||||
LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
|
LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
|
||||||
|
@ -2081,7 +2081,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!Unsafe && ResTy == S32 &&
|
if (!Unsafe && ResTy == S32 &&
|
||||||
MF.getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals)
|
MF.getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
|
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
|
||||||
|
@ -2162,15 +2162,13 @@ static void toggleSPDenormMode(bool Enable,
|
||||||
AMDGPU::SIModeRegisterDefaults Mode) {
|
AMDGPU::SIModeRegisterDefaults Mode) {
|
||||||
// Set SP denorm mode to this value.
|
// Set SP denorm mode to this value.
|
||||||
unsigned SPDenormMode =
|
unsigned SPDenormMode =
|
||||||
Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
Enable ? FP_DENORM_FLUSH_NONE : Mode.fpDenormModeSPValue();
|
||||||
|
|
||||||
if (ST.hasDenormModeInst()) {
|
if (ST.hasDenormModeInst()) {
|
||||||
// Preserve default FP64FP16 denorm mode while updating FP32 mode.
|
// Preserve default FP64FP16 denorm mode while updating FP32 mode.
|
||||||
unsigned DPDenormModeDefault = Mode.FP64FP16Denormals
|
uint32_t DPDenormModeDefault = Mode.fpDenormModeDPValue();
|
||||||
? FP_DENORM_FLUSH_NONE
|
|
||||||
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
|
||||||
|
|
||||||
unsigned NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2);
|
uint32_t NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2);
|
||||||
B.buildInstr(AMDGPU::S_DENORM_MODE)
|
B.buildInstr(AMDGPU::S_DENORM_MODE)
|
||||||
.addImm(NewDenormModeValue);
|
.addImm(NewDenormModeValue);
|
||||||
|
|
||||||
|
@ -2223,7 +2221,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||||
|
|
||||||
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
|
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
|
||||||
// aren't modeled as reading it.
|
// aren't modeled as reading it.
|
||||||
if (!Mode.FP32Denormals)
|
if (!Mode.allFP32Denormals())
|
||||||
toggleSPDenormMode(true, B, ST, Mode);
|
toggleSPDenormMode(true, B, ST, Mode);
|
||||||
|
|
||||||
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
|
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
|
||||||
|
@ -2233,7 +2231,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||||
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
|
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
|
||||||
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
|
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
|
||||||
|
|
||||||
if (!Mode.FP32Denormals)
|
if (!Mode.allFP32Denormals())
|
||||||
toggleSPDenormMode(false, B, ST, Mode);
|
toggleSPDenormMode(false, B, ST, Mode);
|
||||||
|
|
||||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
|
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
|
||||||
|
|
|
@ -1163,8 +1163,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
|
||||||
|
|
||||||
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
|
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
|
||||||
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
|
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
|
||||||
MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
|
MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
|
||||||
MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
|
MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
|
||||||
|
MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals;
|
||||||
|
MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1381,8 +1381,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
|
||||||
case AMDGPU::V_MUL_F32_e64:
|
case AMDGPU::V_MUL_F32_e64:
|
||||||
case AMDGPU::V_MUL_F16_e64: {
|
case AMDGPU::V_MUL_F16_e64: {
|
||||||
// If output denormals are enabled, omod is ignored.
|
// If output denormals are enabled, omod is ignored.
|
||||||
if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals) ||
|
if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
|
||||||
(Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16Denormals))
|
(Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16OutputDenormals))
|
||||||
return std::make_pair(nullptr, SIOutMods::NONE);
|
return std::make_pair(nullptr, SIOutMods::NONE);
|
||||||
|
|
||||||
const MachineOperand *RegOp = nullptr;
|
const MachineOperand *RegOp = nullptr;
|
||||||
|
@ -1411,8 +1411,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
|
||||||
case AMDGPU::V_ADD_F32_e64:
|
case AMDGPU::V_ADD_F32_e64:
|
||||||
case AMDGPU::V_ADD_F16_e64: {
|
case AMDGPU::V_ADD_F16_e64: {
|
||||||
// If output denormals are enabled, omod is ignored.
|
// If output denormals are enabled, omod is ignored.
|
||||||
if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals) ||
|
if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
|
||||||
(Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16Denormals))
|
(Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16OutputDenormals))
|
||||||
return std::make_pair(nullptr, SIOutMods::NONE);
|
return std::make_pair(nullptr, SIOutMods::NONE);
|
||||||
|
|
||||||
// Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
|
// Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
|
||||||
|
|
|
@ -97,12 +97,12 @@ static cl::opt<bool> DisableLoopAlignment(
|
||||||
|
|
||||||
static bool hasFP32Denormals(const MachineFunction &MF) {
|
static bool hasFP32Denormals(const MachineFunction &MF) {
|
||||||
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
return Info->getMode().FP32Denormals;
|
return Info->getMode().allFP32Denormals();
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
|
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
|
||||||
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
return Info->getMode().FP64FP16Denormals;
|
return Info->getMode().allFP64FP16Denormals();
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
|
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
|
||||||
|
@ -783,6 +783,7 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
|
||||||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
|
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
|
||||||
DestVT.getScalarType() == MVT::f32 &&
|
DestVT.getScalarType() == MVT::f32 &&
|
||||||
SrcVT.getScalarType() == MVT::f16 &&
|
SrcVT.getScalarType() == MVT::f16 &&
|
||||||
|
// TODO: This probably only requires no input flushing?
|
||||||
!hasFP32Denormals(DAG.getMachineFunction());
|
!hasFP32Denormals(DAG.getMachineFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -236,23 +236,29 @@ template <> struct MappingTraits<SIArgumentInfo> {
|
||||||
struct SIMode {
|
struct SIMode {
|
||||||
bool IEEE = true;
|
bool IEEE = true;
|
||||||
bool DX10Clamp = true;
|
bool DX10Clamp = true;
|
||||||
bool FP32Denormals = true;
|
bool FP32InputDenormals = true;
|
||||||
bool FP64FP16Denormals = true;
|
bool FP32OutputDenormals = true;
|
||||||
|
bool FP64FP16InputDenormals = true;
|
||||||
|
bool FP64FP16OutputDenormals = true;
|
||||||
|
|
||||||
SIMode() = default;
|
SIMode() = default;
|
||||||
|
|
||||||
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
|
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
|
||||||
IEEE = Mode.IEEE;
|
IEEE = Mode.IEEE;
|
||||||
DX10Clamp = Mode.DX10Clamp;
|
DX10Clamp = Mode.DX10Clamp;
|
||||||
FP32Denormals = Mode.FP32Denormals;
|
FP32InputDenormals = Mode.FP32InputDenormals;
|
||||||
FP64FP16Denormals = Mode.FP64FP16Denormals;
|
FP32OutputDenormals = Mode.FP32OutputDenormals;
|
||||||
|
FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
|
||||||
|
FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator ==(const SIMode Other) const {
|
bool operator ==(const SIMode Other) const {
|
||||||
return IEEE == Other.IEEE &&
|
return IEEE == Other.IEEE &&
|
||||||
DX10Clamp == Other.DX10Clamp &&
|
DX10Clamp == Other.DX10Clamp &&
|
||||||
FP32Denormals == Other.FP32Denormals &&
|
FP32InputDenormals == Other.FP32InputDenormals &&
|
||||||
FP64FP16Denormals == Other.FP64FP16Denormals;
|
FP32OutputDenormals == Other.FP32OutputDenormals &&
|
||||||
|
FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
|
||||||
|
FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -260,8 +266,10 @@ template <> struct MappingTraits<SIMode> {
|
||||||
static void mapping(IO &YamlIO, SIMode &Mode) {
|
static void mapping(IO &YamlIO, SIMode &Mode) {
|
||||||
YamlIO.mapOptional("ieee", Mode.IEEE, true);
|
YamlIO.mapOptional("ieee", Mode.IEEE, true);
|
||||||
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
|
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
|
||||||
YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
|
YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
|
||||||
YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
|
YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
|
||||||
|
YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
|
||||||
|
YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1345,8 +1345,11 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
|
||||||
if (!DX10ClampAttr.empty())
|
if (!DX10ClampAttr.empty())
|
||||||
DX10Clamp = DX10ClampAttr == "true";
|
DX10Clamp = DX10ClampAttr == "true";
|
||||||
|
|
||||||
FP32Denormals = ST.hasFP32Denormals(F);
|
// FIXME: Split this when denormal-fp-math is used
|
||||||
FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
|
FP32InputDenormals = ST.hasFP32Denormals(F);
|
||||||
|
FP32OutputDenormals = FP32InputDenormals;
|
||||||
|
FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F);
|
||||||
|
FP64FP16OutputDenormals = FP64FP16InputDenormals;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
|
@ -687,20 +687,21 @@ struct SIModeRegisterDefaults {
|
||||||
|
|
||||||
/// If this is set, neither input or output denormals are flushed for most f32
|
/// If this is set, neither input or output denormals are flushed for most f32
|
||||||
/// instructions.
|
/// instructions.
|
||||||
///
|
bool FP32InputDenormals : 1;
|
||||||
/// TODO: Split into separate input and output fields if necessary like the
|
bool FP32OutputDenormals : 1;
|
||||||
/// control bits really provide?
|
|
||||||
bool FP32Denormals : 1;
|
|
||||||
|
|
||||||
/// If this is set, neither input or output denormals are flushed for both f64
|
/// If this is set, neither input or output denormals are flushed for both f64
|
||||||
/// and f16/v2f16 instructions.
|
/// and f16/v2f16 instructions.
|
||||||
bool FP64FP16Denormals : 1;
|
bool FP64FP16InputDenormals : 1;
|
||||||
|
bool FP64FP16OutputDenormals : 1;
|
||||||
|
|
||||||
SIModeRegisterDefaults() :
|
SIModeRegisterDefaults() :
|
||||||
IEEE(true),
|
IEEE(true),
|
||||||
DX10Clamp(true),
|
DX10Clamp(true),
|
||||||
FP32Denormals(true),
|
FP32InputDenormals(true),
|
||||||
FP64FP16Denormals(true) {}
|
FP32OutputDenormals(true),
|
||||||
|
FP64FP16InputDenormals(true),
|
||||||
|
FP64FP16OutputDenormals(true) {}
|
||||||
|
|
||||||
// FIXME: Should not depend on the subtarget
|
// FIXME: Should not depend on the subtarget
|
||||||
SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
|
SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
|
||||||
|
@ -711,15 +712,51 @@ struct SIModeRegisterDefaults {
|
||||||
SIModeRegisterDefaults Mode;
|
SIModeRegisterDefaults Mode;
|
||||||
Mode.DX10Clamp = true;
|
Mode.DX10Clamp = true;
|
||||||
Mode.IEEE = IsCompute;
|
Mode.IEEE = IsCompute;
|
||||||
Mode.FP32Denormals = false; // FIXME: Should be on by default.
|
Mode.FP32InputDenormals = false; // FIXME: Should be on by default.
|
||||||
Mode.FP64FP16Denormals = true;
|
Mode.FP32OutputDenormals = false; // FIXME: Should be on by default.
|
||||||
|
Mode.FP64FP16InputDenormals = true;
|
||||||
|
Mode.FP64FP16OutputDenormals = true;
|
||||||
return Mode;
|
return Mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator ==(const SIModeRegisterDefaults Other) const {
|
bool operator ==(const SIModeRegisterDefaults Other) const {
|
||||||
return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
|
return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
|
||||||
FP32Denormals == Other.FP32Denormals &&
|
FP32InputDenormals == Other.FP32InputDenormals &&
|
||||||
FP64FP16Denormals == Other.FP64FP16Denormals;
|
FP32OutputDenormals == Other.FP32OutputDenormals &&
|
||||||
|
FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
|
||||||
|
FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool allFP32Denormals() const {
|
||||||
|
return FP32InputDenormals && FP32OutputDenormals;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool allFP64FP16Denormals() const {
|
||||||
|
return FP64FP16InputDenormals && FP64FP16OutputDenormals;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the encoding value for the FP_DENORM bits of the mode register for the
|
||||||
|
/// FP32 denormal mode.
|
||||||
|
uint32_t fpDenormModeSPValue() const {
|
||||||
|
if (FP32InputDenormals && FP32OutputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_NONE;
|
||||||
|
if (FP32InputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_OUT;
|
||||||
|
if (FP32OutputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_IN;
|
||||||
|
return FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the encoding value for the FP_DENORM bits of the mode register for the
|
||||||
|
/// FP64/FP16 denormal mode.
|
||||||
|
uint32_t fpDenormModeDPValue() const {
|
||||||
|
if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_NONE;
|
||||||
|
if (FP64FP16InputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_OUT;
|
||||||
|
if (FP64FP16OutputDenormals)
|
||||||
|
return FP_DENORM_FLUSH_IN;
|
||||||
|
return FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if a flag is compatible if it's enabled in the callee, but
|
/// Returns true if a flag is compatible if it's enabled in the callee, but
|
||||||
|
@ -737,8 +774,10 @@ struct SIModeRegisterDefaults {
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Allow inlining denormals enabled into denormals flushed functions.
|
// Allow inlining denormals enabled into denormals flushed functions.
|
||||||
return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
|
return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
|
||||||
oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
|
oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
|
||||||
|
oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
|
||||||
|
oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -30,7 +31,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -52,7 +54,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -74,7 +77,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -96,7 +100,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -118,7 +123,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -140,7 +146,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -162,7 +169,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -184,7 +192,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -207,8 +216,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -230,7 +239,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -252,7 +262,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -274,7 +285,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -299,7 +311,8 @@ legalized: true
|
||||||
regBankSelected: true
|
regBankSelected: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -9,8 +9,10 @@
|
||||||
name: test_fdiv_s16
|
name: test_fdiv_s16
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: true
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -100,8 +102,10 @@ body: |
|
||||||
name: test_fdiv_s32_denorms_on
|
name: test_fdiv_s32_denorms_on
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: true
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -191,8 +195,10 @@ body: |
|
||||||
name: test_fdiv_s32_denorms_off
|
name: test_fdiv_s32_denorms_off
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -290,8 +296,10 @@ body: |
|
||||||
name: test_fdiv_s32_denorms_off_arcp
|
name: test_fdiv_s32_denorms_off_arcp
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -337,8 +345,10 @@ body: |
|
||||||
name: test_fdiv_s64
|
name: test_fdiv_s64
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -467,8 +477,10 @@ body: |
|
||||||
name: test_fdiv_v2s32
|
name: test_fdiv_v2s32
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -2223,8 +2235,10 @@ body: |
|
||||||
name: test_fdiv_s64_constant_one_rcp
|
name: test_fdiv_s64_constant_one_rcp
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -2347,8 +2361,10 @@ body: |
|
||||||
name: test_fdiv_s64_constant_negative_one_rcp
|
name: test_fdiv_s64_constant_negative_one_rcp
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: true
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -7,7 +7,8 @@
|
||||||
name: test_fmad_s16_flush
|
name: test_fmad_s16_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -66,7 +67,8 @@ body: |
|
||||||
name: test_fmad_v2s16_flush
|
name: test_fmad_v2s16_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -162,7 +164,8 @@ body: |
|
||||||
name: test_fmad_v4s16_flush
|
name: test_fmad_v4s16_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -332,7 +335,8 @@ body: |
|
||||||
name: test_fmad_s16_denorm
|
name: test_fmad_s16_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -392,7 +396,8 @@ body: |
|
||||||
name: test_fmad_s16_denorm_flags
|
name: test_fmad_s16_denorm_flags
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -452,7 +457,8 @@ body: |
|
||||||
name: test_fmad_v2s16_denorm
|
name: test_fmad_v2s16_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -550,7 +556,8 @@ body: |
|
||||||
name: test_fmad_v2s16_denorm_flags
|
name: test_fmad_v2s16_denorm_flags
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -648,7 +655,8 @@ body: |
|
||||||
name: test_fmad_v4s16_denorm
|
name: test_fmad_v4s16_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -822,7 +830,8 @@ body: |
|
||||||
name: test_fmad_v4s16_denorm_flags
|
name: test_fmad_v4s16_denorm_flags
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -7,7 +7,8 @@
|
||||||
name: test_fmad_s32_flush
|
name: test_fmad_s32_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -42,7 +43,8 @@ body: |
|
||||||
name: test_fmad_s32_flags_flush
|
name: test_fmad_s32_flags_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -77,7 +79,8 @@ body: |
|
||||||
name: test_fmad_v2s32_flush
|
name: test_fmad_v2s32_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -127,7 +130,8 @@ body: |
|
||||||
name: test_fmad_v3s32_flush
|
name: test_fmad_v3s32_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -180,7 +184,8 @@ body: |
|
||||||
name: test_fmad_v4s32_flush
|
name: test_fmad_v4s32_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -236,7 +241,8 @@ body: |
|
||||||
name: test_fmad_s32_denorm
|
name: test_fmad_s32_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -274,7 +280,8 @@ body: |
|
||||||
name: test_fmad_s32_flags_denorm
|
name: test_fmad_s32_flags_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -312,7 +319,8 @@ body: |
|
||||||
name: test_fmad_v2s32_denorm
|
name: test_fmad_v2s32_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -368,7 +376,8 @@ body: |
|
||||||
name: test_fmad_v3s32_denorm
|
name: test_fmad_v3s32_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -430,7 +439,8 @@ body: |
|
||||||
name: test_fmad_v4s32_denorm
|
name: test_fmad_v4s32_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp32-denormals: true
|
fp32-input-denormals: true
|
||||||
|
fp32-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -7,7 +7,8 @@
|
||||||
name: test_fmad_s64_flush
|
name: test_fmad_s64_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -31,7 +32,8 @@ body: |
|
||||||
name: test_fmad_v2s64_flush
|
name: test_fmad_v2s64_flush
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: false
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -61,7 +63,8 @@ body: |
|
||||||
name: test_fmad_s64_denorm
|
name: test_fmad_s64_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -85,7 +88,8 @@ body: |
|
||||||
name: test_fmad_v2s64_denorm
|
name: test_fmad_v2s64_denorm
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
fp64-fp16-denormals: true
|
fp64-fp16-input-denormals: true
|
||||||
|
fp64-fp16-output-denormals: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -12,7 +12,7 @@ tracksRegLiveness: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
ieee: false
|
ieee: false
|
||||||
fp32-denormals: false
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -34,7 +34,7 @@ tracksRegLiveness: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
ieee: false
|
ieee: false
|
||||||
fp32-denormals: false
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
@ -56,7 +56,7 @@ tracksRegLiveness: true
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
ieee: false
|
ieee: false
|
||||||
fp32-denormals: false
|
fp32-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -25,8 +25,10 @@
|
||||||
# FULL-NEXT: mode:
|
# FULL-NEXT: mode:
|
||||||
# FULL-NEXT: ieee: true
|
# FULL-NEXT: ieee: true
|
||||||
# FULL-NEXT: dx10-clamp: true
|
# FULL-NEXT: dx10-clamp: true
|
||||||
# FULL-NEXT: fp32-denormals: true
|
# FULL-NEXT: fp32-input-denormals: true
|
||||||
# FULL-NEXT: fp64-fp16-denormals: true
|
# FULL-NEXT: fp32-output-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||||
# FULL-NEXT: body:
|
# FULL-NEXT: body:
|
||||||
|
|
||||||
|
@ -94,8 +96,10 @@ body: |
|
||||||
# FULL-NEXT: mode:
|
# FULL-NEXT: mode:
|
||||||
# FULL-NEXT: ieee: true
|
# FULL-NEXT: ieee: true
|
||||||
# FULL-NEXT: dx10-clamp: true
|
# FULL-NEXT: dx10-clamp: true
|
||||||
# FULL-NEXT: fp32-denormals: true
|
# FULL-NEXT: fp32-input-denormals: true
|
||||||
# FULL-NEXT: fp64-fp16-denormals: true
|
# FULL-NEXT: fp32-output-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||||
# FULL-NEXT: body:
|
# FULL-NEXT: body:
|
||||||
|
|
||||||
|
@ -133,8 +137,10 @@ body: |
|
||||||
# FULL-NEXT: mode:
|
# FULL-NEXT: mode:
|
||||||
# FULL-NEXT: ieee: true
|
# FULL-NEXT: ieee: true
|
||||||
# FULL-NEXT: dx10-clamp: true
|
# FULL-NEXT: dx10-clamp: true
|
||||||
# FULL-NEXT: fp32-denormals: true
|
# FULL-NEXT: fp32-input-denormals: true
|
||||||
# FULL-NEXT: fp64-fp16-denormals: true
|
# FULL-NEXT: fp32-output-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||||
# FULL-NEXT: body:
|
# FULL-NEXT: body:
|
||||||
|
|
||||||
|
@ -173,8 +179,10 @@ body: |
|
||||||
# FULL-NEXT: mode:
|
# FULL-NEXT: mode:
|
||||||
# FULL-NEXT: ieee: true
|
# FULL-NEXT: ieee: true
|
||||||
# FULL-NEXT: dx10-clamp: true
|
# FULL-NEXT: dx10-clamp: true
|
||||||
# FULL-NEXT: fp32-denormals: true
|
# FULL-NEXT: fp32-input-denormals: true
|
||||||
# FULL-NEXT: fp64-fp16-denormals: true
|
# FULL-NEXT: fp32-output-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||||
# FULL-NEXT: body:
|
# FULL-NEXT: body:
|
||||||
|
|
||||||
|
@ -247,16 +255,20 @@ body: |
|
||||||
# ALL: mode:
|
# ALL: mode:
|
||||||
# ALL-NEXT: ieee: false
|
# ALL-NEXT: ieee: false
|
||||||
# ALL-NEXT: dx10-clamp: false
|
# ALL-NEXT: dx10-clamp: false
|
||||||
# ALL-NEXT: fp32-denormals: false
|
# ALL-NEXT: fp32-input-denormals: false
|
||||||
# ALL-NEXT: fp64-fp16-denormals: false
|
# ALL-NEXT: fp32-output-denormals: false
|
||||||
|
# ALL-NEXT: fp64-fp16-input-denormals: false
|
||||||
|
# ALL-NEXT: fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
name: parse_mode
|
name: parse_mode
|
||||||
machineFunctionInfo:
|
machineFunctionInfo:
|
||||||
mode:
|
mode:
|
||||||
ieee: false
|
ieee: false
|
||||||
dx10-clamp: false
|
dx10-clamp: false
|
||||||
fp32-denormals: false
|
fp32-input-denormals: false
|
||||||
fp64-fp16-denormals: false
|
fp32-output-denormals: false
|
||||||
|
fp64-fp16-input-denormals: false
|
||||||
|
fp64-fp16-output-denormals: false
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
|
@ -28,8 +28,10 @@
|
||||||
; CHECK-NEXT: mode:
|
; CHECK-NEXT: mode:
|
||||||
; CHECK-NEXT: ieee: true
|
; CHECK-NEXT: ieee: true
|
||||||
; CHECK-NEXT: dx10-clamp: true
|
; CHECK-NEXT: dx10-clamp: true
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||||
; CHECK-NEXT: body:
|
; CHECK-NEXT: body:
|
||||||
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
||||||
|
@ -57,8 +59,10 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
||||||
; CHECK-NEXT: mode:
|
; CHECK-NEXT: mode:
|
||||||
; CHECK-NEXT: ieee: false
|
; CHECK-NEXT: ieee: false
|
||||||
; CHECK-NEXT: dx10-clamp: true
|
; CHECK-NEXT: dx10-clamp: true
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||||
; CHECK-NEXT: body:
|
; CHECK-NEXT: body:
|
||||||
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
|
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
|
||||||
|
@ -84,8 +88,10 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
|
||||||
; CHECK-NEXT: mode:
|
; CHECK-NEXT: mode:
|
||||||
; CHECK-NEXT: ieee: true
|
; CHECK-NEXT: ieee: true
|
||||||
; CHECK-NEXT: dx10-clamp: true
|
; CHECK-NEXT: dx10-clamp: true
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||||
; CHECK-NEXT: body:
|
; CHECK-NEXT: body:
|
||||||
define void @function() {
|
define void @function() {
|
||||||
|
@ -111,8 +117,10 @@ define void @function() {
|
||||||
; CHECK-NEXT: mode:
|
; CHECK-NEXT: mode:
|
||||||
; CHECK-NEXT: ieee: true
|
; CHECK-NEXT: ieee: true
|
||||||
; CHECK-NEXT: dx10-clamp: true
|
; CHECK-NEXT: dx10-clamp: true
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||||
; CHECK-NEXT: body:
|
; CHECK-NEXT: body:
|
||||||
define void @function_nsz() #0 {
|
define void @function_nsz() #0 {
|
||||||
|
@ -123,8 +131,10 @@ define void @function_nsz() #0 {
|
||||||
; CHECK: mode:
|
; CHECK: mode:
|
||||||
; CHECK-NEXT: ieee: true
|
; CHECK-NEXT: ieee: true
|
||||||
; CHECK-NEXT: dx10-clamp: false
|
; CHECK-NEXT: dx10-clamp: false
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
define void @function_dx10_clamp_off() #1 {
|
define void @function_dx10_clamp_off() #1 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -133,8 +143,10 @@ define void @function_dx10_clamp_off() #1 {
|
||||||
; CHECK: mode:
|
; CHECK: mode:
|
||||||
; CHECK-NEXT: ieee: false
|
; CHECK-NEXT: ieee: false
|
||||||
; CHECK-NEXT: dx10-clamp: true
|
; CHECK-NEXT: dx10-clamp: true
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
define void @function_ieee_off() #2 {
|
define void @function_ieee_off() #2 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -143,8 +155,10 @@ define void @function_ieee_off() #2 {
|
||||||
; CHECK: mode:
|
; CHECK: mode:
|
||||||
; CHECK-NEXT: ieee: false
|
; CHECK-NEXT: ieee: false
|
||||||
; CHECK-NEXT: dx10-clamp: false
|
; CHECK-NEXT: dx10-clamp: false
|
||||||
; CHECK-NEXT: fp32-denormals: false
|
; CHECK-NEXT: fp32-input-denormals: false
|
||||||
; CHECK-NEXT: fp64-fp16-denormals: true
|
; CHECK-NEXT: fp32-output-denormals: false
|
||||||
|
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||||
|
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||||
define void @function_ieee_off_dx10_clamp_off() #3 {
|
define void @function_ieee_off_dx10_clamp_off() #3 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue