forked from OSchip/llvm-project
[AMDGPU] Use S_DENORM_MODE for gfx10
Summary: During fdiv32 lowering use S_DENORM_MODE to select denorm mode in gfx10. Reviewers: arsenm, rampitec Reviewed By: arsenm, rampitec Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65620 llvm-svn: 367882
This commit is contained in:
parent
e15d95a987
commit
8d229dbb47
|
@ -4221,6 +4221,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
NODE_NAME_CASE(FRACT)
|
NODE_NAME_CASE(FRACT)
|
||||||
NODE_NAME_CASE(SETCC)
|
NODE_NAME_CASE(SETCC)
|
||||||
NODE_NAME_CASE(SETREG)
|
NODE_NAME_CASE(SETREG)
|
||||||
|
NODE_NAME_CASE(DENORM_MODE)
|
||||||
NODE_NAME_CASE(FMA_W_CHAIN)
|
NODE_NAME_CASE(FMA_W_CHAIN)
|
||||||
NODE_NAME_CASE(FMUL_W_CHAIN)
|
NODE_NAME_CASE(FMUL_W_CHAIN)
|
||||||
NODE_NAME_CASE(CLAMP)
|
NODE_NAME_CASE(CLAMP)
|
||||||
|
|
|
@ -369,6 +369,9 @@ enum NodeType : unsigned {
|
||||||
// result bit per item in the wavefront.
|
// result bit per item in the wavefront.
|
||||||
SETCC,
|
SETCC,
|
||||||
SETREG,
|
SETREG,
|
||||||
|
|
||||||
|
DENORM_MODE,
|
||||||
|
|
||||||
// FP ops with input and output chain.
|
// FP ops with input and output chain.
|
||||||
FMA_W_CHAIN,
|
FMA_W_CHAIN,
|
||||||
FMUL_W_CHAIN,
|
FMUL_W_CHAIN,
|
||||||
|
|
|
@ -617,6 +617,11 @@ public:
|
||||||
return getGeneration() >= AMDGPUSubtarget::GFX9;
|
return getGeneration() >= AMDGPUSubtarget::GFX9;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \returns true if the target has the S_DENORM_MODE instruction, i.e. its generation is GFX10 or later.
|
||||||
|
bool hasDenormModeInst() const {
|
||||||
|
return getGeneration() >= AMDGPUSubtarget::GFX10;
|
||||||
|
}
|
||||||
|
|
||||||
bool useFlatForGlobal() const {
|
bool useFlatForGlobal() const {
|
||||||
return FlatForGlobal;
|
return FlatForGlobal;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7591,6 +7591,19 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
|
||||||
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
|
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the i32 target-constant immediate for the S_DENORM_MODE instruction
|
||||||
|
// that selects \p SPDenormMode for F32. Asserts that \p ST has S_DENORM_MODE.
|
||||||
|
static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
|
||||||
|
const SDLoc &SL, const GCNSubtarget *ST) {
|
||||||
|
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
|
||||||
|
// The F64 (DP) part of the immediate is derived from the subtarget's FP64
// denormal setting rather than from a parameter.
int DPDenormModeDefault = ST->hasFP64Denormals()
|
||||||
|
? FP_DENORM_FLUSH_NONE
|
||||||
|
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||||
|
|
||||||
|
// Pack both modes into one immediate: the DP mode sits two bits above the
// SP mode.
int Mode = SPDenormMode | (DPDenormModeDefault << 2);
|
||||||
|
return DAG.getTargetConstant(Mode, SL, MVT::i32);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||||
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
|
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
|
||||||
return FastLowered;
|
return FastLowered;
|
||||||
|
@ -7617,16 +7630,26 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||||
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
|
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
|
||||||
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
|
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
|
||||||
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
|
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
|
||||||
|
|
||||||
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
|
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
|
||||||
|
|
||||||
if (!Subtarget->hasFP32Denormals()) {
|
if (!Subtarget->hasFP32Denormals()) {
|
||||||
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||||
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
|
|
||||||
SL, MVT::i32);
|
SDValue EnableDenorm;
|
||||||
SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
|
if (Subtarget->hasDenormModeInst()) {
|
||||||
DAG.getEntryNode(),
|
const SDValue EnableDenormValue =
|
||||||
EnableDenormValue, BitField);
|
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
|
||||||
|
|
||||||
|
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
|
||||||
|
DAG.getEntryNode(), EnableDenormValue);
|
||||||
|
} else {
|
||||||
|
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
|
||||||
|
SL, MVT::i32);
|
||||||
|
EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
|
||||||
|
DAG.getEntryNode(), EnableDenormValue,
|
||||||
|
BitField);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue Ops[3] = {
|
SDValue Ops[3] = {
|
||||||
NegDivScale0,
|
NegDivScale0,
|
||||||
EnableDenorm.getValue(0),
|
EnableDenorm.getValue(0),
|
||||||
|
@ -7648,19 +7671,29 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
|
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
|
||||||
NumeratorScaled, Mul);
|
NumeratorScaled, Mul);
|
||||||
|
|
||||||
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA,SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
|
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
|
||||||
|
|
||||||
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
|
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
|
||||||
NumeratorScaled, Fma3);
|
NumeratorScaled, Fma3);
|
||||||
|
|
||||||
if (!Subtarget->hasFP32Denormals()) {
|
if (!Subtarget->hasFP32Denormals()) {
|
||||||
const SDValue DisableDenormValue =
|
|
||||||
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
|
SDValue DisableDenorm;
|
||||||
SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
|
if (Subtarget->hasDenormModeInst()) {
|
||||||
Fma4.getValue(1),
|
const SDValue DisableDenormValue =
|
||||||
DisableDenormValue,
|
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
|
||||||
BitField,
|
|
||||||
Fma4.getValue(2));
|
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
|
||||||
|
Fma4.getValue(1), DisableDenormValue,
|
||||||
|
Fma4.getValue(2));
|
||||||
|
} else {
|
||||||
|
const SDValue DisableDenormValue =
|
||||||
|
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
|
||||||
|
|
||||||
|
DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
|
||||||
|
Fma4.getValue(1), DisableDenormValue,
|
||||||
|
BitField, Fma4.getValue(2));
|
||||||
|
}
|
||||||
|
|
||||||
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
|
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
|
||||||
DisableDenorm, DAG.getRoot());
|
DisableDenorm, DAG.getRoot());
|
||||||
|
|
|
@ -2671,6 +2671,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
|
||||||
MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
|
MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
|
||||||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
|
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
|
||||||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
|
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
|
||||||
|
MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
|
||||||
changesVGPRIndexingMode(MI);
|
changesVGPRIndexingMode(MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -266,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
|
||||||
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
|
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// SelectionDAG node for AMDGPUISD::DENORM_MODE. Its type profile declares no
// results and a single integer operand; the node is chained, has side effects,
// takes optional input glue and produces output glue.
def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
|
||||||
|
SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
|
||||||
|
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
|
||||||
|
>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// ValueType helpers
|
// ValueType helpers
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -689,7 +694,7 @@ def SIMM16bit : ImmLeaf <i32,
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def UIMM16bit : ImmLeaf <i32,
|
def UIMM16bit : ImmLeaf <i32,
|
||||||
[{return isUInt<16>(Imm); }]
|
[{return isUInt<16>(Imm);}]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
|
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
|
||||||
|
|
|
@ -1168,7 +1168,10 @@ let SubtargetPredicate = isGFX10Plus in {
|
||||||
def S_ROUND_MODE :
|
def S_ROUND_MODE :
|
||||||
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
|
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
|
||||||
def S_DENORM_MODE :
|
def S_DENORM_MODE :
|
||||||
SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">;
|
SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
|
||||||
|
[(SIdenorm_mode (i32 timm:$simm16))]> {
|
||||||
|
let hasSideEffects = 1;
|
||||||
|
}
|
||||||
def S_TTRACEDATA_IMM :
|
def S_TTRACEDATA_IMM :
|
||||||
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
|
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
|
||||||
} // End SubtargetPredicate = isGFX10Plus
|
} // End SubtargetPredicate = isGFX10Plus
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
|
||||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||||
|
|
||||||
; These tests check that fdiv is expanded correctly and also test that the
|
; These tests check that fdiv is expanded correctly and also test that the
|
||||||
|
@ -17,14 +18,16 @@
|
||||||
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
|
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
|
||||||
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
|
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
|
||||||
|
|
||||||
; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
|
||||||
|
; GFX10: s_denorm_mode 15
|
||||||
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
|
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
|
||||||
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
|
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
|
||||||
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
|
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
|
||||||
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
|
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
|
||||||
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
|
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
|
||||||
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
|
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
|
||||||
; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
|
; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
|
||||||
|
; GFX10: s_denorm_mode 12
|
||||||
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
|
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
|
||||||
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
|
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
|
||||||
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
|
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
|
@ -39,17 +42,28 @@ entry:
|
||||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
|
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
|
||||||
|
|
||||||
; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
|
; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
|
||||||
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
|
|
||||||
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
|
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
|
||||||
|
|
||||||
; GCN-NOT: s_setreg
|
; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
|
||||||
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
|
; PREGFX10-NOT: s_setreg
|
||||||
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
|
; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
|
||||||
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
|
; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
|
||||||
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
|
; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
|
||||||
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
|
; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
|
||||||
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
|
; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
|
||||||
; GCN-NOT: s_setreg
|
; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
|
||||||
|
; PREGFX10-NOT: s_setreg
|
||||||
|
|
||||||
|
; GFX10-NOT: s_denorm_mode
|
||||||
|
; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
|
||||||
|
; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
|
||||||
|
; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
|
||||||
|
; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
|
||||||
|
; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
|
||||||
|
; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
|
||||||
|
; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
|
||||||
|
; GFX10-NOT: s_denorm_mode
|
||||||
|
|
||||||
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
|
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
|
||||||
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
|
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
|
||||||
define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
|
define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
|
||||||
|
@ -88,7 +102,8 @@ entry:
|
||||||
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
|
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
|
||||||
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
|
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
|
||||||
; GCN-NOT: [[RESULT]]
|
; GCN-NOT: [[RESULT]]
|
||||||
; GCN-NOT: s_setreg
|
; PREGFX10-NOT: s_setreg
|
||||||
|
; GFX10-NOT: s_denorm_mode
|
||||||
; GCN: buffer_store_dword [[RESULT]]
|
; GCN: buffer_store_dword [[RESULT]]
|
||||||
define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
|
define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
|
||||||
entry:
|
entry:
|
||||||
|
|
Loading…
Reference in New Issue