[AMDGPU] hazard recognizer for fp atomic to s_denorm_mode

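An S_DENORM_MODE that follows a floating-point VMEM/FLAT atomic requires
3 wait states, unless an S_WAITCNT-family instruction or a VALU instruction
is issued in between.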

Differential Revision: https://reviews.llvm.org/D63619

llvm-svn: 364074
This commit is contained in:
Stanislav Mekhanoshin 2019-06-21 16:30:14 +00:00
parent dbcdad51ff
commit bdf7f81b89
10 changed files with 559 additions and 28 deletions

View File

@ -691,34 +691,53 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
// No-return MUBUF atomic pseudos for opName, one def per buffer addressing
// mode: _OFFSET, _ADDR64, _OFFEN, _IDXEN and _BOTHEN.
// isFP defaults to getIsFP<vdataType>.ret and is copied into the FPAtomic
// field of every def below through the per-def `let FPAtomic = isFP in`.
// NOTE(review): this hunk is rendered without +/- markers; the line
// `SDPatternOperator atomic> {` looks like the pre-change end of the parameter
// list that the two lines after it replace -- confirm against the raw diff.
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic> {
SDPatternOperator atomic,
bit isFP = getIsFP<vdataType>.ret> {
// _OFFSET and _ADDR64 are additionally entered in MUBUFAddr64Table (0 and 1).
let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
let FPAtomic = isFP in
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
MUBUFAddr64Table <1, NAME>;
let FPAtomic = isFP in
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
let FPAtomic = isFP in
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
let FPAtomic = isFP in
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
// Returning MUBUF atomic pseudos for opName, one def per buffer addressing
// mode: _OFFSET_RTN, _ADDR64_RTN, _OFFEN_RTN, _IDXEN_RTN and _BOTHEN_RTN.
// isFP defaults to getIsFP<vdataType>.ret and is copied into the FPAtomic
// field of every def below through the per-def `let FPAtomic = isFP in`.
// NOTE(review): this hunk is rendered without +/- markers; the line
// `SDPatternOperator atomic> {` looks like the pre-change end of the parameter
// list that the two lines after it replace -- confirm against the raw diff.
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic> {
SDPatternOperator atomic,
bit isFP = getIsFP<vdataType>.ret> {
// Only _OFFSET_RTN and _ADDR64_RTN are given a selection pattern; both are
// also entered in MUBUFAddr64Table (0 and 1) under NAME # "_RTN".
let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
// The remaining addressing modes pass no pattern argument.
let FPAtomic = isFP in
def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
let FPAtomic = isFP in
def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
let FPAtomic = isFP in
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}

View File

@ -273,7 +273,8 @@ multiclass FLAT_Atomic_Pseudo<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
RegisterClass data_rc = vdst_rc,
bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
@ -281,6 +282,7 @@ multiclass FLAT_Atomic_Pseudo<
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
let FPAtomic = isFP;
}
def _RTN : FLAT_AtomicRet_Pseudo <opName,
@ -290,7 +292,9 @@ multiclass FLAT_Atomic_Pseudo<
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>;
AtomicNoRet <opName, 1>{
let FPAtomic = isFP;
}
}
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
@ -299,7 +303,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
RegisterClass data_rc = vdst_rc,
bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
@ -309,6 +314,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
let FPAtomic = isFP;
}
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
@ -320,6 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
let FPAtomic = isFP;
}
}
@ -329,7 +336,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
RegisterClass data_rc = vdst_rc,
bit isFP = getIsFP<data_vt>.ret> {
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
@ -340,6 +348,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
let FPAtomic = isFP;
}
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
@ -351,6 +360,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR_RTN";
let FPAtomic = isFP;
}
}

View File

@ -145,6 +145,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
return NoopHazard;
if (checkFPAtomicToDenormModeHazard(MI) > 0)
return NoopHazard;
if (ST.hasNoDataDepHazard())
return NoHazard;
@ -247,6 +250,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasNSAtoVMEMBug())
WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
if (ST.hasNoDataDepHazard())
return WaitStates;
@ -1138,3 +1143,39 @@ int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}
/// Checks the FP-atomic -> S_DENORM_MODE hazard for \p MI.
///
/// Returns 0 when \p MI is not an S_DENORM_MODE. Otherwise returns the required
/// number of wait states minus the wait states already elapsed since the most
/// recent VMEM/FLAT instruction flagged as an FP atomic, as reported by
/// ::getWaitStatesSince. Callers treat a positive result as "padding needed".
int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  // Wait states required between an FP atomic and a following S_DENORM_MODE.
  // It is a constant that is only read by value inside IsExpiredFn, so the
  // lambda does not need to capture it.
  const int FPAtomicToDenormModeWaitStates = 3;

  // Only S_DENORM_MODE is subject to this hazard.
  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  // The hazard source: a VMEM or FLAT instruction marked as an FP atomic.
  auto IsHazardFn = [](MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  // The search stops once enough wait states have been seen, or as soon as a
  // VALU instruction or any of the S_WAITCNT variants below is encountered.
  auto IsExpiredFn = [](MachineInstr *I, int WaitStates) {
    if (WaitStates >= FPAtomicToDenormModeWaitStates ||
        SIInstrInfo::isVALU(*I))
      return true;

    switch (I->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}

View File

@ -84,6 +84,7 @@ private:
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
void fixHazards(MachineInstr *MI);
bool fixVcmpxPermlaneHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);

View File

@ -716,9 +716,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;
defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
//let FPAtomic = 1 in {
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
//} // End let FPAtomic = 1
defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;

View File

@ -93,7 +93,10 @@ enum : uint64_t {
IsNonFlatSeg = UINT64_C(1) << 51,
// Uses floating point double precision rounding mode
FPDPRounding = UINT64_C(1) << 52
FPDPRounding = UINT64_C(1) << 52,
// Instruction is FP atomic.
FPAtomic = UINT64_C(1) << 53
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

View File

@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// rounding mode flags
field bit FPDPRounding = 0;
// Instruction is FP atomic.
field bit FPAtomic = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@ -179,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{52} = FPDPRounding;
let TSFlags{53} = FPAtomic;
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;

View File

@ -631,6 +631,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
}
/// Whether the instruction descriptor of \p MI has the
/// SIInstrFlags::FPAtomic bit set in its TSFlags.
static bool isFPAtomic(const MachineInstr &MI) {
  const auto Flags = MI.getDesc().TSFlags;
  return (Flags & SIInstrFlags::FPAtomic) != 0;
}
/// Opcode-based form: whether the descriptor returned by get(\p Opcode) has
/// the SIInstrFlags::FPAtomic bit set in its TSFlags.
bool isFPAtomic(uint16_t Opcode) const {
  const auto Flags = get(Opcode).TSFlags;
  return (Flags & SIInstrFlags::FPAtomic) != 0;
}
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();

View File

@ -1243,6 +1243,17 @@ class getVALUDstForVT<ValueType VT> {
VOPDstS64orS32)))); // else VT == i1
}
// Returns true if VT is floating point.
// ret is 1 when VT.Value matches that of f16, v2f16, f32, v2f32, f64 or
// v2f64, and 0 for every other value type (the final else arm of the chain),
// so FP types not listed here are reported as non-FP.
class getIsFP<ValueType VT> {
bit ret = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, v2f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
!if(!eq(VT.Value, v2f64.Value), 1,
0))))));
}
// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
@ -1254,11 +1265,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0))));
bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(isFP,
@ -1292,9 +1299,7 @@ class getVregSrcForVT<ValueType VT> {
}
class getSDWASrcForVT <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
0));
bit isFP = getIsFP<VT>.ret;
RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
RegisterOperand ret = !if(isFP, retFlt, retInt);
@ -1303,11 +1308,7 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0))));
bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
@ -1351,10 +1352,7 @@ class isModifierType<ValueType SrcVT> {
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
bit isFP = getIsFP<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
@ -1373,10 +1371,7 @@ class getOpSelMod <ValueType VT> {
// Return type of input modifiers operand specified input operand for DPP
class getSrcModExt <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
bit isFP = getIsFP<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

View File

@ -0,0 +1,447 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt
# GCN: FLAT_ATOMIC_FMIN
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_DENORM_MODE
---
name: flat_fp_atomic_to_s_denorm_mode_waitcnt
body: |
bb.0:
FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_WAITCNT 0
S_DENORM_MODE 0
...
# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu
# GCN: FLAT_ATOMIC_FMIN
# GCN-NEXT: V_ADD_F32_e32
# GCN-NEXT: S_DENORM_MODE
---
name: flat_fp_atomic_to_s_denorm_mode_valu
body: |
bb.0:
FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec
S_DENORM_MODE 0
...