diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index ef1ccd2c1aa3..69bef02b2031 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -691,34 +691,53 @@ class MUBUF_AtomicRet_Pseudo { + SDPatternOperator atomic, + bit isFP = getIsFP.ret> { + let FPAtomic = isFP in def _OFFSET : MUBUF_AtomicNoRet_Pseudo , MUBUFAddr64Table <0, NAME>; + + let FPAtomic = isFP in def _ADDR64 : MUBUF_AtomicNoRet_Pseudo , MUBUFAddr64Table <1, NAME>; + + let FPAtomic = isFP in def _OFFEN : MUBUF_AtomicNoRet_Pseudo ; + + let FPAtomic = isFP in + def _IDXEN : MUBUF_AtomicNoRet_Pseudo ; + + let FPAtomic = isFP in def _BOTHEN : MUBUF_AtomicNoRet_Pseudo ; } multiclass MUBUF_Pseudo_Atomics_RTN { + SDPatternOperator atomic, + bit isFP = getIsFP.ret> { + let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo , MUBUFAddr64Table <0, NAME # "_RTN">; + let FPAtomic = isFP in def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo , MUBUFAddr64Table <1, NAME # "_RTN">; + let FPAtomic = isFP in def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo ; + + let FPAtomic = isFP in def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo ; + + let FPAtomic = isFP in def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo ; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 0196b36a95ce..966bb6666ccd 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -273,7 +273,8 @@ multiclass FLAT_Atomic_Pseudo< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP.ret> { def "" : FLAT_AtomicNoRet_Pseudo , AtomicNoRet { let PseudoInstr = NAME; + let FPAtomic = isFP; } def _RTN : FLAT_AtomicRet_Pseudo , GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet ; + AtomicNoRet { + let FPAtomic = isFP; + } } multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< @@ 
-299,7 +303,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP.ret> { def "" : FLAT_AtomicNoRet_Pseudo { let has_saddr = 1; let PseudoInstr = NAME; + let FPAtomic = isFP; } def _SADDR : FLAT_AtomicNoRet_Pseudo { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP.ret> { def _RTN : FLAT_AtomicRet_Pseudo , AtomicNoRet { let has_saddr = 1; + let FPAtomic = isFP; } def _SADDR_RTN : FLAT_AtomicRet_Pseudo 0) return NoopHazard; + if (checkFPAtomicToDenormModeHazard(MI) > 0) + return NoopHazard; + if (ST.hasNoDataDepHazard()) return NoHazard; @@ -247,6 +250,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { if (ST.hasNSAtoVMEMBug()) WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); + WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); + if (ST.hasNoDataDepHazard()) return WaitStates; @@ -1138,3 +1143,39 @@ int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); } + +int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { + int FPAtomicToDenormModeWaitStates = 3; + + if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) + return 0; + + auto IsHazardFn = [] (MachineInstr *I) { + if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I)) + return false; + return SIInstrInfo::isFPAtomic(*I); + }; + + auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) { + if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI)) + return true; + + switch (MI->getOpcode()) { + case AMDGPU::S_WAITCNT: + case AMDGPU::S_WAITCNT_VSCNT: + case AMDGPU::S_WAITCNT_VMCNT: + case AMDGPU::S_WAITCNT_EXPCNT: + case AMDGPU::S_WAITCNT_LGKMCNT: + case AMDGPU::S_WAITCNT_IDLE: + return true; + default: + break; + } + + return false; + }; + + + return FPAtomicToDenormModeWaitStates - + 
::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 0c4c9d9d9824..cf914b398044 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -84,6 +84,7 @@ private: int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); + int checkFPAtomicToDenormModeHazard(MachineInstr *MI); void fixHazards(MachineInstr *MI); bool fixVcmpxPermlaneHazards(MachineInstr *MI); bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 6f7dbc76f2ef..bb0c9306f53d 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -716,9 +716,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic , "image_atomic_or">; defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">; defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">; defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">; +//let FPAtomic = 1 in { //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI +//} // End let FPAtomic = 1 defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>; defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>; defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index cc96f1de43ad..50cd079721cf 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -93,7 +93,10 @@ enum : uint64_t { IsNonFlatSeg = UINT64_C(1) << 51, // Uses floating point double 
precision rounding mode - FPDPRounding = UINT64_C(1) << 52 + FPDPRounding = UINT64_C(1) << 52, + + // Instruction is FP atomic. + FPAtomic = UINT64_C(1) << 53 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index e0f928bdf86c..eb64a0685dee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -118,6 +118,9 @@ class InstSI DisableSIDecoder = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index f96b03dcd2cd..64eb60b4690f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -631,6 +631,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; } + static bool isFPAtomic(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; + } + + bool isFPAtomic(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(MI.isCopy()); unsigned Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c06356a685e3..1a3e16afce39 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1243,6 +1243,17 @@ class getVALUDstForVT { VOPDstS64orS32)))); // else VT == i1 } +// Returns true if VT is floating point. 
+class getIsFP { + bit ret = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, v2f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, v2f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + !if(!eq(VT.Value, v2f64.Value), 1, + 0)))))); +} + // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT { @@ -1254,11 +1265,7 @@ class getSDWADstForVT { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0)))); + bit isFP = getIsFP.ret; RegisterOperand ret = !if(isFP, @@ -1292,9 +1299,7 @@ class getVregSrcForVT { } class getSDWASrcForVT { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - 0)); + bit isFP = getIsFP.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1303,11 +1308,7 @@ class getSDWASrcForVT { // Returns the register class to use for sources of VOP3 instructions for the // given VT. 
class getVOP3SrcForVT { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0)))); + bit isFP = getIsFP.ret; RegisterOperand ret = !if(!eq(VT.Size, 128), VSrc_128, @@ -1351,10 +1352,7 @@ class isModifierType { // Return type of input modifiers operand for specified input operand class getSrcMod { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0))); + bit isFP = getIsFP.ret; bit isPacked = isPackedType.ret; Operand ret = !if(!eq(VT.Size, 64), !if(isFP, FP64InputMods, Int64InputMods), @@ -1373,10 +1371,7 @@ class getOpSelMod { // Return type of input modifiers operand specified input operand for DPP class getSrcModExt { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0))); + bit isFP = getIsFP.ret; Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir new file mode 100644 index 000000000000..f1b5ee3524d9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -0,0 +1,447 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FCMPSWAP +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fcmpswap_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... 
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fcmpswap_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmax_to_s_denorm_mode
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmax_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmin_to_s_denorm_mode
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmin_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmax_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmax_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmin_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fmin_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_atomic_fcmpswap_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_x2_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_x2_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_saddr_to_s_denorm_mode
+body:            |
+  bb.0:
+    GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_saddr_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_saddr_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
+body:            |
+  bb.0:
+    %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_fp_atomic_to_s_denorm_mode_waitcnt
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    S_WAITCNT 0
+    S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: V_ADD_F32_e32
+# GCN-NEXT: S_DENORM_MODE
+---
+name:            flat_fp_atomic_to_s_denorm_mode_valu
+body:            |
+  bb.0:
+    FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+    %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec
+    S_DENORM_MODE 0
+...