[AMDGPU] gfx908 hazard recognizer

Differential Revision: https://reviews.llvm.org/D64593

llvm-svn: 365829
This commit is contained in:
Stanislav Mekhanoshin 2019-07-11 21:30:34 +00:00
parent 7b4a59db1e
commit 7d2019bb96
3 changed files with 690 additions and 1 deletions

View File

@ -46,7 +46,8 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
TRI(TII.getRegisterInfo()),
ClauseUses(TRI.getNumRegUnits()),
ClauseDefs(TRI.getNumRegUnits()) {
MaxLookAhead = 5;
MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
TSchedModel.init(&ST);
}
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
@ -181,6 +182,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
checkReadM0Hazards(MI) > 0)
return NoopHazard;
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
return NoopHazard;
if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
return NoopHazard;
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
return NoopHazard;
@ -286,6 +293,12 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
if (SIInstrInfo::isMAI(*MI))
return std::max(WaitStates, checkMAIHazards(MI));
if (MI->mayLoad() || MI->mayStore())
return std::max(WaitStates, checkMAILdStHazards(MI));
return WaitStates;
}
@ -1179,3 +1192,217 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
return FPAtomicToDenormModeWaitStates -
::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
/// Compute how many wait states still have to elapse before the MAI
/// instruction \p MI (an MFMA, V_ACCVGPR_WRITE_B32 or V_ACCVGPR_READ_B32) may
/// issue.
///
/// The checks performed, in order:
///   1. For everything except V_ACCVGPR_READ_B32: a VALU definition of EXEC,
///      and VALU definitions of the explicit VGPR uses.
///   2. For every explicit AGPR operand (defs only for V_ACCVGPR_WRITE_B32):
///      an earlier MFMA whose destination overlaps the operand, and an earlier
///      V_ACCVGPR_WRITE_B32 whose destination overlaps the operand.
///   3. For V_ACCVGPR_WRITE_B32 only: an earlier MFMA whose src2 overlaps the
///      register being written.
///
/// \param MI the instruction about to be issued; must satisfy
///           SIInstrInfo::isMAI().
/// \returns the largest outstanding wait-state requirement found, never less
///          than 0.
int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isVALU(*MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    // Distance to the most recent VALU definition of EXEC.
    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // The VGPR checks below can contribute at most 2, so they are skipped
    // once the EXEC check has already produced the block maximum.
    if (WaitStatesNeeded < MaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2;

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  // True for MAI instructions other than the two AGPR copy opcodes.
  auto IsMFMAFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isMAI(*MI) &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    // AGPR definitions are only examined for v_accvgpr_write.
    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    unsigned Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose destination overlaps Reg without being exactly
    // Reg. As a side effect it records the largest scheduling-model latency
    // of every MFMA it inspects that is not an exact-register match; that
    // latency selects the wait-state constant below.
    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
                              (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
      // Latency 2 selects the 4x4 constant, 8 the 16x16 constant, and
      // anything else (including 16) the 32x32 constant.
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
               break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
               break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Matches a v_accvgpr_write whose destination overlaps Reg.
    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      unsigned DstReg = MI->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose src2 register overlaps the register this
    // v_accvgpr_write defines. The latency of every inspected MFMA is
    // recorded, as in the overlap check above.
    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
                        (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      const MachineOperand *SrcC =
        TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      // Only a register src2 can alias DstReg. getReg() must not be called
      // on any other operand kind, so those are treated as non-overlapping.
      if (!SrcC || !SrcC->isReg())
        return false;
      return TRI.regsOverlap(SrcC->getReg(), DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
             break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
             break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
             break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
/// Compute how many wait states the load or store \p MI still has to wait
/// because of V_ACCVGPR_READ_B32 instructions related to its VGPR operands.
///
/// Returns 0 straight away when the subtarget reports no MAI instructions.
/// Otherwise each explicit VGPR use is tested twice:
///   - against a V_ACCVGPR_READ_B32 that defines the register (2 wait
///     states), and
///   - against any V_ACCVGPR_READ_B32 for which a non-MAI VALU definition of
///     the register is found by getWaitStatesSinceDef() with a limit of 2
///     (1 wait state).
///
/// \param MI the memory instruction about to be issued.
/// \returns the largest outstanding requirement found, never less than 0.
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  const int AccVgprReadLdStWaitStates = 2;
  const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
  const int MaxWaitStates = 2;

  auto IsAccVgprRead = [] (MachineInstr *Candidate) {
    return Candidate->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  // A VALU instruction that is not itself an MAI instruction.
  auto IsNonMAIVALU = [] (MachineInstr *Candidate) {
    return SIInstrInfo::isVALU(*Candidate) && !SIInstrInfo::isMAI(*Candidate);
  };

  int Result = 0;

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg())
      continue;

    const unsigned UseReg = Op.getReg();
    if (!TRI.isVGPR(MF.getRegInfo(), UseReg))
      continue;

    // First check: the operand was produced by a v_accvgpr_read.
    const int SinceReadDef =
      getWaitStatesSinceDef(UseReg, IsAccVgprRead, MaxWaitStates);
    Result = std::max(Result, AccVgprReadLdStWaitStates - SinceReadDef);

    // Nothing below can exceed the maximum, so stop as soon as it is hit.
    if (Result == MaxWaitStates)
      return Result;

    // Second check: a v_accvgpr_read counts as a hazard only if a non-MAI
    // VALU definition of the operand is found within the 2-state limit.
    auto IsReadWithVALUDef = [UseReg, &IsNonMAIVALU, this]
                             (MachineInstr *Candidate) {
      return Candidate->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32 &&
             getWaitStatesSinceDef(UseReg, IsNonMAIVALU,
                                   2 /*MaxWaitStates*/) <
               std::numeric_limits<int>::max();
    };

    const int SinceDependentRead =
      getWaitStatesSince(IsReadWithVALUDef, MaxWaitStates);
    Result = std::max(Result, VALUWriteAccVgprReadLdStDepVALUWaitStates -
                                SinceDependentRead);
  }

  return Result;
}

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include <list>
namespace llvm {
@ -46,6 +47,7 @@ private:
const GCNSubtarget &ST;
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
TargetSchedModel TSchedModel;
/// RegUnits of uses in the current soft memory clause.
BitVector ClauseUses;
@ -92,6 +94,9 @@ private:
bool fixVcmpxExecWARHazard(MachineInstr *MI);
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAILdStHazards(MachineInstr *MI);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.

View File

@ -0,0 +1,457 @@
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: valu_write_vgpr_mfma_read
# GCN: V_MOV_B32
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: valu_write_vgpr_mfma_read
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: valu_write_vgpr_accvgpr_write_read
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_write_agpr_mfma_read_same_agpr
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_same_agpr
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_overlap
body: |
bb.0:
$agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_partial
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srca_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srcb_read_overlap
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_4X4X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_4x4_write_agpr_accvgpr_read
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_16x16_write_agpr_accvgpr_read
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_32x32_write_agpr_accvgpr_read
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_4X4X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_4x4_write_agpr_accvgpr_write
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_write_agpr_accvgpr_write
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_write_agpr_accvgpr_write
body: |
bb.0:
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: mfma_4x4_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_4X4X1F32
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_4x4_read_srcc_accvgpr_write
body: |
bb.0:
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
...
---
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_read_srcc_accvgpr_write
body: |
bb.0:
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
...
---
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_read_srcc_accvgpr_write
body: |
bb.0:
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_read_write_vgpr_valu_read
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: V_ADD_F32
name: accvgpr_read_write_vgpr_valu_read
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
$vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: accvgpr_read_write_vgpr_mfma_read
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: accvgpr_read_write_vgpr_accvgpr_write_read
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcc
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srcc
body: |
bb.0:
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srca
body: |
bb.0:
$agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srcb
body: |
bb.0:
$agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_READ_B32
name: accvgpr_write_agpr_accvgpr_read
body: |
bb.0:
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
$vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
...
---
# GCN-LABEL: name: vcmpx_write_exec_mfma
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: vcmpx_write_exec_mfma
body: |
bb.0:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
...
---
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: vcmpx_write_exec_accvgpr_write
body: |
bb.0:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
...
---
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_LOAD_DWORD
name: accvgpr_read_write_vgpr_load
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: DS_WRITE_B32
name: accvgpr_read_write_vgpr_store
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
DS_WRITE_B32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
...
---
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_no_dependency
# GCN: V_MOV_B32
# GCN-NEXT: V_ACCVGPR_READ_B32
# GCN-NEXT: FLAT_LOAD_DWORD
name: valu_write_vgpr_accvgpr_read_load_no_dependency
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
$vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
# GCN: V_MOV_B32
# GCN-NEXT: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_LOAD_DWORD
name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
---
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
# GCN: V_MOV_B32
# GCN-NEXT: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_LOAD_DWORD
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
$vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
---