forked from OSchip/llvm-project
[AMDGPU] gfx908 hazard recognizer
Differential Revision: https://reviews.llvm.org/D64593
llvm-svn: 365829
This commit is contained in:
parent
7b4a59db1e
commit
7d2019bb96
|
@ -46,7 +46,8 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
|
|||
TRI(TII.getRegisterInfo()),
|
||||
ClauseUses(TRI.getNumRegUnits()),
|
||||
ClauseDefs(TRI.getNumRegUnits()) {
|
||||
MaxLookAhead = 5;
|
||||
MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
|
||||
TSchedModel.init(&ST);
|
||||
}
|
||||
|
||||
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
|
||||
|
@ -181,6 +182,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|||
checkReadM0Hazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
|
@ -286,6 +293,12 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
|
|||
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
|
||||
return std::max(WaitStates, checkReadM0Hazards(MI));
|
||||
|
||||
if (SIInstrInfo::isMAI(*MI))
|
||||
return std::max(WaitStates, checkMAIHazards(MI));
|
||||
|
||||
if (MI->mayLoad() || MI->mayStore())
|
||||
return std::max(WaitStates, checkMAILdStHazards(MI));
|
||||
|
||||
return WaitStates;
|
||||
}
|
||||
|
||||
|
@ -1179,3 +1192,217 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
|
|||
return FPAtomicToDenormModeWaitStates -
|
||||
::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
|
||||
}
|
||||
|
||||
/// Compute how many wait states are still required before the MAI
/// instruction \p MI can be issued.
///
/// \p MI must satisfy SIInstrInfo::isMAI (asserted). Three groups of checks
/// are performed and the maximum requirement over all of them is returned:
///   1. For anything other than V_ACCVGPR_READ_B32 (i.e. MFMA or
///      v_accvgpr_write): distance from a VALU definition of EXEC and from
///      VALU definitions of the explicit VGPR uses.
///   2. For every explicit AGPR operand: distance from an overlapping MFMA
///      definition and from an overlapping V_ACCVGPR_WRITE_B32 definition.
///   3. For V_ACCVGPR_WRITE_B32 only: distance from an MFMA whose src2
///      overlaps the register being written.
///
/// \returns the largest "needed - elapsed" value found; 0 when no check
/// produced a positive requirement.
int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  const unsigned Opc = MI->getOpcode();
  const bool IsAccRead = Opc == AMDGPU::V_ACCVGPR_READ_B32;
  const bool IsAccWrite = Opc == AMDGPU::V_ACCVGPR_WRITE_B32;

  int Required = 0;

  auto IsAnyVALU = [](MachineInstr *I) { return SIInstrInfo::isVALU(*I); };

  // --- 1. VALU writes of EXEC / VGPRs (MFMA or v_accvgpr_write only). ---
  if (!IsAccRead) {
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int ExecMaxWaitStates = 4;
    const int VGPRMaxWaitStates = 2;

    const int SinceExecDef =
        getWaitStatesSinceDef(AMDGPU::EXEC, IsAnyVALU, ExecMaxWaitStates);
    Required = std::max(Required, VALUWritesExecWaitStates - SinceExecDef);

    // The VGPR check can demand at most 2 wait states, so it is only worth
    // running while the EXEC check has not already hit its own maximum.
    if (Required < ExecMaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const bool IsVGPRUse =
            Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg());
        if (!IsVGPRUse)
          continue;

        const int SinceVGPRDef =
            getWaitStatesSinceDef(Use.getReg(), IsAnyVALU, VGPRMaxWaitStates);
        Required =
            std::max(Required, LegacyVALUWritesVGPRWaitStates - SinceVGPRDef);

        if (Required == VGPRMaxWaitStates)
          break;
      }
    }
  }

  // An MAI instruction that is neither v_accvgpr_write nor v_accvgpr_read.
  auto IsMFMA = [](MachineInstr *I) {
    if (!SIInstrInfo::isMAI(*I))
      return false;
    const unsigned Op = I->getOpcode();
    return !(Op == AMDGPU::V_ACCVGPR_WRITE_B32 ||
             Op == AMDGPU::V_ACCVGPR_READ_B32);
  };

  // Map the scheduling-model latency recorded for the hazardous MFMA onto one
  // of three wait-state counts: latency 2 selects the value named for 4x4,
  // latency 8 the value named for 16x16, and anything else (including 16 and
  // the "nothing recorded" value 0) the value named for 32x32.
  auto SelectByLatency = [](unsigned Latency, int For4x4, int For16x16,
                            int For32x32) {
    if (Latency == 2)
      return For4x4;
    if (Latency == 8)
      return For16x16;
    return For32x32;
  };

  // --- 2. AGPR operands against earlier MFMA / v_accvgpr_write defs. ---
  const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
  const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
  const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
  const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
  const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
  const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
  const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
  const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
  const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
  const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
  const int AccVGPRWriteAccVgprReadWaitStates = 3;
  const int AGPRMaxWaitStates = 18;

  for (const MachineOperand &Op : MI->explicit_operands()) {
    const bool IsAGPROp =
        Op.isReg() && TRI.isAGPR(MF.getRegInfo(), Op.getReg());
    if (!IsAGPROp)
      continue;

    // Definitions are only examined when MI itself is v_accvgpr_write.
    if (Op.isDef() && !IsAccWrite)
      continue;

    const unsigned Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose destination overlaps Reg without being exactly
    // Reg. As a side effect it accumulates the largest latency of every MFMA
    // with a different destination that the scan visits, whether or not that
    // destination overlaps.
    auto WritesOverlappingAGPR = [Reg, &IsMFMA, &HazardDefLatency,
                                  this](MachineInstr *I) {
      if (!IsMFMA(I))
        return false;
      const unsigned DstReg = I->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency =
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(I));
      return TRI.regsOverlap(DstReg, Reg);
    };

    // Must run before the latency is consulted below: the scan is what
    // populates HazardDefLatency.
    const int SinceMFMADef =
        getWaitStatesSinceDef(Reg, WritesOverlappingAGPR, AGPRMaxWaitStates);

    const int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    const int OpNo = MI->getOperandNo(&Op);
    const bool IsSrcC = OpNo == SrcCIdx;

    int Need = MFMAWritesAGPROverlappedSrcABWaitStates;
    if (IsSrcC) {
      Need = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (IsAccRead) {
      Need = SelectByLatency(HazardDefLatency,
                             MFMA4x4WritesAGPRAccVgprReadWaitStates,
                             MFMA16x16WritesAGPRAccVgprReadWaitStates,
                             MFMA32x32WritesAGPRAccVgprReadWaitStates);
    } else if (IsAccWrite) {
      Need = SelectByLatency(HazardDefLatency,
                             MFMA4x4WritesAGPRAccVgprWriteWaitStates,
                             MFMA16x16WritesAGPRAccVgprWriteWaitStates,
                             MFMA32x32WritesAGPRAccVgprWriteWaitStates);
    }

    Required = std::max(Required, Need - SinceMFMADef);
    if (Required == AGPRMaxWaitStates)
      return Required; // Cannot get any larger.

    // Matches a v_accvgpr_write whose destination overlaps Reg.
    auto IsOverlappingAccWrite = [Reg, this](MachineInstr *I) {
      if (I->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      const unsigned DstReg = I->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    Need = IsSrcC ? AccVGPRWriteMFMAReadSrcCWaitStates
                  : (IsAccRead ? AccVGPRWriteAccVgprReadWaitStates
                               : AccVGPRWriteMFMAReadSrcABWaitStates);

    const int SinceAccWrite =
        getWaitStatesSinceDef(Reg, IsOverlappingAccWrite, AGPRMaxWaitStates);
    Required = std::max(Required, Need - SinceAccWrite);
    if (Required == AGPRMaxWaitStates)
      return Required; // Cannot get any larger.
  }

  // --- 3. v_accvgpr_write against an earlier MFMA reading it as src2. ---
  if (!IsAccWrite)
    return Required;

  const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
  const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
  const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
  const int SrcCMaxWaitStates = 13;

  const unsigned DstReg = MI->getOperand(0).getReg();
  unsigned HazardDefLatency = 0;

  // Matches an MFMA whose src2 register overlaps the register MI writes;
  // records the largest latency of every MFMA visited by the scan.
  auto ReadsDstAsSrcC = [DstReg, &IsMFMA, &HazardDefLatency,
                         this](MachineInstr *I) {
    if (!IsMFMA(I))
      return false;
    const unsigned SrcCReg =
        TII.getNamedOperand(*I, AMDGPU::OpName::src2)->getReg();
    HazardDefLatency =
        std::max(HazardDefLatency, TSchedModel.computeInstrLatency(I));
    return TRI.regsOverlap(SrcCReg, DstReg);
  };

  const int SinceSrcCRead =
      getWaitStatesSince(ReadsDstAsSrcC, SrcCMaxWaitStates);
  const int Need = SelectByLatency(HazardDefLatency,
                                   MFMA4x4ReadSrcCAccVgprWriteWaitStates,
                                   MFMA16x16ReadSrcCAccVgprWriteWaitStates,
                                   MFMA32x32ReadSrcCAccVgprWriteWaitStates);

  return std::max(Required, Need - SinceSrcCRead);
}
|
||||
|
||||
/// Compute how many wait states are still required before \p MI can be
/// issued, considering earlier V_ACCVGPR_READ_B32 instructions.
///
/// Returns 0 immediately when the subtarget reports no MAI instructions.
/// Otherwise, for each explicit VGPR use of \p MI, two requirements are
/// evaluated and the maximum over all uses is returned:
///   - 2 minus the wait states elapsed since a V_ACCVGPR_READ_B32 defined
///     the used register;
///   - 1 minus the wait states elapsed since a V_ACCVGPR_READ_B32 for which
///     a non-MAI VALU definition of the used register is found within the
///     nested 2-wait-state scan.
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  const int AccVgprReadLdStWaitStates = 2;
  const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
  const int MaxWaitStates = 2;

  auto IsAccVgprRead = [](MachineInstr *I) {
    return I->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  int Required = 0;

  for (const MachineOperand &Use : MI->explicit_uses()) {
    const bool IsVGPRUse =
        Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg());
    if (!IsVGPRUse)
      continue;

    const unsigned UseReg = Use.getReg();

    // Requirement 1: the used register was produced by v_accvgpr_read.
    const int SinceAccRead =
        getWaitStatesSinceDef(UseReg, IsAccVgprRead, MaxWaitStates);
    Required = std::max(Required, AccVgprReadLdStWaitStates - SinceAccRead);

    if (Required == MaxWaitStates)
      return Required; // Cannot get any larger.

    // Requirement 2: a v_accvgpr_read for which the nested scan finds a
    // non-MAI VALU write of the used register. getWaitStatesSinceDef returns
    // INT_MAX when it finds no such write, hence the "< max()" test.
    auto IsAccReadAfterVALUDef = [UseReg, this](MachineInstr *I) {
      if (I->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
        return false;
      auto IsNonMAIVALU = [](MachineInstr *Def) {
        return SIInstrInfo::isVALU(*Def) && !SIInstrInfo::isMAI(*Def);
      };
      const int SinceVALUDef =
          getWaitStatesSinceDef(UseReg, IsNonMAIVALU, 2 /*MaxWaitStates*/);
      return SinceVALUDef < std::numeric_limits<int>::max();
    };

    const int SinceDependentRead =
        getWaitStatesSince(IsAccReadAfterVALUDef, MaxWaitStates);
    Required = std::max(
        Required, VALUWriteAccVgprReadLdStDepVALUWaitStates - SinceDependentRead);
  }

  return Required;
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include <list>
|
||||
|
||||
namespace llvm {
|
||||
|
@ -46,6 +47,7 @@ private:
|
|||
const GCNSubtarget &ST;
|
||||
const SIInstrInfo &TII;
|
||||
const SIRegisterInfo &TRI;
|
||||
TargetSchedModel TSchedModel;
|
||||
|
||||
/// RegUnits of uses in the current soft memory clause.
|
||||
BitVector ClauseUses;
|
||||
|
@ -92,6 +94,9 @@ private:
|
|||
bool fixVcmpxExecWARHazard(MachineInstr *MI);
|
||||
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
|
||||
|
||||
int checkMAIHazards(MachineInstr *MI);
|
||||
int checkMAILdStHazards(MachineInstr *MI);
|
||||
|
||||
public:
|
||||
GCNHazardRecognizer(const MachineFunction &MF);
|
||||
// We can only issue one instruction per cycle.
|
||||
|
|
|
@ -0,0 +1,457 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: valu_write_vgpr_mfma_read
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: valu_write_vgpr_mfma_read
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: valu_write_vgpr_accvgpr_write_read
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_same_agpr
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_read_same_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_read_overlap
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_read_partial
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_srca_read_overlap
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_4X4X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_4x4_write_agpr_accvgpr_read
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_16x16_write_agpr_accvgpr_read
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_32x32_write_agpr_accvgpr_read
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_write
|
||||
# GCN: V_MFMA_F32_4X4X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_4x4_write_agpr_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_16x16_write_agpr_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_32x32_write_agpr_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_4x4_read_srcc_accvgpr_write
|
||||
# GCN: V_MFMA_F32_4X4X1F32
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_4x4_read_srcc_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_16x16_read_srcc_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_32x32_read_srcc_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_valu_read
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: V_ADD_F32
|
||||
name: accvgpr_read_write_vgpr_valu_read
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
|
||||
$vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_read_write_vgpr_mfma_read
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
|
||||
$agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcc
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_write_agpr_mfma_read_srcc
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_write_agpr_mfma_read_srca
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_write_agpr_mfma_read_srcb
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: accvgpr_write_agpr_accvgpr_read
|
||||
body: |
|
||||
bb.0:
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
|
||||
$vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: vcmpx_write_exec_mfma
|
||||
# GCN: V_CMPX_EQ_I32_e32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: vcmpx_write_exec_mfma
|
||||
body: |
|
||||
bb.0:
|
||||
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
|
||||
$agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
|
||||
# GCN: V_CMPX_EQ_I32_e32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: vcmpx_write_exec_accvgpr_write
|
||||
body: |
|
||||
bb.0:
|
||||
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
|
||||
$agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: accvgpr_read_write_vgpr_load
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: DS_WRITE_B32
|
||||
name: accvgpr_read_write_vgpr_store
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
DS_WRITE_B32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_no_dependency
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: valu_write_vgpr_accvgpr_read_load_no_dependency
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
$vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
$vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
$vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
$vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
|
||||
$vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
...
|
||||
---
|
Loading…
Reference in New Issue