[AMDGPU] Check MI bundles for hazards

Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.

Reviewers: arsenm, msearles, rampitec

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61564

llvm-svn: 360199
This commit is contained in:
Austin Kerbow 2019-05-07 22:12:15 +00:00
parent 6e6480e216
commit 8a3d3a9af6
4 changed files with 222 additions and 11 deletions

View File

@ -124,6 +124,8 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (MI->isBundle())
return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
@ -179,6 +181,37 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return NoHazard;
}
/// Build one `S_NOP 0` at the position of \p MI inside MI's parent block,
/// reusing MI's debug location. \p TII supplies the S_NOP instruction
/// description.
static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  auto &ParentBlock = *MI->getParent();
  const auto &Loc = MI->getDebugLoc();
  const auto &NopDesc = TII.get(AMDGPU::S_NOP);
  BuildMI(ParentBlock, MI, Loc, NopDesc).addImm(0);
}
// Walk the instructions that follow CurrCycleInstr for as long as they are
// inside a bundle, and run the hazard checks on each of them individually.
// For every bundled instruction this computes the required wait states,
// applies the fix-ups when running in hazard recognizer mode, inserts the
// noops, and records the noops and the instruction in EmittedInstrs.
// CurrCycleInstr is reset to nullptr once the walk is finished.
void GCNHazardRecognizer::processBundle() {
// Start at the instruction directly after CurrCycleInstr; instr_iterator is
// used so that the walk visits the instructions inside the bundle.
MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
// Check bundled MachineInstr's for hazards.
for (; MI != E && MI->isInsideBundle(); ++MI) {
// Make the bundled instruction the current one before the checks below run.
CurrCycleInstr = &*MI;
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
if (IsHazardRecognizerMode)
fixHazards(CurrCycleInstr);
// One noop is inserted per required wait state.
for (unsigned i = 0; i < WaitStates; ++i)
insertNoopInBundle(CurrCycleInstr, TII);
// It's unnecessary to track more than MaxLookAhead instructions. Since we
// include the bundled MI directly after, only add a maximum of
// (MaxLookAhead - 1) noops to EmittedInstrs.
for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
EmittedInstrs.push_front(nullptr);
// Record the instruction itself as the newest entry, then trim the history
// back to MaxLookAhead entries.
EmittedInstrs.push_front(CurrCycleInstr);
EmittedInstrs.resize(MaxLookAhead);
}
// The whole bundle has been handled; no instruction is current any more.
CurrCycleInstr = nullptr;
}
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
IsHazardRecognizerMode = false;
return PreEmitNoopsCommon(SU->getInstr());
@ -188,17 +221,15 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
IsHazardRecognizerMode = true;
CurrCycleInstr = MI;
unsigned W = PreEmitNoopsCommon(MI);
fixVMEMtoScalarWriteHazards(MI);
fixSMEMtoVectorWriteHazards(MI);
fixVcmpxExecWARHazard(MI);
fixLdsBranchVmemWARHazard(MI);
fixHazards(MI);
CurrCycleInstr = nullptr;
return W;
}
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (MI->isBundle())
return 0;
int WaitStates = std::max(0, checkAnyInstHazards(MI));
if (SIInstrInfo::isSMRD(*MI))
@ -264,6 +295,11 @@ void GCNHazardRecognizer::AdvanceCycle() {
CurrCycleInstr->isKill())
return;
if (CurrCycleInstr->isBundle()) {
processBundle();
return;
}
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
@ -304,8 +340,11 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
int WaitStates,
IsExpiredFn IsExpired,
DenseSet<const MachineBasicBlock *> &Visited) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
continue;
for (auto E = MBB->rend() ; I != E; ++I) {
if (IsHazard(&*I))
return WaitStates;
@ -437,9 +476,9 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
// instructions in this group may return out of order and/or may be
// replayed (i.e. the same instruction issued more than once).
//
// In order to handle these situations correctly we need to make sure
// that when a clause has more than one instruction, no instruction in the
// clause writes to a register that is read another instruction in the clause
// In order to handle these situations correctly we need to make sure that
// when a clause has more than one instruction, no instruction in the clause
// writes to a register that is read by another instruction in the clause
// (including itself). If we encounter this situation, we need to break the
// clause by inserting a non SMEM instruction.
@ -525,7 +564,6 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
// SGPR was written by a VALU Instruction.
const int VmemSgprWaitStates = 5;
auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
for (const MachineOperand &Use : VMEM->uses()) {
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
continue;
@ -795,6 +833,13 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
SMovRelWaitStates);
}
/// Apply each of the hazard fix-up routines to \p MI, in a fixed order:
/// VMEM-to-scalar-write, SMEM-to-vector-write, v_cmpx/EXEC WAR, and
/// LDS-branch-VMEM WAR. The helpers' return values are not used here.
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  (void)fixVMEMtoScalarWriteHazards(MI);
  (void)fixSMEMtoVectorWriteHazards(MI);
  (void)fixVcmpxExecWARHazard(MI);
  (void)fixLdsBranchVmemWARHazard(MI);
}
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
if (!ST.hasVMEMtoScalarWriteHazard())
return false;

View File

@ -60,6 +60,10 @@ private:
void addClauseInst(const MachineInstr &MI);
// Advance over a MachineInstr bundle. Look for hazards in the bundled
// instructions.
void processBundle();
int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
@ -80,6 +84,8 @@ private:
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
void fixHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
bool fixVcmpxExecWARHazard(MachineInstr *MI);

View File

@ -0,0 +1,94 @@
# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s
# GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle
# GCN: }
# XNACK-NEXT: S_NOP
# NOXNACK-NOT: S_NOP
# GCN: S_LOAD_DWORDX2_IMM
---
name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle
body: |
bb.0:
BUNDLE implicit-def $sgpr6_sgpr7 {
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
}
$sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
# GFX9-LABEL: name: hazard_precedes_bundle
# GFX9: S_MOV_B32
# GFX9-NEXT: S_NOP
# GFX9: BUNDLE
# GFX9-NEXT: S_NOP
---
name: hazard_precedes_bundle
body: |
bb.0:
$m0 = S_MOV_B32 $sgpr7
S_SENDMSG 3, implicit $exec, implicit $m0
$m0 = S_MOV_B32 $sgpr8
BUNDLE implicit-def $vgpr0 {
$vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec
}
S_ENDPGM 0
...
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
# GCN: S_LOAD_DWORDX2_IMM
# GCN-NEXT: }
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_ignore_bundle_instr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit $vgpr0, implicit $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec {
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
}
BUNDLE implicit-def $sgpr0_sgpr1, implicit $sgpr10_sgpr11 {
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
}
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
# GCN: bb.2:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_after_bundle
body: |
bb.0:
successors: %bb.2
BUNDLE implicit-def $vgpr1, implicit $vgpr0 {
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
}
S_NOP 0
S_BRANCH %bb.2
bb.1:
successors: %bb.2
BUNDLE implicit-def $vgpr1, implicit $vgpr0 {
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
}
bb.2:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...

View File

@ -0,0 +1,66 @@
# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s
# GCN-LABEL: name: break_smem_clause_max_look_ahead_in_bundle
# GCN: S_LOAD_DWORDX2_IMM
# XNACK-NEXT: S_NOP
# NOXNACK-NOT: S_NOP
# GCN: S_LOAD_DWORDX2
# XNACK-NEXT: S_NOP
# NOXNACK-NOT: S_NOP
# GCN: }
---
name: break_smem_clause_max_look_ahead_in_bundle
body: |
bb.0:
BUNDLE implicit-def $sgpr6_sgpr7 {
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 4, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 8, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 12, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 16, 0, 0
$sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM $sgpr14_sgpr15, 0, 0, 0
}
S_ENDPGM 0
...
# GFX10-LABEL: name: hazard_smem_war_in_bundle
# GFX10: S_LOAD_DWORD_IMM
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GFX10: V_CMP_EQ_F32
---
name: hazard_smem_war_in_bundle
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
BUNDLE implicit-def $sgpr0_sgpr1 {
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
}
S_ENDPGM 0
...
# GFX9-LABEL: name: hazard_ignore_dbg_label_in_bundle
# GFX9: DBG_LABEL 6
# GFX9-NEXT: S_NOP 0
# GFX9: S_SENDMSG 3, implicit $exec, implicit $m0
---
name: hazard_ignore_dbg_label_in_bundle
body: |
bb.0:
BUNDLE {
$m0 = S_MOV_B32 killed $sgpr12
DBG_LABEL 0
DBG_LABEL 1
DBG_LABEL 2
DBG_LABEL 3
DBG_LABEL 4
DBG_LABEL 5
DBG_LABEL 6
S_SENDMSG 3, implicit $exec, implicit $m0
}
S_ENDPGM 0
...