[AMDGPU] Fix inserting combined s_nop in bundles

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D90334
This commit is contained in:
Austin Kerbow 2020-10-28 12:22:23 -07:00
parent 4e4abd16a7
commit 8b127a8661
5 changed files with 33 additions and 9 deletions

View File

@ -207,9 +207,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return NoHazard;
}
static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
.addImm(0);
/// Emit enough S_NOP instructions at \p MI's position in its parent block to
/// account for \p Quantity in total.
///
/// \param MI       Instruction used as the insertion point; its debug location
///                 is reused for every S_NOP that is built.
/// \param TII      Instruction info used to look up the S_NOP descriptor.
/// \param Quantity Total amount to cover. Nothing is emitted when this is 0.
static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
                                unsigned Quantity) {
  // Each S_NOP covers at most 8 of the remaining quantity, and its immediate
  // is the covered amount minus one (so the largest immediate used is 7).
  // NOTE(review): presumably S_NOP N stalls for N + 1 wait states -- confirm
  // against the ISA documentation.
  for (unsigned Remaining = Quantity; Remaining != 0;) {
    const unsigned Covered = Remaining >= 8 ? 8 : Remaining;
    Remaining -= Covered;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
        .addImm(Covered - 1);
  }
}
void GCNHazardRecognizer::processBundle() {
@ -220,11 +229,11 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = &*MI;
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
if (IsHazardRecognizerMode)
if (IsHazardRecognizerMode) {
fixHazards(CurrCycleInstr);
for (unsigned i = 0; i < WaitStates; ++i)
insertNoopInBundle(CurrCycleInstr, TII);
insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
}
// It's unnecessary to track more than MaxLookAhead instructions. Since we
// include the bundled MI directly after, only add a maximum of

View File

@ -64,3 +64,21 @@ body: |
}
S_ENDPGM 0
...
# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
# GCN: S_LOAD_DWORDX2_IMM
# GCN-NEXT: S_NOP 3
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_in_bundle
body: |
bb.0:
BUNDLE {
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
}
S_ENDPGM 0
...

View File

@ -11,7 +11,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_p gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)

View File

@ -16,7 +16,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_release_all gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)

View File

@ -11,7 +11,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_v gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)