forked from OSchip/llvm-project
[AMDGPU] Fix inserting combined s_nop in bundles
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D90334
This commit is contained in:
parent
4e4abd16a7
commit
8b127a8661
|
@ -207,9 +207,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|||
return NoHazard;
|
||||
}
|
||||
|
||||
static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
|
||||
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
|
||||
.addImm(0);
|
||||
/// Build enough S_NOP instructions at \p MI to account for \p Quantity
/// requested no-ops, combining them instead of emitting one S_NOP per no-op.
///
/// Each S_NOP built here carries an immediate of (count - 1) and accounts for
/// `count` of the requested quantity; the immediate is capped at 7, so a
/// single instruction covers at most 8. Nothing is built when \p Quantity is 0.
///
/// \param MI       Instruction whose parent block and debug location are used;
///                 it is also passed to BuildMI as the insertion point
///                 (presumably the S_NOPs land directly before it — confirm
///                 against the BuildMI overload in use).
/// \param TII      Instruction info used to look up the S_NOP descriptor.
/// \param Quantity Total number of no-ops to account for.
static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
                                unsigned Quantity) {
  for (unsigned Remaining = Quantity; Remaining > 0;) {
    // Take as many as one S_NOP can cover (8), or whatever is left.
    const unsigned Chunk = Remaining >= 8 ? 8 : Remaining;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
        .addImm(Chunk - 1);
    Remaining -= Chunk;
  }
}
|
||||
|
||||
void GCNHazardRecognizer::processBundle() {
|
||||
|
@ -220,11 +229,11 @@ void GCNHazardRecognizer::processBundle() {
|
|||
CurrCycleInstr = &*MI;
|
||||
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
|
||||
|
||||
if (IsHazardRecognizerMode)
|
||||
if (IsHazardRecognizerMode) {
|
||||
fixHazards(CurrCycleInstr);
|
||||
|
||||
for (unsigned i = 0; i < WaitStates; ++i)
|
||||
insertNoopInBundle(CurrCycleInstr, TII);
|
||||
insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
|
||||
}
|
||||
|
||||
// It’s unnecessary to track more than MaxLookAhead instructions. Since we
|
||||
// include the bundled MI directly after, only add a maximum of
|
||||
|
|
|
@ -64,3 +64,21 @@ body: |
|
|||
}
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
|
||||
# GCN: S_LOAD_DWORDX2_IMM
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_hazard_in_bundle
|
||||
body: |
|
||||
bb.0:
|
||||
BUNDLE {
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
}
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_p gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_release_all gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
; LOOP: s_mov_b32 m0, 0{{$}}
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_v gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
|
|
Loading…
Reference in New Issue