[AMDGPU] Move insertion of function entry waitcnt later

This allows tracking these as preexisting waitcnt.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D101380
This commit is contained in:
Austin Kerbow 2021-04-27 09:29:27 -07:00
parent f6d7fc801b
commit 6617a5a5ea
2 changed files with 35 additions and 22 deletions

View File

@ -1600,6 +1600,28 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
TrackedWaitcntSet.clear();
BlockInfos.clear();
bool Modified = false;
if (!MFI->isEntryFunction()) {
// Wait for any outstanding memory operations that the input registers may
// depend on. We can't track them and it's better to do the wait after the
// costly call sequence.
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
MachineBasicBlock::iterator I = EntryBB.begin();
for (MachineBasicBlock::iterator E = EntryBB.end();
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
if (ST->hasVscnt())
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
.addImm(0);
Modified = true;
}
// Keep iterating over the blocks in reverse post order, inserting and
// updating s_waitcnt where needed, until a fix point is reached.
@ -1607,7 +1629,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
BlockInfos.insert({MBB, BlockInfo(MBB)});
std::unique_ptr<WaitcntBrackets> Brackets;
bool Modified = false;
bool Repeat;
do {
Repeat = false;
@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
}
if (!MFI->isEntryFunction()) {
// Wait for any outstanding memory operations that the input registers may
// depend on. We can't track them and it's better to the wait after the
// costly call sequence.
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
MachineBasicBlock::iterator I = EntryBB.begin();
for (MachineBasicBlock::iterator E = EntryBB.end();
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
if (ST->hasVscnt())
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
.addImm(0);
Modified = true;
}
return Modified;
}

View File

@ -192,4 +192,17 @@ body: |
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
KILL $vgpr0
# Combine preexisting waitcnt with wait added to the start of a non-entry function.
---
name: test_waitcnt_preexisting_func_start
body: |
bb.0:
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
; GFX9: S_WAITCNT 0
; GFX9-NOT: S_WAITCNT 0
; GFX9: S_ENDPGM 0
S_WAITCNT 0
S_ENDPGM 0
...