AMDGPU/InsertWaitcnts: Cleanup some old cruft (NFCI)

Summary: Remove redundant logic and simplify control flow.

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D54086

llvm-svn: 346363
This commit is contained in:
Nicolai Haehnle 2018-11-07 21:53:36 +00:00
parent 0ab31c9c44
commit 61396ff67c
1 changed files with 69 additions and 89 deletions

View File

@ -880,24 +880,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// Start with an assumption that there is no need to emit. // Start with an assumption that there is no need to emit.
unsigned int EmitWaitcnt = 0; unsigned int EmitWaitcnt = 0;
// No need to wait before phi. If a phi-move exists, then the wait should
// have been inserted before the move. If a phi-move does not exist, then
// the wait should be inserted before the real use. The same is true for
// sc-merge. It is not a coincidence that all these cases correspond to the
// instructions that are skipped in the assembling loop.
bool NeedLineMapping = false; // TODO: Check on this.
// ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
bool ForceEmitZeroWaitcnt = false; bool ForceEmitZeroWaitcnt = false;
setForceEmitWaitcnt(); setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt(); bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
if (MI.isDebugInstr() && if (MI.isDebugInstr())
// TODO: any other opcode?
!NeedLineMapping) {
return; return;
}
// See if an s_waitcnt is forced at block entry, or is needed at // See if an s_waitcnt is forced at block entry, or is needed at
// program end. // program end.
@ -1141,7 +1131,6 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
if (EmitWaitcnt || IsForceEmitWaitcnt) { if (EmitWaitcnt || IsForceEmitWaitcnt) {
int CntVal[NUM_INST_CNTS]; int CntVal[NUM_INST_CNTS];
bool UseDefaultWaitcntStrategy = true;
if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) { if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
// Force all waitcnts to 0. // Force all waitcnts to 0.
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
@ -1151,10 +1140,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
CntVal[VM_CNT] = 0; CntVal[VM_CNT] = 0;
CntVal[EXP_CNT] = 0; CntVal[EXP_CNT] = 0;
CntVal[LGKM_CNT] = 0; CntVal[LGKM_CNT] = 0;
UseDefaultWaitcntStrategy = false; } else {
}
if (UseDefaultWaitcntStrategy) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) { T = (enum InstCounterType)(T + 1)) {
if (EmitWaitcnt & CNT_MASK(T)) { if (EmitWaitcnt & CNT_MASK(T)) {
@ -1178,95 +1164,89 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
} }
} }
// If we are not waiting on any counter we can skip the wait altogether. MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) { int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt(); if (!OldWaitcnt ||
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm(); (AMDGPU::decodeVmcnt(IV, Imm) !=
if (!OldWaitcnt || (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
(AMDGPU::decodeVmcnt(IV, Imm) != (AMDGPU::decodeExpcnt(IV, Imm) !=
(CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) || (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
(AMDGPU::decodeExpcnt(IV, Imm) != (AMDGPU::decodeLgkmcnt(IV, Imm) !=
(CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) || (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
(AMDGPU::decodeLgkmcnt(IV, Imm) != MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) { if (ContainingLoop) {
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent()); MachineBasicBlock *TBB = ContainingLoop->getHeader();
if (ContainingLoop) { BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
MachineBasicBlock *TBB = ContainingLoop->getHeader(); if (!ScoreBracket) {
BlockWaitcntBrackets *ScoreBracket = assert(!BlockVisitedSet.count(TBB));
BlockWaitcntBracketsMap[TBB].get(); BlockWaitcntBracketsMap[TBB] =
if (!ScoreBracket) { llvm::make_unique<BlockWaitcntBrackets>(ST);
assert(!BlockVisitedSet.count(TBB)); ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
BlockWaitcntBracketsMap[TBB] =
llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
}
ScoreBracket->setRevisitLoop(true);
LLVM_DEBUG(dbgs()
<< "set-revisit2: Block"
<< ContainingLoop->getHeader()->getNumber() << '\n';);
} }
ScoreBracket->setRevisitLoop(true);
LLVM_DEBUG(dbgs() << "set-revisit2: Block"
<< ContainingLoop->getHeader()->getNumber() << '\n';);
} }
}
// Update an existing waitcount, or make a new one. // Update an existing waitcount, or make a new one.
unsigned Enc = AMDGPU::encodeWaitcnt(IV, unsigned Enc = AMDGPU::encodeWaitcnt(IV,
ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT], ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT], ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]); ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
// We don't remove waitcnts that existed prior to the waitcnt // We don't remove waitcnts that existed prior to the waitcnt
// pass. Check if the waitcnt to-be-inserted can be avoided // pass. Check if the waitcnt to-be-inserted can be avoided
// or if the prev waitcnt can be updated. // or if the prev waitcnt can be updated.
bool insertSWaitInst = true; bool insertSWaitInst = true;
for (MachineBasicBlock::iterator I = MI.getIterator(), for (MachineBasicBlock::iterator I = MI.getIterator(),
B = MI.getParent()->begin(); B = MI.getParent()->begin();
insertSWaitInst && I != B; --I) { insertSWaitInst && I != B; --I) {
if (I == MI.getIterator()) if (I == MI.getIterator())
continue; continue;
switch (I->getOpcode()) { switch (I->getOpcode()) {
case AMDGPU::S_WAITCNT: case AMDGPU::S_WAITCNT:
if (isWaitcntStronger(I->getOperand(0).getImm(), Enc)) if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
insertSWaitInst = false; insertSWaitInst = false;
else if (!OldWaitcnt) { else if (!OldWaitcnt) {
OldWaitcnt = &*I; OldWaitcnt = &*I;
Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc); Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
}
break;
// TODO: skip over instructions which never require wait.
} }
break; break;
// TODO: skip over instructions which never require wait.
} }
if (insertSWaitInst) { break;
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) { }
if (ForceEmitZeroWaitcnts) if (insertSWaitInst) {
LLVM_DEBUG( if (OldWaitcnt) {
dbgs() assert(OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT);
<< "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n"); if (ForceEmitZeroWaitcnts)
if (IsForceEmitWaitcnt) LLVM_DEBUG(dbgs()
LLVM_DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
<< "Force emit a s_waitcnt due to debug counter\n"); if (IsForceEmitWaitcnt)
LLVM_DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
OldWaitcnt->getOperand(0).setImm(Enc); OldWaitcnt->getOperand(0).setImm(Enc);
if (!OldWaitcnt->getParent()) if (!OldWaitcnt->getParent())
MI.getParent()->insert(MI, OldWaitcnt); MI.getParent()->insert(MI, OldWaitcnt);
LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n" LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
<< "Old Instr: " << MI << '\n' << "Old Instr: " << MI << '\n'
<< "New Instr: " << *OldWaitcnt << '\n'); << "New Instr: " << *OldWaitcnt << '\n');
} else { } else {
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(), auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(Enc); .addImm(Enc);
TrackedWaitcntSet.insert(SWaitInst); TrackedWaitcntSet.insert(SWaitInst);
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n" LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
<< "Old Instr: " << MI << '\n' << "Old Instr: " << MI << '\n'
<< "New Instr: " << *SWaitInst << '\n'); << "New Instr: " << *SWaitInst << '\n');
}
} }
}
if (CntVal[EXP_CNT] == 0) { if (CntVal[EXP_CNT] == 0) {
ScoreBrackets->setMixedExpTypes(false); ScoreBrackets->setMixedExpTypes(false);
}
} }
} }
} }