forked from OSchip/llvm-project
AMDGPU/InsertWaitcnts: Cleanup some old cruft (NFCI)
Summary: Remove redundant logic and simplify control flow.

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D54086

llvm-svn: 346363
This commit is contained in:
parent
0ab31c9c44
commit
61396ff67c
|
@ -880,24 +880,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||||
// Start with an assumption that there is no need to emit.
|
// Start with an assumption that there is no need to emit.
|
||||||
unsigned int EmitWaitcnt = 0;
|
unsigned int EmitWaitcnt = 0;
|
||||||
|
|
||||||
// No need to wait before phi. If a phi-move exists, then the wait should
|
|
||||||
// has been inserted before the move. If a phi-move does not exist, then
|
|
||||||
// wait should be inserted before the real use. The same is true for
|
|
||||||
// sc-merge. It is not a coincident that all these cases correspond to the
|
|
||||||
// instructions that are skipped in the assembling loop.
|
|
||||||
bool NeedLineMapping = false; // TODO: Check on this.
|
|
||||||
|
|
||||||
// ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
|
// ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
|
||||||
bool ForceEmitZeroWaitcnt = false;
|
bool ForceEmitZeroWaitcnt = false;
|
||||||
|
|
||||||
setForceEmitWaitcnt();
|
setForceEmitWaitcnt();
|
||||||
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
|
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
|
||||||
|
|
||||||
if (MI.isDebugInstr() &&
|
if (MI.isDebugInstr())
|
||||||
// TODO: any other opcode?
|
|
||||||
!NeedLineMapping) {
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
// See if an s_waitcnt is forced at block entry, or is needed at
|
// See if an s_waitcnt is forced at block entry, or is needed at
|
||||||
// program end.
|
// program end.
|
||||||
|
@ -1141,7 +1131,6 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||||
if (EmitWaitcnt || IsForceEmitWaitcnt) {
|
if (EmitWaitcnt || IsForceEmitWaitcnt) {
|
||||||
int CntVal[NUM_INST_CNTS];
|
int CntVal[NUM_INST_CNTS];
|
||||||
|
|
||||||
bool UseDefaultWaitcntStrategy = true;
|
|
||||||
if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
|
if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
|
||||||
// Force all waitcnts to 0.
|
// Force all waitcnts to 0.
|
||||||
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
||||||
|
@ -1151,10 +1140,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||||
CntVal[VM_CNT] = 0;
|
CntVal[VM_CNT] = 0;
|
||||||
CntVal[EXP_CNT] = 0;
|
CntVal[EXP_CNT] = 0;
|
||||||
CntVal[LGKM_CNT] = 0;
|
CntVal[LGKM_CNT] = 0;
|
||||||
UseDefaultWaitcntStrategy = false;
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
if (UseDefaultWaitcntStrategy) {
|
|
||||||
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
||||||
T = (enum InstCounterType)(T + 1)) {
|
T = (enum InstCounterType)(T + 1)) {
|
||||||
if (EmitWaitcnt & CNT_MASK(T)) {
|
if (EmitWaitcnt & CNT_MASK(T)) {
|
||||||
|
@ -1178,95 +1164,89 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are not waiting on any counter we can skip the wait altogether.
|
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
|
||||||
if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
|
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
|
||||||
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
|
if (!OldWaitcnt ||
|
||||||
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
|
(AMDGPU::decodeVmcnt(IV, Imm) !=
|
||||||
if (!OldWaitcnt ||
|
(CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
|
||||||
(AMDGPU::decodeVmcnt(IV, Imm) !=
|
(AMDGPU::decodeExpcnt(IV, Imm) !=
|
||||||
(CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
|
(CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
|
||||||
(AMDGPU::decodeExpcnt(IV, Imm) !=
|
(AMDGPU::decodeLgkmcnt(IV, Imm) !=
|
||||||
(CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
|
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
|
||||||
(AMDGPU::decodeLgkmcnt(IV, Imm) !=
|
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
|
||||||
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
|
if (ContainingLoop) {
|
||||||
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
|
MachineBasicBlock *TBB = ContainingLoop->getHeader();
|
||||||
if (ContainingLoop) {
|
BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
|
||||||
MachineBasicBlock *TBB = ContainingLoop->getHeader();
|
if (!ScoreBracket) {
|
||||||
BlockWaitcntBrackets *ScoreBracket =
|
assert(!BlockVisitedSet.count(TBB));
|
||||||
BlockWaitcntBracketsMap[TBB].get();
|
BlockWaitcntBracketsMap[TBB] =
|
||||||
if (!ScoreBracket) {
|
llvm::make_unique<BlockWaitcntBrackets>(ST);
|
||||||
assert(!BlockVisitedSet.count(TBB));
|
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
|
||||||
BlockWaitcntBracketsMap[TBB] =
|
|
||||||
llvm::make_unique<BlockWaitcntBrackets>(ST);
|
|
||||||
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
|
|
||||||
}
|
|
||||||
ScoreBracket->setRevisitLoop(true);
|
|
||||||
LLVM_DEBUG(dbgs()
|
|
||||||
<< "set-revisit2: Block"
|
|
||||||
<< ContainingLoop->getHeader()->getNumber() << '\n';);
|
|
||||||
}
|
}
|
||||||
|
ScoreBracket->setRevisitLoop(true);
|
||||||
|
LLVM_DEBUG(dbgs() << "set-revisit2: Block"
|
||||||
|
<< ContainingLoop->getHeader()->getNumber() << '\n';);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update an existing waitcount, or make a new one.
|
// Update an existing waitcount, or make a new one.
|
||||||
unsigned Enc = AMDGPU::encodeWaitcnt(IV,
|
unsigned Enc = AMDGPU::encodeWaitcnt(IV,
|
||||||
ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
|
ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
|
||||||
ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
|
ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
|
||||||
ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
|
ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
|
||||||
// We don't remove waitcnts that existed prior to the waitcnt
|
// We don't remove waitcnts that existed prior to the waitcnt
|
||||||
// pass. Check if the waitcnt to-be-inserted can be avoided
|
// pass. Check if the waitcnt to-be-inserted can be avoided
|
||||||
// or if the prev waitcnt can be updated.
|
// or if the prev waitcnt can be updated.
|
||||||
bool insertSWaitInst = true;
|
bool insertSWaitInst = true;
|
||||||
for (MachineBasicBlock::iterator I = MI.getIterator(),
|
for (MachineBasicBlock::iterator I = MI.getIterator(),
|
||||||
B = MI.getParent()->begin();
|
B = MI.getParent()->begin();
|
||||||
insertSWaitInst && I != B; --I) {
|
insertSWaitInst && I != B; --I) {
|
||||||
if (I == MI.getIterator())
|
if (I == MI.getIterator())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
switch (I->getOpcode()) {
|
switch (I->getOpcode()) {
|
||||||
case AMDGPU::S_WAITCNT:
|
case AMDGPU::S_WAITCNT:
|
||||||
if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
|
if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
|
||||||
insertSWaitInst = false;
|
insertSWaitInst = false;
|
||||||
else if (!OldWaitcnt) {
|
else if (!OldWaitcnt) {
|
||||||
OldWaitcnt = &*I;
|
OldWaitcnt = &*I;
|
||||||
Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
|
Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
|
||||||
}
|
|
||||||
break;
|
|
||||||
// TODO: skip over instructions which never require wait.
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
// TODO: skip over instructions which never require wait.
|
||||||
}
|
}
|
||||||
if (insertSWaitInst) {
|
break;
|
||||||
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
|
}
|
||||||
if (ForceEmitZeroWaitcnts)
|
if (insertSWaitInst) {
|
||||||
LLVM_DEBUG(
|
if (OldWaitcnt) {
|
||||||
dbgs()
|
assert(OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT);
|
||||||
<< "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
|
if (ForceEmitZeroWaitcnts)
|
||||||
if (IsForceEmitWaitcnt)
|
LLVM_DEBUG(dbgs()
|
||||||
LLVM_DEBUG(dbgs()
|
<< "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
|
||||||
<< "Force emit a s_waitcnt due to debug counter\n");
|
if (IsForceEmitWaitcnt)
|
||||||
|
LLVM_DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
|
||||||
|
|
||||||
OldWaitcnt->getOperand(0).setImm(Enc);
|
OldWaitcnt->getOperand(0).setImm(Enc);
|
||||||
if (!OldWaitcnt->getParent())
|
if (!OldWaitcnt->getParent())
|
||||||
MI.getParent()->insert(MI, OldWaitcnt);
|
MI.getParent()->insert(MI, OldWaitcnt);
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
|
LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
|
||||||
<< "Old Instr: " << MI << '\n'
|
<< "Old Instr: " << MI << '\n'
|
||||||
<< "New Instr: " << *OldWaitcnt << '\n');
|
<< "New Instr: " << *OldWaitcnt << '\n');
|
||||||
} else {
|
} else {
|
||||||
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
|
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
|
||||||
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
|
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
|
||||||
.addImm(Enc);
|
.addImm(Enc);
|
||||||
TrackedWaitcntSet.insert(SWaitInst);
|
TrackedWaitcntSet.insert(SWaitInst);
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
|
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
|
||||||
<< "Old Instr: " << MI << '\n'
|
<< "Old Instr: " << MI << '\n'
|
||||||
<< "New Instr: " << *SWaitInst << '\n');
|
<< "New Instr: " << *SWaitInst << '\n');
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (CntVal[EXP_CNT] == 0) {
|
if (CntVal[EXP_CNT] == 0) {
|
||||||
ScoreBrackets->setMixedExpTypes(false);
|
ScoreBrackets->setMixedExpTypes(false);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue