forked from OSchip/llvm-project
[unroll] Make the unroll cost analysis terminate deterministically and
reasonably quickly. I don't have a reduced test case, but for a version of FFMPEG, this makes the loop unroller finish at all (after over 15 minutes of running, it hadn't terminated for me; no idea if it was a true infinite loop or just exponential work). The key thing here is to check the DeadInstructions set when pulling things off the worklist. Without this, we would re-walk the user list of already dead instructions again and again and again. Consider phi nodes with many, many operands and other patterns. The other important aspect of this is that because we would keep re-visiting instructions that were already known dead, we kept adding their cost savings to the running total! This would cause our cost savings to be *insanely* inflated. While I was here, I also rotated the operand walk out of the worklist loop to make the code easier to read. There is still work to be done to minimize worklist traffic because we don't de-duplicate operands. This means we may add the same instruction onto the worklist 1000s of times if it shows up in 1000s of operands of a PHI node, for example. Still, with this patch, the ffmpeg testcase I have finishes quickly and I can't measure the runtime impact of the unroll analysis any more. I'll probably try to do a few more cleanups to this code, but not sure how much cleanup I can justify right now. llvm-svn: 229038
This commit is contained in:
parent
b4aa16f2bc
commit
93063e6191
|
@ -506,38 +506,41 @@ public:
|
|||
// Start by initializing worklist with simplified instructions.
|
||||
for (auto &FoldedKeyValue : SimplifiedValues)
|
||||
if (auto *FoldedInst = dyn_cast<Instruction>(FoldedKeyValue.first)) {
|
||||
Worklist.push_back(FoldedInst);
|
||||
DeadInstructions.insert(FoldedInst);
|
||||
|
||||
// Add each instruction operand of this dead instruction to the
|
||||
// worklist.
|
||||
for (auto *Op : FoldedInst->operand_values())
|
||||
if (auto *OpI = dyn_cast<Instruction>(Op))
|
||||
Worklist.push_back(OpI);
|
||||
}
|
||||
|
||||
// If a definition of an insn is only used by simplified or dead
|
||||
// instructions, it's also dead. Check defs of all instructions from the
|
||||
// worklist.
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *FoldedInst = Worklist.pop_back_val();
|
||||
for (Value *Op : FoldedInst->operands()) {
|
||||
if (auto *I = dyn_cast<Instruction>(Op)) {
|
||||
if (!L->contains(I))
|
||||
continue;
|
||||
if (SimplifiedValues[I])
|
||||
continue; // This insn has been counted already.
|
||||
if (I->getNumUses() == 0)
|
||||
continue;
|
||||
bool AllUsersFolded = true;
|
||||
for (User *U : I->users()) {
|
||||
Instruction *UI = dyn_cast<Instruction>(U);
|
||||
if (!SimplifiedValues[UI] && !DeadInstructions.count(UI)) {
|
||||
AllUsersFolded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (AllUsersFolded) {
|
||||
NumberOfOptimizedInstructions += TTI.getUserCost(I);
|
||||
Worklist.push_back(I);
|
||||
DeadInstructions.insert(I);
|
||||
}
|
||||
Instruction *I = Worklist.pop_back_val();
|
||||
if (!L->contains(I))
|
||||
continue;
|
||||
if (DeadInstructions.count(I))
|
||||
continue;
|
||||
if (I->getNumUses() == 0)
|
||||
continue;
|
||||
bool AllUsersFolded = true;
|
||||
for (User *U : I->users()) {
|
||||
Instruction *UI = dyn_cast<Instruction>(U);
|
||||
if (!SimplifiedValues[UI] && !DeadInstructions.count(UI)) {
|
||||
AllUsersFolded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (AllUsersFolded) {
|
||||
NumberOfOptimizedInstructions += TTI.getUserCost(I);
|
||||
DeadInstructions.insert(I);
|
||||
for (auto *Op : I->operand_values())
|
||||
if (auto *OpI = dyn_cast<Instruction>(Op))
|
||||
Worklist.push_back(OpI);
|
||||
}
|
||||
}
|
||||
return NumberOfOptimizedInstructions;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue