[StructurizeCFG] Fix region nodes ordering

This is a reimplementation of the `orderNodes` function, as the old
implementation didn't take into account all cases.

Fix PR41509

Differential Revision: https://reviews.llvm.org/D79037
This commit is contained in:
Ehud Katz 2020-05-13 15:33:36 +03:00
parent f61f6ffe11
commit 897d8ee5cd
2 changed files with 352 additions and 48 deletions

View File

@ -215,7 +215,6 @@ class StructurizeCFG : public RegionPass {
void orderNodes(); void orderNodes();
Loop *getAdjustedLoop(RegionNode *RN); Loop *getAdjustedLoop(RegionNode *RN);
unsigned getAdjustedLoopDepth(RegionNode *RN);
void analyzeLoops(RegionNode *N); void analyzeLoops(RegionNode *N);
@ -324,65 +323,108 @@ Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) {
return LI->getLoopFor(RN->getEntry()); return LI->getLoopFor(RN->getEntry());
} }
/// Use the exit block to determine the loop depth if RN is a SubRegion. /// Build up the general order of nodes, by performing a topological sort of the
unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) { /// parent region's nodes, while ensuring that there is no outer loop node
if (RN->isSubRegion()) { /// between any two inner loop nodes.
Region *SubR = RN->getNodeAs<Region>();
return LI->getLoopDepth(SubR->getExit());
}
return LI->getLoopDepth(RN->getEntry());
}
/// Build up the general order of nodes
void StructurizeCFG::orderNodes() { void StructurizeCFG::orderNodes() {
ReversePostOrderTraversal<Region*> RPOT(ParentRegion); SmallVector<RegionNode *, 32> POT;
SmallDenseMap<Loop*, unsigned, 8> LoopBlocks; SmallDenseMap<Loop *, unsigned, 8> LoopSizes;
for (RegionNode *RN : post_order(ParentRegion)) {
POT.push_back(RN);
// The reverse post-order traversal of the list gives us an ordering close // Accumulate the number of nodes inside the region that belong to a loop.
// to what we want. The only problem with it is that sometimes backedges
// for outer loops will be visited before backedges for inner loops.
for (RegionNode *RN : RPOT) {
Loop *Loop = getAdjustedLoop(RN); Loop *Loop = getAdjustedLoop(RN);
++LoopBlocks[Loop]; ++LoopSizes[Loop];
} }
// A quick exit for the case where all nodes belong to the same loop (or no
// loop at all).
if (LoopSizes.size() <= 1U) {
Order.assign(POT.begin(), POT.end());
return;
}
Order.resize(POT.size());
unsigned CurrentLoopDepth = 0; // The post-order traversal of the list gives us an ordering close to what we
// want. The only problem with it is that sometimes backedges for outer loops
// will be visited before backedges for inner loops. So now we fix that by
// inserting the nodes in order, while making sure that encountered inner loops
// are complete before their parents (outer loops).
SmallVector<Loop *, 8> WorkList;
// Get the size of the outermost region (the nodes that don't belong to any
// loop inside ParentRegion).
unsigned ZeroCurrentLoopSize = 0U;
auto LSI = LoopSizes.find(nullptr);
unsigned *CurrentLoopSize =
LSI != LoopSizes.end() ? &LSI->second : &ZeroCurrentLoopSize;
Loop *CurrentLoop = nullptr; Loop *CurrentLoop = nullptr;
for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
RegionNode *RN = cast<RegionNode>(*I);
unsigned LoopDepth = getAdjustedLoopDepth(RN);
if (is_contained(Order, *I)) // The "skipped" list is actually located at the (reversed) beginning of the
continue; // POT. This saves us the use of an intermediate container.
// Note that there is always enough room for the skipped nodes before the
// current location, as we have just passed at least that many nodes.
if (LoopDepth < CurrentLoopDepth) { auto Begin = POT.rbegin();
// Make sure we have visited all blocks in this loop before moving back to auto I = Begin, SkippedEnd = Begin;
// the outer loop. auto O = Order.rbegin(), OE = Order.rend();
while (O != OE) {
auto LoopI = I; // If we have any skipped nodes, then erase the gap between the end of the
while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { // "skipped" list, and the current location.
LoopI++; if (SkippedEnd != Begin) {
if (getAdjustedLoop(cast<RegionNode>(*LoopI)) == CurrentLoop) { POT.erase(I.base(), SkippedEnd.base());
--BlockCount; I = SkippedEnd = Begin = POT.rbegin();
Order.push_back(*LoopI);
}
}
} }
CurrentLoop = getAdjustedLoop(RN); // Keep processing outer loops, in order (from inner most, to outer).
if (CurrentLoop) if (!WorkList.empty()) {
LoopBlocks[CurrentLoop]--; CurrentLoop = WorkList.pop_back_val();
CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second;
}
CurrentLoopDepth = LoopDepth; // Keep processing loops while only going deeper (into inner loops).
Order.push_back(*I); do {
assert(I != POT.rend());
RegionNode *RN = *I++;
Loop *L = getAdjustedLoop(RN);
if (L != CurrentLoop) {
// If L is a loop inside CurrentLoop, then CurrentLoop must be the
// parent of L.
// To prove this, assume the opposite and derive a contradiction:
// Let P be the parent of L. If CurrentLoop is the parent of P, then
// the header of P must have been processed already, as it must
// dominate the other blocks of P (otherwise P is an irreducible loop,
// and won't be recorded in the LoopInfo), especially L (inside). But
// then CurrentLoop must have been updated to P at the time of
// processing the header of P, which conflicts with the assumption
// that CurrentLoop is not P.
// If L is not a loop inside CurrentLoop, then skip RN.
if (!L || L->getParentLoop() != CurrentLoop) {
// Skip the node by pushing it to the end of the "skipped" list.
*SkippedEnd++ = RN;
continue;
}
// If we still haven't processed all the nodes that belong to
// CurrentLoop, then make sure we come back later, to finish the job, by
// pushing it to the WorkList.
if (*CurrentLoopSize)
WorkList.push_back(CurrentLoop);
CurrentLoop = L;
CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second;
}
assert(O != OE);
*O++ = RN;
// If we have finished processing the current loop, then we are done here.
--*CurrentLoopSize;
} while (*CurrentLoopSize);
} }
assert(WorkList.empty());
// This pass originally used a post-order traversal and then operated on assert(SkippedEnd == Begin);
// the list in reverse. Now that we are using a reverse post-order traversal
// rather than re-working the whole pass to operate on the list in order,
// we just reverse the list and continue to operate on it in reverse.
std::reverse(Order.begin(), Order.end());
} }
/// Determine the end of the loops /// Determine the end of the loops

View File

@ -0,0 +1,262 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
; This test has an outer loop containing an inner loop,
; for which there is an interleaved post-order traversal.
;
; This used to produce incorrect code.
; For example %outer.loop.body used to branch to %inner.loop.end
; (instead of %inner.loop.header).
define i1 @test_nested(i32 %x, i1 %b1, i1 %b2, i1 %b3) {
; CHECK-LABEL: @test_nested(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true
; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]]
; CHECK: Flow12:
; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]]
; CHECK: exit.true:
; CHECK-NEXT: br label [[FLOW13]]
; CHECK: Flow13:
; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]]
; CHECK: NewDefault:
; CHECK-NEXT: br label [[EXIT_FALSE:%.*]]
; CHECK: Flow14:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit.false:
; CHECK-NEXT: br label [[FLOW14]]
; CHECK: outer.loop.header:
; CHECK-NEXT: br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]]
; CHECK: outer.loop.body:
; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP12:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]]
; CHECK: inner.loop.header:
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ]
; CHECK-NEXT: br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]]
; CHECK: Flow6:
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ]
; CHECK-NEXT: br label [[FLOW5:%.*]]
; CHECK: Flow7:
; CHECK-NEXT: br i1 [[TMP10:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]]
; CHECK: inner.loop.end:
; CHECK-NEXT: br label [[FLOW8]]
; CHECK: inner.loop.body:
; CHECK-NEXT: br i1 [[B3_INV]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]]
; CHECK: inner.loop.body.else:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]]
; CHECK: inner.loop.body.then:
; CHECK-NEXT: br label [[INNER_LOOP_COND]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP10]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]]
; CHECK: inner.loop.cond:
; CHECK-NEXT: br label [[NODEBLOCK:%.*]]
; CHECK: NodeBlock:
; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1
; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]]
; CHECK: Flow8:
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]]
; CHECK: LeafBlock1:
; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1
; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]]
; CHECK: LeafBlock:
; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0
; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]]
; CHECK: Flow9:
; CHECK-NEXT: [[TMP12]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP11]], [[FLOW8]] ]
; CHECK-NEXT: br i1 [[TMP13]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]]
; CHECK: inner.loop.break:
; CHECK-NEXT: br label [[FLOW10]]
; CHECK: Flow10:
; CHECK-NEXT: [[TMP14]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ]
; CHECK-NEXT: [[TMP15]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11]], [[LEAFBLOCK1]] ]
; CHECK-NEXT: br label [[FLOW9]]
; CHECK: outer.loop.cleanup:
; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]]
; CHECK: Flow11:
; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ]
; CHECK-NEXT: br label [[FLOW3]]
; CHECK: outer.loop.latch:
; CHECK-NEXT: br label [[FLOW11]]
; CHECK: Flow5:
; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP19]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: br label [[FLOW4]]
; CHECK: inner.loop.latch:
; CHECK-NEXT: br label [[FLOW6]]
; CHECK: exit:
; CHECK-NEXT: ret i1 [[TMP0]]
;
; Input CFG summary (read off the branches below):
;   - Outer cycle: outer.loop.header -> outer.loop.body -> inner.loop.header
;     -> ... -> outer.loop.cleanup -> outer.loop.latch -> outer.loop.header.
;   - Inner cycle: inner.loop.header -> inner.loop.body -> {then, else}
;     -> inner.loop.cond -> inner.loop.latch -> inner.loop.header.
;   - The inner cycle reaches outer.loop.cleanup through inner.loop.end and
;     through inner.loop.break.
;   - Function exits: outer.loop.header -> exit.true, and
;     inner.loop.cond -> exit.false, which leaves both cycles in one step.
; Note that the blocks are deliberately not laid out in control-flow order.
entry:
br label %outer.loop.header
exit.true: ; preds = %outer.loop.header
br label %exit
exit.false: ; preds = %inner.loop.cond
br label %exit
outer.loop.header: ; preds = %outer.loop.latch, %entry
br i1 %b1, label %outer.loop.body, label %exit.true
outer.loop.body: ; preds = %outer.loop.header
br label %inner.loop.header
inner.loop.header: ; preds = %inner.loop.latch, %outer.loop.body
br i1 %b2, label %inner.loop.body, label %inner.loop.end
inner.loop.end: ; preds = %inner.loop.header
br label %outer.loop.cleanup
inner.loop.body: ; preds = %inner.loop.header
br i1 %b3, label %inner.loop.body.then, label %inner.loop.body.else
inner.loop.body.else: ; preds = %inner.loop.body
br label %inner.loop.cond
inner.loop.body.then: ; preds = %inner.loop.body
br label %inner.loop.cond
inner.loop.cond: ; preds = %inner.loop.body.then, %inner.loop.body.else
; Three-way dispatch on %x: the default target leaves both cycles
; (exit.false), 0 takes the inner back edge via inner.loop.latch, and 1 leaves
; only the inner cycle via inner.loop.break.
switch i32 %x, label %exit.false [
i32 0, label %inner.loop.latch
i32 1, label %inner.loop.break
]
inner.loop.break: ; preds = %inner.loop.cond
br label %outer.loop.cleanup
outer.loop.cleanup: ; preds = %inner.loop.break, %inner.loop.end
br label %outer.loop.latch
outer.loop.latch: ; preds = %outer.loop.cleanup
br label %outer.loop.header
inner.loop.latch: ; preds = %inner.loop.cond
br label %inner.loop.header
exit: ; preds = %exit.false, %exit.true
%r = phi i1 [ true, %exit.true ], [ false, %exit.false ]
ret i1 %r
}
; This test checks sibling loops that by default have an
; interleaved post-order traversal.
define void @test_siblings(i1 %b1, i1 %b2, i1 %b3, i1 %b4, i1 %b5, i1 %b6, i1 %b7, i1 %b8, i1 %b9) {
; CHECK-LABEL: @test_siblings(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B9_INV:%.*]] = xor i1 [[B9:%.*]], true
; CHECK-NEXT: [[B6_INV:%.*]] = xor i1 [[B6:%.*]], true
; CHECK-NEXT: [[B2_INV:%.*]] = xor i1 [[B2:%.*]], true
; CHECK-NEXT: [[B8_INV:%.*]] = xor i1 [[B8:%.*]], true
; CHECK-NEXT: [[B5_INV:%.*]] = xor i1 [[B5:%.*]], true
; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true
; CHECK-NEXT: [[B4_INV:%.*]] = xor i1 [[B4:%.*]], true
; CHECK-NEXT: [[B1_INV:%.*]] = xor i1 [[B1:%.*]], true
; CHECK-NEXT: br i1 [[B1_INV]], label [[IF_ELSE:%.*]], label [[FLOW:%.*]]
; CHECK: if.else:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP0]], [[FLOW1:%.*]] ], [ [[B2]], [[IF_ELSE]] ], [ false, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW1]] ], [ [[B2_INV]], [[IF_ELSE]] ], [ false, [[ENTRY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW1]] ], [ false, [[IF_ELSE]] ], [ true, [[ENTRY]] ]
; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP1_HEADER:%.*]], label [[FLOW1]]
; CHECK: loop1.header:
; CHECK-NEXT: br i1 [[B3_INV]], label [[LOOP1_BODY:%.*]], label [[FLOW2:%.*]]
; CHECK: Flow2:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[LOOP1_BODY]] ], [ [[TMP1]], [[LOOP1_HEADER]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[B5_INV]], [[LOOP1_BODY]] ], [ [[B3]], [[LOOP1_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP1_LATCH:%.*]], label [[FLOW3:%.*]]
; CHECK: loop1.latch:
; CHECK-NEXT: br label [[FLOW3]]
; CHECK: Flow1:
; CHECK-NEXT: [[TMP5]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ [[TMP1]], [[FLOW]] ]
; CHECK-NEXT: br i1 true, label [[FLOW4:%.*]], label [[FLOW]]
; CHECK: loop1.body:
; CHECK-NEXT: br label [[FLOW2]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[LOOP1_LATCH]] ], [ [[TMP3]], [[FLOW2]] ]
; CHECK-NEXT: br label [[FLOW1]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[FLOW5:%.*]] ], [ [[TMP5]], [[FLOW1]] ]
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP2_HEADER:%.*]], label [[FLOW5]]
; CHECK: loop2.header:
; CHECK-NEXT: br i1 [[B6_INV]], label [[LOOP2_BODY:%.*]], label [[FLOW6:%.*]]
; CHECK: Flow5:
; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP11:%.*]], [[FLOW7:%.*]] ], [ false, [[FLOW4]] ]
; CHECK-NEXT: br i1 true, label [[FLOW8:%.*]], label [[FLOW4]]
; CHECK: loop2.body:
; CHECK-NEXT: br label [[FLOW6]]
; CHECK: Flow6:
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ true, [[LOOP2_BODY]] ], [ false, [[LOOP2_HEADER]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[B7:%.*]], [[LOOP2_BODY]] ], [ [[B6]], [[LOOP2_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP2_LATCH:%.*]], label [[FLOW7]]
; CHECK: loop2.latch:
; CHECK-NEXT: br label [[FLOW7]]
; CHECK: Flow7:
; CHECK-NEXT: [[TMP11]] = phi i1 [ false, [[LOOP2_LATCH]] ], [ [[TMP9]], [[FLOW6]] ]
; CHECK-NEXT: br label [[FLOW5]]
; CHECK: Flow8:
; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ false, [[FLOW10:%.*]] ], [ [[TMP0]], [[FLOW5]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP8]], [[FLOW5]] ]
; CHECK-NEXT: br i1 [[TMP13]], label [[LOOP3_HEADER:%.*]], label [[FLOW9:%.*]]
; CHECK: loop3.header:
; CHECK-NEXT: br label [[FLOW9]]
; CHECK: Flow9:
; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ true, [[LOOP3_HEADER]] ], [ [[TMP12]], [[FLOW8]] ]
; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP3_LATCH:%.*]], label [[FLOW10]]
; CHECK: loop3.latch:
; CHECK-NEXT: br label [[FLOW10]]
; CHECK: Flow10:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[FLOW8]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; Input CFG summary (read off the branches below):
;   - Cycle 1: loop1.header -> [loop1.body ->] loop1.latch -> loop1.header.
;   - Cycle 2: loop2.header -> [loop2.body ->] loop2.latch -> loop2.header.
;   - Cycle 3: loop3.header -> loop3.latch -> loop3.header.
;   - Cross edges between the cycles: loop1.body -> loop2.header and
;     loop2.body -> loop3.header.
;   - if.else bypasses cycle 1 and jumps to loop3.latch or loop2.header.
;   - Every latch either takes its back edge or branches to exit.
entry:
br i1 %b1, label %loop1.header, label %if.else
if.else:
br i1 %b2, label %loop3.latch, label %loop2.header
loop1.header:
br i1 %b3, label %loop1.latch, label %loop1.body
loop1.latch:
br i1 %b4, label %loop1.header, label %exit
loop1.body:
br i1 %b5, label %loop2.header, label %loop1.latch
loop2.header:
br i1 %b6, label %loop2.latch, label %loop2.body
loop2.body:
br i1 %b7, label %loop2.latch, label %loop3.header
loop2.latch:
br i1 %b8, label %loop2.header, label %exit
loop3.header:
br label %loop3.latch
loop3.latch:
br i1 %b9, label %loop3.header, label %exit
exit:
ret void
}