forked from OSchip/llvm-project
Rework a bit of the implementation of loop block rotation to not rely so
heavily on AnalyzeBranch. That routine doesn't behave as we want given that rotation occurs mid-way through re-ordering the function. Instead merely check that there are not unanalyzable branching constructs present, and then reason about the CFG via successor lists. This actually simplifies my mental model for all of this as well. The concrete result is that we now will rotate more loop chains. I've added a test case from Olden highlighting the effect. There is still a bit more to do here though in order to regain all of the performance in Olden. llvm-svn: 145179
This commit is contained in:
parent
0bcbde46e2
commit
a054580993
|
@ -585,12 +585,14 @@ void MachineBlockPlacement::rotateLoop(MachineLoop &L,
|
|||
|
||||
// We need to analyze the branch and determine if rotating the loop will
|
||||
// completely remove a branch. We bail if the analysis fails or we don't find
|
||||
// an unconditional backedge.
|
||||
// an unconditional backedge. Note that this has to handle cases where the
|
||||
// original order was rotated, and the chain has un-done it. As a result,
|
||||
// there may not (yet) be the uncondiationl branch, but we can tell whether
|
||||
// one will be added when updating the terminators.
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
if (TII->AnalyzeBranch(*Latch, TBB, FBB, Cond) || !TBB || FBB || !Cond.empty())
|
||||
if (TII->AnalyzeBranch(*Latch, TBB, FBB, Cond) || !Cond.empty())
|
||||
return;
|
||||
assert(TBB == Header && "Found backedge that doesn't go to the header!");
|
||||
|
||||
// Next we need to find a split point. This rotate algorithm is *very*
|
||||
// narrow, and it only tries to do the rotate when it can find a split point
|
||||
|
@ -604,20 +606,29 @@ void MachineBlockPlacement::rotateLoop(MachineLoop &L,
|
|||
I != E; ++I) {
|
||||
Cond.clear();
|
||||
TBB = FBB = 0;
|
||||
if (TII->AnalyzeBranch(**I, TBB, FBB, Cond) || !TBB || Cond.empty())
|
||||
// Ensure that this is a block with a conditional branch which we can
|
||||
// analyze and re-form after rotating the loop. While it might be tempting
|
||||
// to use the TBB or FBB output parameters from this, they will often be
|
||||
// lies as they are only correct after the terminators have been updated,
|
||||
// and we are mid-chain formation.
|
||||
if (TII->AnalyzeBranch(**I, TBB, FBB, Cond) || Cond.empty())
|
||||
continue;
|
||||
// Swap things so that the in-loop successor is always in FBB or is an
|
||||
// implicit fallthrough.
|
||||
if (FBB && !LoopBlockSet.count(FBB))
|
||||
std::swap(TBB, FBB);
|
||||
// Check that we actually have a loop exit branch.
|
||||
// FIXME: We should probably just use the LoopInfo for this.
|
||||
if (!LoopBlockSet.count(TBB) && (!FBB || !LoopBlockSet.count(FBB))) {
|
||||
// This is a very arbitrary restriction, but it ensures we don't disrupt
|
||||
// the existing chain layout. We insist that the in-loop successor is
|
||||
// chained as a fallthrough successor (even if the branch hasn't been
|
||||
// updated to reflect that yet).
|
||||
if (FBB && FBB != *llvm::prior(I))
|
||||
|
||||
// See if this block is an exiting block from the loop. LoopInfo provides
|
||||
// a nice API for this, but it's actuall N*M runtime where N is the number
|
||||
// of blocks in the loop and M is the number of successors. We can
|
||||
// eliminate the N by doing this ourselves.
|
||||
// FIXME: The LoopInfo datastructure should be improved for these types of
|
||||
// queries.
|
||||
MachineBasicBlock *ExitB = 0;
|
||||
for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), SE = (*I)->succ_end();
|
||||
SI != SE; ++SI) {
|
||||
if (!(*SI)->isLandingPad() && *SI != *I && !LoopBlockSet.count(*SI)) {
|
||||
ExitB = *SI;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ExitB)
|
||||
continue;
|
||||
|
||||
NewBottom = *I;
|
||||
|
@ -625,7 +636,6 @@ void MachineBlockPlacement::rotateLoop(MachineLoop &L,
|
|||
SplitIt = I.base();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!NewBottom || !NewTop ||
|
||||
SplitIt == LoopChain.end() || SplitIt == LoopChain.begin())
|
||||
return;
|
||||
|
|
|
@ -199,6 +199,44 @@ exit:
|
|||
ret i32 %base
|
||||
}
|
||||
|
||||
define void @test_loop_rotate_reversed_blocks() {
|
||||
; This test case (greatly reduced from an Olden bencmark) ensures that the loop
|
||||
; rotate implementation doesn't assume that loops are laid out in a particular
|
||||
; order. The first loop will get split into two basic blocks, with the loop
|
||||
; header coming after the loop latch.
|
||||
;
|
||||
; CHECK: test_loop_rotate_reversed_blocks
|
||||
; CHECK: %entry
|
||||
; Look for a jump into the middle of the loop, and no branches mid-way.
|
||||
; CHECK: jmp
|
||||
; CHECK: %loop1
|
||||
; CHECK-NOT: j{{\w*}} .LBB{{.*}}
|
||||
; CHECK: %loop1
|
||||
; CHECK: je
|
||||
|
||||
entry:
|
||||
%cond1 = load volatile i1* undef
|
||||
br i1 %cond1, label %loop2.preheader, label %loop1
|
||||
|
||||
loop1:
|
||||
call i32 @f()
|
||||
%cond2 = load volatile i1* undef
|
||||
br i1 %cond2, label %loop2.preheader, label %loop1
|
||||
|
||||
loop2.preheader:
|
||||
call i32 @f()
|
||||
%cond3 = load volatile i1* undef
|
||||
br i1 %cond3, label %exit, label %loop2
|
||||
|
||||
loop2:
|
||||
call i32 @f()
|
||||
%cond4 = load volatile i1* undef
|
||||
br i1 %cond4, label %exit, label %loop2
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @test_loop_align(i32 %i, i32* %a) {
|
||||
; Check that we provide basic loop body alignment with the block placement
|
||||
; pass.
|
||||
|
@ -567,8 +605,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
|
|||
; CHECK: test_unnatural_cfg_backwards_inner_loop
|
||||
; CHECK: %entry
|
||||
; CHECK: %body
|
||||
; CHECK: %loop1
|
||||
; CHECK: %loop2b
|
||||
; CHECK: %loop1
|
||||
; CHECK: %loop2a
|
||||
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue