forked from OSchip/llvm-project
Revert Revert [MBP] do not rotate loop if it creates extra branch
This is a second attempt to land this patch. The first one resulted in a crash of clang sanitizer buildbot. The fix is here and regression test is added. This is a last fix for the corner case of PR32214. Actually this is not really corner case in general. We should not do a loop rotation if we create an additional branch due to it. Consider the case where we have a loop chain H, M, B, C , where H is header with viable fallthrough from pre-header and exit from the loop M - some middle block B - backedge to Header but with exit from the loop also. C - some cold block of the loop. Let's H is determined as a best exit. If we do a loop rotation M, B, C, H we can introduce the extra branch. Let's compute the change in number of branches: +1 branch from pre-header to header -1 branch from header to exit +1 branch from header to middle block if there is such -1 branch from cold bock to header if there is one So if C is not a predecessor of H then we introduce extra branch. This change actually prohibits rotation of the loop if both true Best Exit has next element in chain as successor. Last element in chain is not a predecessor of first element of chain. Reviewers: iteratee, xur, sammccall, chandlerc Reviewed By: iteratee Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34745 llvm-svn: 307631
This commit is contained in:
parent
324d3791f8
commit
0e831c996c
|
@ -1917,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
|
|||
return;
|
||||
|
||||
MachineBasicBlock *Top = *LoopChain.begin();
|
||||
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
|
||||
|
||||
// If ExitingBB is already the last one in a chain then nothing to do.
|
||||
if (Bottom == ExitingBB)
|
||||
return;
|
||||
|
||||
bool ViableTopFallthrough = false;
|
||||
for (MachineBasicBlock *Pred : Top->predecessors()) {
|
||||
BlockChain *PredChain = BlockToChain[Pred];
|
||||
|
@ -1931,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
|
|||
// bottom is a viable exiting block. If so, bail out as rotating will
|
||||
// introduce an unnecessary branch.
|
||||
if (ViableTopFallthrough) {
|
||||
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
|
||||
for (MachineBasicBlock *Succ : Bottom->successors()) {
|
||||
BlockChain *SuccChain = BlockToChain[Succ];
|
||||
if (!LoopBlockSet.count(Succ) &&
|
||||
|
@ -1944,6 +1949,36 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
|
|||
if (ExitIt == LoopChain.end())
|
||||
return;
|
||||
|
||||
// Rotating a loop exit to the bottom when there is a fallthrough to top
|
||||
// trades the entry fallthrough for an exit fallthrough.
|
||||
// If there is no bottom->top edge, but the chosen exit block does have
|
||||
// a fallthrough, we break that fallthrough for nothing in return.
|
||||
|
||||
// Let's consider an example. We have a built chain of basic blocks
|
||||
// B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block.
|
||||
// By doing a rotation we get
|
||||
// Bk+1, ..., Bn, B1, ..., Bk
|
||||
// Break of fallthrough to B1 is compensated by a fallthrough from Bk.
|
||||
// If we had a fallthrough Bk -> Bk+1 it is broken now.
|
||||
// It might be compensated by fallthrough Bn -> B1.
|
||||
// So we have a condition to avoid creation of extra branch by loop rotation.
|
||||
// All below must be true to avoid loop rotation:
|
||||
// If there is a fallthrough to top (B1)
|
||||
// There was fallthrough from chosen exit block (Bk) to next one (Bk+1)
|
||||
// There is no fallthrough from bottom (Bn) to top (B1).
|
||||
// Please note that there is no exit fallthrough from Bn because we checked it
|
||||
// above.
|
||||
if (ViableTopFallthrough) {
|
||||
assert(std::next(ExitIt) != LoopChain.end() &&
|
||||
"Exit should not be last BB");
|
||||
MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
|
||||
if (ExitingBB->isSuccessor(NextBlockInChain))
|
||||
if (!Bottom->isSuccessor(Top))
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
|
||||
<< " at bottom\n");
|
||||
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
|
||||
}
|
||||
|
||||
|
|
|
@ -354,6 +354,7 @@ define void @unnatural_cfg2() {
|
|||
; single-source GCC.
|
||||
; CHECK-LABEL: unnatural_cfg2
|
||||
; CHECK: %entry
|
||||
; CHECK: %loop.header
|
||||
; CHECK: %loop.body1
|
||||
; CHECK: %loop.body2
|
||||
; CHECK: %loop.body4
|
||||
|
@ -361,7 +362,6 @@ define void @unnatural_cfg2() {
|
|||
; CHECK: %loop.inner2.begin
|
||||
; CHECK: %loop.body3
|
||||
; CHECK: %loop.inner1.begin
|
||||
; CHECK: %loop.header
|
||||
; CHECK: %bail
|
||||
|
||||
entry:
|
||||
|
@ -1491,6 +1491,102 @@ ret: ; preds = %endif, %then
|
|||
ret void
|
||||
}
|
||||
|
||||
define i32 @not_rotate_if_extra_branch(i32 %count) {
|
||||
; Test checks that there is no loop rotation
|
||||
; if it introduces extra branch.
|
||||
; Specifically in this case because best exit is .header
|
||||
; but it has fallthrough to .middle block and last block in
|
||||
; loop chain .slow does not have afallthrough to .header.
|
||||
; CHECK-LABEL: not_rotate_if_extra_branch
|
||||
; CHECK: %.entry
|
||||
; CHECK: %.header
|
||||
; CHECK: %.middle
|
||||
; CHECK: %.backedge
|
||||
; CHECK: %.slow
|
||||
; CHECK: %.bailout
|
||||
; CHECK: %.stop
|
||||
.entry:
|
||||
%sum.0 = shl nsw i32 %count, 1
|
||||
br label %.header
|
||||
|
||||
.header:
|
||||
%i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
|
||||
%sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
|
||||
%is_exc = icmp sgt i32 %i, 9000000
|
||||
br i1 %is_exc, label %.bailout, label %.middle, !prof !13
|
||||
|
||||
.bailout:
|
||||
%sum.2 = add nsw i32 %count, 1
|
||||
br label %.stop
|
||||
|
||||
.middle:
|
||||
%pr.1 = and i32 %i, 1023
|
||||
%pr.2 = icmp eq i32 %pr.1, 0
|
||||
br i1 %pr.2, label %.slow, label %.backedge, !prof !14
|
||||
|
||||
.slow:
|
||||
tail call void @effect(i32 %sum)
|
||||
br label %.backedge
|
||||
|
||||
.backedge:
|
||||
%sum.1 = add nsw i32 %i, %sum
|
||||
%i.1 = add nsw i32 %i, 1
|
||||
%end = icmp slt i32 %i.1, %count
|
||||
br i1 %end, label %.header, label %.stop, !prof !15
|
||||
|
||||
.stop:
|
||||
%sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
|
||||
ret i32 %sum.phi
|
||||
}
|
||||
|
||||
define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
|
||||
; This is a regression test against patch avoid loop rotation if
|
||||
; it introduce an extra btanch.
|
||||
; CHECK-LABEL: not_rotate_if_extra_branch_regression
|
||||
; CHECK: %.entry
|
||||
; CHECK: %.first_backedge
|
||||
; CHECK: %.slow
|
||||
; CHECK: %.second_header
|
||||
.entry:
|
||||
%sum.0 = shl nsw i32 %count, 1
|
||||
br label %.first_header
|
||||
|
||||
.first_header:
|
||||
%i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
|
||||
%is_bo1 = icmp sgt i32 %i, 9000000
|
||||
br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14
|
||||
|
||||
.first_backedge:
|
||||
%i.1 = add nsw i32 %i, 1
|
||||
%end = icmp slt i32 %i.1, %count
|
||||
br i1 %end, label %.first_header, label %.second_header, !prof !13
|
||||
|
||||
.second_header:
|
||||
%j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
|
||||
%end.2 = icmp sgt i32 %j, %count
|
||||
br i1 %end.2, label %.stop, label %.second_middle, !prof !14
|
||||
|
||||
.second_middle:
|
||||
%is_slow = icmp sgt i32 %j, 9000000
|
||||
br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14
|
||||
|
||||
.slow:
|
||||
tail call void @effect(i32 %j)
|
||||
br label %.second_backedge
|
||||
|
||||
.second_backedge:
|
||||
%j.1 = add nsw i32 %j, 1
|
||||
%end.3 = icmp slt i32 %j, 10000000
|
||||
br i1 %end.3, label %.second_header, label %.stop, !prof !13
|
||||
|
||||
.stop:
|
||||
%res = add nsw i32 %j, %i.1
|
||||
ret i32 %res
|
||||
|
||||
.bailout:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare void @effect(i32)
|
||||
|
||||
!5 = !{!"branch_weights", i32 84, i32 16}
|
||||
|
@ -1501,3 +1597,6 @@ declare void @effect(i32)
|
|||
!10 = !{!"branch_weights", i32 90, i32 10}
|
||||
!11 = !{!"branch_weights", i32 1, i32 1}
|
||||
!12 = !{!"branch_weights", i32 5, i32 3}
|
||||
!13 = !{!"branch_weights", i32 1, i32 1}
|
||||
!14 = !{!"branch_weights", i32 1, i32 1023}
|
||||
!15 = !{!"branch_weights", i32 4095, i32 1}
|
||||
|
|
|
@ -37,7 +37,7 @@ end:
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @nested_loop_0() !prof !1 {
|
||||
define void @nested_loop_0(i1 %flag) !prof !1 {
|
||||
; Test if a block that is cold in the inner loop but not cold in the outer loop
|
||||
; will merged to the outer loop chain.
|
||||
;
|
||||
|
@ -68,8 +68,7 @@ if.then:
|
|||
|
||||
if.else:
|
||||
call void @e()
|
||||
%call2 = call zeroext i1 @a()
|
||||
br i1 %call2, label %header2, label %header, !prof !3
|
||||
br i1 %flag, label %header2, label %header, !prof !3
|
||||
|
||||
end:
|
||||
call void @f()
|
||||
|
|
Loading…
Reference in New Issue