forked from OSchip/llvm-project
[IndVars] Eliminate loop exits with equivalent exit counts
We can end up with two loop exits whose exit counts are equivalent, but whose textual representation is different and non-obvious. For the sub-case where we have a series of exits which dominate one another (common), eliminate any exits which would iterate *after* a previous exit on the exiting iteration. As noted in the TODO being removed, I'd always thought this was a good idea, but I've now seen this in a real workload as well. Interestingly, in review, Nikita pointed out there's let another oppurtunity to leverage SCEV's reasoning. If we kept track of the min of dominanting exits so far, we could discharge exits with EC >= MDE. This is less powerful than the existing transform (since later exits aren't considered), but potentially more powerful for any case where SCEV can prove a >= b, but neither a == b or a > b. I don't have an example to illustrate that oppurtunity, but won't be suprised if we find one and return to handle that case as well. Differential Revision: https://reviews.llvm.org/D69009 llvm-svn: 375379
This commit is contained in:
parent
7015a5c54b
commit
8cbcd2f484
|
@ -2717,6 +2717,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
|
||||||
if (isa<SCEVCouldNotCompute>(MaxExitCount))
|
if (isa<SCEVCouldNotCompute>(MaxExitCount))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// Visit our exit blocks in order of dominance. We know from the fact that
|
||||||
|
// all exits (left) are analyzeable that the must be a total dominance order
|
||||||
|
// between them as each must dominate the latch. The visit order only
|
||||||
|
// matters for the provably equal case.
|
||||||
|
llvm::sort(ExitingBlocks,
|
||||||
|
[&](BasicBlock *A, BasicBlock *B) {
|
||||||
|
// std::sort sorts in ascending order, so we want the inverse of
|
||||||
|
// the normal dominance relation.
|
||||||
|
if (DT->properlyDominates(A, B)) return true;
|
||||||
|
if (DT->properlyDominates(B, A)) return false;
|
||||||
|
llvm_unreachable("expected total dominance order!");
|
||||||
|
});
|
||||||
|
#ifdef ASSERT
|
||||||
|
for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
|
||||||
|
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
|
auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
|
||||||
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
||||||
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
|
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
|
||||||
|
@ -2729,6 +2747,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
|
||||||
};
|
};
|
||||||
|
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
|
SmallSet<const SCEV*, 8> DominatingExitCounts;
|
||||||
for (BasicBlock *ExitingBB : ExitingBlocks) {
|
for (BasicBlock *ExitingBB : ExitingBlocks) {
|
||||||
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
|
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
|
||||||
assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
|
assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
|
||||||
|
@ -2766,10 +2785,15 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: If we can prove that the exiting iteration is equal to the exit
|
// As we run, keep track of which exit counts we've encountered. If we
|
||||||
// count for this exit and that no previous exit oppurtunities exist within
|
// find a duplicate, we've found an exit which would have exited on the
|
||||||
// the loop, then we can discharge all other exits. (May fall out of
|
// exiting iteration, but (from the visit order) strictly follows another
|
||||||
// previous TODO.)
|
// which does the same and is thus dead.
|
||||||
|
if (!DominatingExitCounts.insert(ExitCount).second) {
|
||||||
|
FoldExit(ExitingBB, false);
|
||||||
|
Changed = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return Changed;
|
return Changed;
|
||||||
}
|
}
|
||||||
|
|
|
@ -185,5 +185,39 @@ exit:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @mixed_width(i32 %len) {
|
||||||
|
; CHECK-LABEL: @mixed_width(
|
||||||
|
; CHECK-NEXT: entry:
|
||||||
|
; CHECK-NEXT: [[LEN_ZEXT:%.*]] = zext i32 [[LEN:%.*]] to i64
|
||||||
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
|
; CHECK: loop:
|
||||||
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
|
||||||
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
||||||
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[LEN_ZEXT]]
|
||||||
|
; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[EXIT:%.*]]
|
||||||
|
; CHECK: backedge:
|
||||||
|
; CHECK-NEXT: call void @side_effect()
|
||||||
|
; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT]]
|
||||||
|
; CHECK: exit:
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%len.zext = zext i32 %len to i64
|
||||||
|
br label %loop
|
||||||
|
loop:
|
||||||
|
%iv = phi i64 [0, %entry], [%iv.next, %backedge]
|
||||||
|
%iv2 = phi i32 [0, %entry], [%iv2.next, %backedge]
|
||||||
|
%iv.next = add i64 %iv, 1
|
||||||
|
%iv2.next = add i32 %iv2, 1
|
||||||
|
%cmp1 = icmp ult i64 %iv, %len.zext
|
||||||
|
br i1 %cmp1, label %backedge, label %exit
|
||||||
|
|
||||||
|
backedge:
|
||||||
|
call void @side_effect()
|
||||||
|
%cmp2 = icmp ult i32 %iv2, %len
|
||||||
|
br i1 %cmp2, label %loop, label %exit
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare void @side_effect()
|
declare void @side_effect()
|
||||||
|
|
|
@ -464,7 +464,6 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
|
||||||
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
|
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
|
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
|
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
|
||||||
|
@ -478,7 +477,7 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l
|
||||||
; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
|
; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
|
||||||
; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4
|
; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4
|
||||||
; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
|
; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
|
||||||
; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0
|
; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0
|
||||||
; CHECK: deopt2:
|
; CHECK: deopt2:
|
||||||
; CHECK-NEXT: call void @prevent_merging()
|
; CHECK-NEXT: call void @prevent_merging()
|
||||||
; CHECK-NEXT: ret i32 -1
|
; CHECK-NEXT: ret i32 -1
|
||||||
|
@ -784,7 +783,7 @@ exit:
|
||||||
; If we have a dominating exit (exit1) which can't be itself rewritten, we
|
; If we have a dominating exit (exit1) which can't be itself rewritten, we
|
||||||
; can't rewrite a later exit (exit2). Doing so would cause the loop to exit
|
; can't rewrite a later exit (exit2). Doing so would cause the loop to exit
|
||||||
; from the exit2 when it should have exited from exit1.
|
; from the exit2 when it should have exited from exit1.
|
||||||
define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) {
|
define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %length2, i32 %n) {
|
||||||
; CHECK-LABEL: @neg_dominating_exit(
|
; CHECK-LABEL: @neg_dominating_exit(
|
||||||
; CHECK-NEXT: loop.preheader:
|
; CHECK-NEXT: loop.preheader:
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
|
@ -798,7 +797,7 @@ define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) {
|
||||||
; CHECK-NEXT: call void @prevent_merging()
|
; CHECK-NEXT: call void @prevent_merging()
|
||||||
; CHECK-NEXT: ret i32 [[RESULT]]
|
; CHECK-NEXT: ret i32 [[RESULT]]
|
||||||
; CHECK: guarded:
|
; CHECK: guarded:
|
||||||
; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
|
; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]]
|
||||||
; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0
|
; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0
|
||||||
; CHECK: deopt2:
|
; CHECK: deopt2:
|
||||||
; CHECK-NEXT: call void @prevent_merging()
|
; CHECK-NEXT: call void @prevent_merging()
|
||||||
|
@ -830,7 +829,7 @@ deopt: ; preds = %loop
|
||||||
ret i32 %result
|
ret i32 %result
|
||||||
|
|
||||||
guarded: ; preds = %loop
|
guarded: ; preds = %loop
|
||||||
%within.bounds2 = icmp ult i32 %i, %length
|
%within.bounds2 = icmp ult i32 %i, %length2
|
||||||
br i1 %within.bounds2, label %guarded2, label %deopt2, !prof !0
|
br i1 %within.bounds2, label %guarded2, label %deopt2, !prof !0
|
||||||
|
|
||||||
deopt2: ; preds = %loop
|
deopt2: ; preds = %loop
|
||||||
|
|
|
@ -14,10 +14,9 @@ define i32 @test_01() {
|
||||||
; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32
|
||||||
; CHECK-NEXT: br label [[FOR_BODY6:%.*]]
|
; CHECK-NEXT: br label [[FOR_BODY6:%.*]]
|
||||||
; CHECK: for.cond4:
|
; CHECK: for.cond4:
|
||||||
; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2
|
; CHECK-NEXT: br i1 true, label [[FOR_BODY6]], label [[FOR_END:%.*]]
|
||||||
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]]
|
|
||||||
; CHECK: for.body6:
|
; CHECK: for.body6:
|
||||||
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ]
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC:%.*]], [[FOR_COND4:%.*]] ]
|
||||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]]
|
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]]
|
||||||
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
|
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
|
||||||
; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]
|
; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]
|
||||||
|
|
Loading…
Reference in New Issue