[LoopPred] Generalize profitability check to handle unswitch output

Unswitch (and other loop transforms) like to generate loop exit blocks with unconditional successors, and phi nodes (LCSSA, or simple multiple exiting blocks sharing an exit).  Generalize the "likely very rare exit" check slightly to handle this form.
This commit is contained in:
Philip Reames 2019-11-19 14:06:36 -08:00
parent cf823ce4ad
commit f3eb5dee57
2 changed files with 93 additions and 1 deletions

View File

@ -1019,6 +1019,17 @@ static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
return SE.getUMinFromMismatchedTypes(ExitCounts);
}
/// Return true if we can be fairly sure that executing block BB will probably
/// lead to executing an __llvm_deoptimize. This is a profitability heuristic,
/// not a legality constraint.
static bool isVeryLikelyToDeopt(BasicBlock *BB) {
while (BB->getUniqueSuccessor())
// Will skip side effects, that's okay
BB = BB->getUniqueSuccessor();
return BB->getTerminatingDeoptimizeCall();
}
/// This implements an analogous, but entirely distinct transform from the main
/// loop predication transform. This one is phrased in terms of using a
/// widenable branch *outside* the loop to allow us to simplify loop exits in a
@ -1109,7 +1120,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
if (!ExitBB->getTerminatingDeoptimizeCall())
if (!isVeryLikelyToDeopt(ExitBB))
// Profitability: indicator of rarely/never taken exit
continue;

View File

@ -675,11 +675,92 @@ exit:
}
;; Unswitch likes to produce some ugly exit blocks without simplifications
;; being applied. Make sure we can handle that form.
define i32 @unswitch_exit_form(i32* %array, i32 %length, i32 %n, i1 %cond_0) {
; CHECK-LABEL: @unswitch_exit_form(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1
; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]]
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]]
; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]]
; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0
; CHECK: deopt.loopexit:
; CHECK-NEXT: br label [[DEOPT]]
; CHECK: deopt:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[DEOPT_LOOPEXIT:%.*]] ]
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: br label [[ACTUAL_DEOPT:%.*]]
; CHECK: actual_deopt:
; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[PHI]]) ]
; CHECK-NEXT: ret i32 [[DEOPTRET]]
; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT]], !prof !0
; CHECK: guarded:
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]]
; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
; CHECK-NEXT: ret i32 [[RESULT]]
;
entry:
%widenable_cond = call i1 @llvm.experimental.widenable.condition()
%exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0
deopt:
;; This is written to look like an unsimplified loop exit after unswitch
;; (i.e. phis, merge, and branch to actual block)
%phi = phi i32 [0, %entry], [1, %loop]
call void @unknown() ;; it's okay to skip possible throws
br label %actual_deopt
actual_deopt:
%deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %phi) ]
ret i32 %deoptret
loop.preheader:
br label %loop
loop:
%loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
call void @unknown()
%within.bounds = icmp ult i32 %i, %length
br i1 %within.bounds, label %guarded, label %deopt, !prof !0
guarded:
%i.i64 = zext i32 %i to i64
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
%array.i = load i32, i32* %array.i.ptr, align 4
store i32 0, i32* %array.i.ptr
%loop.acc.next = add i32 %loop.acc, %array.i
%i.next = add nuw i32 %i, 1
%continue = icmp ult i32 %i.next, %n
br i1 %continue, label %loop, label %exit
exit:
%result = phi i32 [ %loop.acc.next, %guarded ]
ret i32 %result
}
; TODO: Non-latch exits can still be predicated
; This is currently prevented by an overly restrictive profitability check.