forked from OSchip/llvm-project
[LoopPred] Generalize profitability check to handle unswitch output
Unswitch (and other loop transforms) like to generate loop exit blocks with unconditional successors, and phi nodes (LCSSA, or simple multiple exiting blocks sharing an exit). Generalize the "likely very rare exit" check slightly to handle this form.
This commit is contained in:
parent
cf823ce4ad
commit
f3eb5dee57
|
@ -1019,6 +1019,17 @@ static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
|
|||
return SE.getUMinFromMismatchedTypes(ExitCounts);
|
||||
}
|
||||
|
||||
/// Return true if we can be fairly sure that executing block BB will probably
|
||||
/// lead to executing an __llvm_deoptimize. This is a profitability heuristic,
|
||||
/// not a legality constraint.
|
||||
static bool isVeryLikelyToDeopt(BasicBlock *BB) {
|
||||
while (BB->getUniqueSuccessor())
|
||||
// Will skip side effects, that's okay
|
||||
BB = BB->getUniqueSuccessor();
|
||||
|
||||
return BB->getTerminatingDeoptimizeCall();
|
||||
}
|
||||
|
||||
/// This implements an analogous, but entirely distinct transform from the main
|
||||
/// loop predication transform. This one is phrased in terms of using a
|
||||
/// widenable branch *outside* the loop to allow us to simplify loop exits in a
|
||||
|
@ -1109,7 +1120,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
|
|||
|
||||
const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
|
||||
BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
|
||||
if (!ExitBB->getTerminatingDeoptimizeCall())
|
||||
if (!isVeryLikelyToDeopt(ExitBB))
|
||||
// Profitability: indicator of rarely/never taken exit
|
||||
continue;
|
||||
|
||||
|
|
|
@ -675,11 +675,92 @@ exit:
|
|||
}
|
||||
|
||||
|
||||
;; Unswitch likes to produce some ugly exit blocks without simplifications
|
||||
;; being applied. Make sure we can handle that form.
|
||||
;; Here the in-loop bounds check exits to %deopt, which holds a phi and a call
;; and only reaches the deoptimize call through an unconditional branch to
;; %actual_deopt. The expected output below has that in-loop branch folded to
;; `br i1 true`, with an extra loop-invariant condition and-ed into the entry
;; guard.
define i32 @unswitch_exit_form(i32* %array, i32 %length, i32 %n, i1 %cond_0) {
|
||||
; CHECK-LABEL: @unswitch_exit_form(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
|
||||
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]]
|
||||
; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]]
|
||||
; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0
|
||||
; CHECK: deopt.loopexit:
|
||||
; CHECK-NEXT: br label [[DEOPT]]
|
||||
; CHECK: deopt:
|
||||
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[DEOPT_LOOPEXIT:%.*]] ]
|
||||
; CHECK-NEXT: call void @unknown()
|
||||
; CHECK-NEXT: br label [[ACTUAL_DEOPT:%.*]]
|
||||
; CHECK: actual_deopt:
|
||||
; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[PHI]]) ]
|
||||
; CHECK-NEXT: ret i32 [[DEOPTRET]]
|
||||
; CHECK: loop.preheader:
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
|
||||
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
|
||||
; CHECK-NEXT: call void @unknown()
|
||||
; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
|
||||
; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT]], !prof !0
|
||||
; CHECK: guarded:
|
||||
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
|
||||
; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
|
||||
; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
|
||||
; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]]
|
||||
; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
|
||||
; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
|
||||
; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
|
||||
; CHECK-NEXT: ret i32 [[RESULT]]
|
||||
;
|
||||
entry:
|
||||
%widenable_cond = call i1 @llvm.experimental.widenable.condition()
|
||||
;; Entry guard: %cond_0 and-ed with the widenable condition; a false result
;; branches straight to %deopt.
%exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
|
||||
br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0
|
||||
|
||||
deopt:
|
||||
;; This is written to look like an unsimplified loop exit after unswitch
|
||||
;; (i.e. phis, merge, and branch to actual block)
|
||||
%phi = phi i32 [0, %entry], [1, %loop]
|
||||
call void @unknown() ;; it's okay to skip possible throws
|
||||
br label %actual_deopt
|
||||
|
||||
actual_deopt:
|
||||
%deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %phi) ]
|
||||
ret i32 %deoptret
|
||||
|
||||
loop.preheader:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
|
||||
%i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
|
||||
call void @unknown()
|
||||
%within.bounds = icmp ult i32 %i, %length
|
||||
;; The failing edge of the bounds check targets the merge block %deopt, not
;; the block that contains the deoptimize call itself.
br i1 %within.bounds, label %guarded, label %deopt, !prof !0
|
||||
|
||||
guarded:
|
||||
;; Load array[i], store 0 back to the same slot, and add the loaded value to
;; the accumulator.
%i.i64 = zext i32 %i to i64
|
||||
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
|
||||
%array.i = load i32, i32* %array.i.ptr, align 4
|
||||
store i32 0, i32* %array.i.ptr
|
||||
%loop.acc.next = add i32 %loop.acc, %array.i
|
||||
%i.next = add nuw i32 %i, 1
|
||||
;; Latch: keep iterating while %i.next is unsigned-less-than %n.
%continue = icmp ult i32 %i.next, %n
|
||||
br i1 %continue, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
%result = phi i32 [ %loop.acc.next, %guarded ]
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; TODO: Non-latch exits can still be predicated
|
||||
; This is currently prevented by an overly restrictive profitability check.
|
||||
|
|
Loading…
Reference in New Issue