llvm-project/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll

103 lines
2.9 KiB
LLVM
Raw Normal View History

; Check that we don't crash on corner cases.
[Unroll] Rework the naming and structure of the new unroll heuristics. The new naming is (to me) much easier to understand. Here is a summary of the new state of the world: - '*Threshold' is the threshold for full unrolling. It is measured against the estimated unrolled cost as computed by getUserCost in TTI (or CodeMetrics, etc). We will exceed this threshold when unrolling loops where unrolling exposes a significant degree of simplification of the logic within the loop. - '*PercentDynamicCostSavedThreshold' is the percentage of the loop's estimated dynamic execution cost which needs to be saved by unrolling to apply a discount to the estimated unrolled cost. - '*DynamicCostSavingsDiscount' is the discount applied to the estimated unrolling cost when the dynamic savings are expected to be high. When actually analyzing the loop, we now produce both an estimated unrolled cost, and an estimated rolled cost. The rolled cost is notably a dynamic estimate based on our analysis of the expected execution of each iteration. While we're still working to build up the infrastructure for making these estimates, to me it is much more clear *how* to make them better when they have reasonably descriptive names. For example, we may want to apply estimated (from heuristics or profiles) dynamic execution weights to the *dynamic* cost estimates. If we start doing that, we would also need to track the static unrolled cost and the dynamic unrolled cost, as only the latter could reasonably be weighted by profile information. This patch is sadly not without functionality change for the new unroll analysis logic. Buried in the heuristic management were several things that surprised me. For example, we never subtracted the optimized instruction count off when comparing against the unroll heuristics!
I don't know if this just got lost somewhere along the way or what, but with the new accounting of things, this is much easier to keep track of and we use the post-simplification cost estimate to compare to the thresholds, and use the dynamic cost reduction ratio to select whether we can exceed the baseline threshold. The old values of these flags also don't necessarily make sense. My impression is that none of these thresholds or discounts have been tuned yet, and so they're just arbitrary placeholder numbers. As such, I've not bothered to adjust for the fact that this is now a discount and not a two-tier threshold model. We need to tune all these values once the logic is ready to be enabled. Differential Revision: http://reviews.llvm.org/D9966 llvm-svn: 239164
2015-06-06 01:01:43 +08:00
; Test invocation (next line): pipes this file through opt with -loop-unroll,
; passing -unroll-max-iteration-count-to-analyze=1000, -unroll-threshold=10 and
; -unroll-percent-dynamic-cost-saved-threshold=20. Output is written to
; /dev/null and nothing is piped to a checker, so the invocation only
; exercises the pass; it does not compare the produced IR.
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
; Data layout string for this module.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Internal constant array of ten i32 values, all equal to 1. It is the base of
; the loads in @cmp_undef, @switch and @vec_load.
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
; @foo1: single-block counted loop whose body forms a getelementptr from a
; null base pointer using an index derived from undef.
; Takes no arguments and returns void.
define void @foo1() {
entry:
br label %for.body
for.body:
; Induction variable: 0 on entry, %inc on the back edge.
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
; Index is an undef i32 widened to i64.
%idx = zext i32 undef to i64
; GEP off a null i64* base; %add.ptr has no users in this function.
%add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
%inc = add nuw nsw i64 %phi, 1
; Stay in the loop while %inc < 999 (unsigned compare).
%cmp = icmp ult i64 %inc, 999
br i1 %cmp, label %for.body, label %for.exit
for.exit:
ret void
}
; @foo2: single-block counted loop whose body contains a vector
; getelementptr (vector of pointers indexed by a vector of constants).
; Takes no arguments and returns void.
define void @foo2() {
entry:
br label %for.body
for.body:
; Induction variable: 0 on entry, %inc on the back edge.
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
; Base is an undef <4 x i32*>; indices are the constant vector <1, 1, 1, 1>.
; %x has no users in this function.
%x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%inc = add nuw nsw i64 %phi, 1
; Stay in the loop while %inc < 999 (unsigned compare).
%cmp = icmp ult i64 %inc, 999
br i1 %cmp, label %for.body, label %for.exit
for.exit:
ret void
}
; @cmp_undef: loop over @known_constant that compares each loaded element
; against undef and branches on the result.
; Takes no arguments and returns void.
define void @cmp_undef() {
entry:
br label %for.body
for.body: ; preds = %for.inc, %entry
; Induction variable: 0 on entry, %iv.1 from the latch block %for.inc.
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
; Load element %iv.0 of the constant array.
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
; The second icmp operand is undef; the branch below depends on this compare.
%cmp = icmp eq i32 %x1, undef
br i1 %cmp, label %if.then, label %for.inc
if.then: ; preds = %for.body
; Empty block: it only forwards to the latch.
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%iv.1 = add nuw nsw i64 %iv.0, 1
; Leave the loop once the incremented counter equals 10.
%exitcond = icmp eq i64 %iv.1, 10
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.inc
ret void
}
; @switch: loop over @known_constant whose body ends in a switch on the
; loaded element; the switch has an empty case list and only a default target.
; Takes no arguments and returns void.
define void @switch() {
entry:
br label %for.body
for.body:
; Induction variable: 0 on entry, %iv.1 from the latch block %for.inc.
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
; Load element %iv.0 of the constant array.
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
; No cases are listed, so control always reaches the default label %l1.
switch i32 %x1, label %l1 [
]
l1:
; %x2 has no users in this function.
%x2 = add i32 %x1, 2
br label %for.inc
for.inc:
%iv.1 = add nuw nsw i64 %iv.0, 1
; Leave the loop once the incremented counter equals 10.
%exitcond = icmp eq i64 %iv.1, 10
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; @vec_load: single-block loop that reinterprets a pointer to an i32 element
; of @known_constant as a <4 x i32>* and accumulates the loaded vectors.
; Takes no arguments; returns the accumulated <4 x i32> value %r.
define <4 x i32> @vec_load() {
entry:
br label %for.body
for.body:
; Scalar induction variable: 0 on entry, %inc on the back edge.
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
; Vector accumulator: all zeros on entry, %r on the back edge.
%vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
; Cast the element pointer to a vector pointer and load four i32 lanes from it.
%bc = bitcast i32* %arrayidx to <4 x i32>*
%x = load <4 x i32>, < 4 x i32>* %bc, align 4
%r = add <4 x i32> %x, %vec_phi
%inc = add nuw nsw i64 %phi, 1
; Stay in the loop while %inc < 999 (unsigned compare). Note that this bound
; is larger than the 10-element array being indexed; presumably intentional
; for this corner-case test.
%cmp = icmp ult i64 %inc, 999
br i1 %cmp, label %for.body, label %for.exit
for.exit:
ret <4 x i32> %r
}