llvm-project/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll

; Check that we don't crash on corner cases.
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
; Module data layout: little-endian ("e"), Mach-O name mangling ("m:o"),
; 64-bit aligned i64, 128-bit aligned f80, native integer widths 8/16/32/64,
; and 128-bit stack alignment.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Ten-element constant i32 array in which every element is 1. The loops in
; @cmp_undef, @switch and @vec_load index it with their induction variable.
; NOTE(review): presumably it is constant so that the unroll analysis can
; fold loads from it while simulating iterations -- confirm against the pass.
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16

; Corner case: the loop body widens an undef i32 and uses it as the index of
; an inbounds GEP off a null base pointer. Neither result is used. The loop
; body runs 999 times, which is below the 1000-iteration analysis limit given
; on the opt command line at the top of the file.
define void @foo1() {
entry:
  br label %for.body

for.body:
  ; Induction variable: 0 from %entry, %inc on the back edge.
  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  ; Undef index, zero-extended to i64.
  %idx = zext i32 undef to i64
  ; Address formed from a null base and the undef index; it is never
  ; dereferenced or otherwise used.
  %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
  %inc = add nuw nsw i64 %phi, 1
  ; Keep looping while the incremented counter is (unsigned) below 999.
  %cmp = icmp ult i64 %inc, 999
  br i1 %cmp, label %for.body, label %for.exit

for.exit:
  ret void
}

; Corner case: the loop body contains a vector getelementptr whose base is an
; undef vector of four i32 pointers and whose index is a constant vector of
; ones. The result is unused. Loop control is the same 999-iteration counter
; used by @foo1.
define void @foo2() {
entry:
  br label %for.body

for.body:
  ; Induction variable: 0 from %entry, %inc on the back edge.
  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  ; Vector-of-pointers GEP on an undef base; yields a vector of addresses.
  %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %inc = add nuw nsw i64 %phi, 1
  ; Keep looping while the incremented counter is (unsigned) below 999.
  %cmp = icmp ult i64 %inc, 999
  br i1 %cmp, label %for.body, label %for.exit

for.exit:
  ret void
}

; Regression input for "[Unroll] Don't crash when simplified branch condition
; is undef": each iteration loads an element of @known_constant, compares it
; with undef, and branches on the result. The loop runs for 10 iterations,
; one per array element.
define void @cmp_undef() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  ; Induction variable: 0 from %entry, %iv.1 from the latch block %for.inc.
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
  ; Load @known_constant[%iv.0].
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  ; Comparison against undef: this is the branch condition the test targets.
  %cmp = icmp eq i32 %x1, undef
  br i1 %cmp, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  ; Empty block; both branch targets rejoin at %for.inc.
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %iv.1 = add nuw nsw i64 %iv.0, 1
  ; Exit once the incremented counter equals 10.
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}

; Regression input for "[Unroll] Handle SwitchInst properly": each iteration
; loads an element of @known_constant and switches on it. The switch has an
; empty case list, so %l1 (the default destination) is its only successor.
; The loop runs for 10 iterations.
define void @switch() {
entry:
  br label %for.body

for.body:
  ; Induction variable: 0 from %entry, %iv.1 from the latch block %for.inc.
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
  ; Load @known_constant[%iv.0].
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  ; Switch with a default label and no cases.
  switch i32 %x1, label %l1 [
  ]

l1:
  ; Uses the loaded value; the sum itself is not used afterwards.
  %x2 = add i32 %x1, 2
  br label %for.inc

for.inc:
  %iv.1 = add nuw nsw i64 %iv.0, 1
  ; Exit once the incremented counter equals 10.
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Regression input for "[Unroll] Do not crash trying to propagate a value to
; vector load": the address of an i32 element of @known_constant is bitcast
; to a <4 x i32> pointer and loaded as a vector. The loaded vectors are
; summed across iterations and the final sum is returned.
; NOTE(review): %phi counts up to 998 while @known_constant has only 10
; elements; this looks intentional for a crash-only test -- confirm.
define <4 x i32> @vec_load() {
entry:
  br label %for.body

for.body:
  ; Scalar induction variable: 0 from %entry, %inc on the back edge.
  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  ; Running vector sum: zero vector from %entry, %r on the back edge.
  %vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
  ; Address of @known_constant[%phi] as an i32 pointer...
  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
  ; ...reinterpreted as a pointer to four i32 lanes.
  %bc = bitcast i32* %arrayidx to <4 x i32>*
  ; Vector load through the reinterpreted pointer, 4-byte aligned.
  %x = load <4 x i32>, < 4 x i32>* %bc, align 4
  %r = add <4 x i32> %x, %vec_phi
  %inc = add nuw nsw i64 %phi, 1
  ; Keep looping while the incremented counter is (unsigned) below 999.
  %cmp = icmp ult i64 %inc, 999
  br i1 %cmp, label %for.body, label %for.exit

for.exit:
  ret <4 x i32> %r
}
Reimplement heuristic for estimating complete-unroll optimization effects. Summary: This patch reimplements heuristic that tries to estimate optimization benefits from complete loop unrolling. In this patch I kept the minimal changes - e.g. I removed code handling branches and folding compares. That's a promising area, but now there are too many questions to discuss before we can enable it. Test Plan: Tests are included in the patch. Reviewers: hfinkel, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8816 llvm-svn: 237156 2015-05-13 01:20:03 +08:00			`; Check that we don't crash on corner cases.`
[Unroll] Rework the naming and structure of the new unroll heuristics. The new naming is (to me) much easier to understand. Here is a summary of the new state of the world: - 'Threshold' is the threshold for full unrolling. It is measured against the estimated unrolled cost as computed by getUserCost in TTI (or CodeMetrics, etc). We will exceed this threshold when unrolling loops where unrolling exposes a significant degree of simplification of the logic within the loop. - 'PercentDynamicCostSavedThreshold' is the percentage of the loop's estimated dynamic execution cost which needs to be saved by unrolling to apply a discount to the estimated unrolled cost. - 'DynamicCostSavingsDiscount' is the discount applied to the estimated unrolling cost when the dynamic savings are expected to be high. When actually analyzing the loop, we now produce both an estimated unrolled cost, and an estimated rolled cost. The rolled cost is notably a dynamic estimate based on our analysis of the expected execution of each iteration. While we're still working to build up the infrastructure for making these estimates, to me it is much more clear how to make them better when they have reasonably descriptive names. For example, we may want to apply estimated (from heuristics or profiles) dynamic execution weights to the dynamic cost estimates. If we start doing that, we would also need to track the static unrolled cost and the dynamic unrolled cost, as only the latter could reasonably be weighted by profile information. This patch is sadly not without functionality change for the new unroll analysis logic. Buried in the heuristic management were several things that surprised me. For example, we never subtracted the optimized instruction count off when comparing against the unroll heuristics! 
I don't know if this just got lost somewhere along the way or what, but with the new accounting of things, this is much easier to keep track of and we use the post-simplification cost estimate to compare to the thresholds, and use the dynamic cost reduction ratio to select whether we can exceed the baseline threshold. The old values of these flags also don't necessarily make sense. My impression is that none of these thresholds or discounts have been tuned yet, and so they're just arbitrary placeholder numbers. As such, I've not bothered to adjust for the fact that this is now a discount and not a two-tier threshold model. We need to tune all these values once the logic is ready to be enabled. Differential Revision: http://reviews.llvm.org/D9966 llvm-svn: 239164 2015-06-06 01:01:43 +08:00			`; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null`
Reimplement heuristic for estimating complete-unroll optimization effects. Summary: This patch reimplements heuristic that tries to estimate optimization benefits from complete loop unrolling. In this patch I kept the minimal changes - e.g. I removed code handling branches and folding compares. That's a promising area, but now there are too many questions to discuss before we can enable it. Test Plan: Tests are included in the patch. Reviewers: hfinkel, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8816 llvm-svn: 237156 2015-05-13 01:20:03 +08:00			`target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"`

[Unroll] Don't crash when simplified branch condition is undef. llvm-svn: 243544 2015-07-30 02:10:29 +08:00			`@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16`

Reimplement heuristic for estimating complete-unroll optimization effects. Summary: This patch reimplements heuristic that tries to estimate optimization benefits from complete loop unrolling. In this patch I kept the minimal changes - e.g. I removed code handling branches and folding compares. That's a promising area, but now there are too many questions to discuss before we can enable it. Test Plan: Tests are included in the patch. Reviewers: hfinkel, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8816 llvm-svn: 237156 2015-05-13 01:20:03 +08:00			`define void @foo1() {`
			`entry:`
			`br label %for.body`

			`for.body:`
			`%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]`
			`%idx = zext i32 undef to i64`
			`%add.ptr = getelementptr inbounds i64, i64* null, i64 %idx`
			`%inc = add nuw nsw i64 %phi, 1`
			`%cmp = icmp ult i64 %inc, 999`
			`br i1 %cmp, label %for.body, label %for.exit`

			`for.exit:`
			`ret void`
			`}`

			`define void @foo2() {`
			`entry:`
			`br label %for.body`

			`for.body:`
			`%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]`
			`%x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>`
			`%inc = add nuw nsw i64 %phi, 1`
			`%cmp = icmp ult i64 %inc, 999`
			`br i1 %cmp, label %for.body, label %for.exit`

			`for.exit:`
			`ret void`
			`}`
[Unroll] Don't crash when simplified branch condition is undef. llvm-svn: 243544 2015-07-30 02:10:29 +08:00
			`define void @cmp_undef() {`
			`entry:`
			`br label %for.body`

			`for.body: ; preds = %for.inc, %entry`
			`%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]`
			`%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0`
			`%x1 = load i32, i32* %arrayidx1, align 4`
			`%cmp = icmp eq i32 %x1, undef`
			`br i1 %cmp, label %if.then, label %for.inc`

			`if.then: ; preds = %for.body`
			`br label %for.inc`

			`for.inc: ; preds = %for.body, %if.then`
			`%iv.1 = add nuw nsw i64 %iv.0, 1`
			`%exitcond = icmp eq i64 %iv.1, 10`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.inc`
			`ret void`
			`}`
[Unroll] Handle SwitchInst properly. Previously successor selection was simply wrong. llvm-svn: 243545 2015-07-30 02:10:33 +08:00
			`define void @switch() {`
			`entry:`
			`br label %for.body`

			`for.body:`
			`%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]`
			`%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0`
			`%x1 = load i32, i32* %arrayidx1, align 4`
			`switch i32 %x1, label %l1 [`
			`]`

			`l1:`
			`%x2 = add i32 %x1, 2`
			`br label %for.inc`

			`for.inc:`
			`%iv.1 = add nuw nsw i64 %iv.0, 1`
			`%exitcond = icmp eq i64 %iv.1, 10`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end:`
			`ret void`
			`}`
[Unroll] Do not crash trying to propagate a value to vector load. llvm-svn: 248333 2015-09-23 06:27:12 +08:00
			`define <4 x i32> @vec_load() {`
			`entry:`
			`br label %for.body`

			`for.body:`
			`%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]`
			`%vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]`
			`%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi`
			`%bc = bitcast i32* %arrayidx to <4 x i32>*`
			`%x = load <4 x i32>, < 4 x i32>* %bc, align 4`
			`%r = add <4 x i32> %x, %vec_phi`
			`%inc = add nuw nsw i64 %phi, 1`
			`%cmp = icmp ult i64 %inc, 999`
			`br i1 %cmp, label %for.body, label %for.exit`

			`for.exit:`
			`ret <4 x i32> %r`
			`}`