llvm-project/polly/test/ScopInfo/assume_gep_bounds_2.ll

; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
;
;    void foo(float A[restrict][20], float B[restrict][20], long n, long m,
;             long p) {
;      for (long i = 0; i < n; i++)
;        for (long j = 0; j < m; j++)
;          A[i][j] = i + j;
;      for (long i = 0; i < m; i++)
;        for (long j = 0; j < p; j++)
;          B[i][j] = i + j;
;    }

; This code is within bounds if both m and p are at most the size of the inner
; array dimension (20). It is also within bounds if only p is at most the size
; of the second dimension of B and n is such that the first loop is never
; executed, so that A is never accessed. In the latter case the value of m does
; not matter.

; CHECK:      Assumed Context:
; CHECK-NEXT: [n, m, p] -> {  : p <= 20 and (n <= 0 or (n > 0 and m <= 20)) }

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Test kernel @foo: two consecutive two-deep loop nests, each storing
; float(i + j) into an element of a row of 20 floats.
;   %A, %B     - noalias pointers to [20 x float] (A[][20] and B[][20])
;   %n, %m, %p - signed 64-bit loop bounds
; The first nest iterates i over [0, n) and j over [0, m) and writes A[i][j];
; the second iterates i over [0, m) and j over [0, p) and writes B[i][j].
; Nothing in the IR limits the inner bounds (m resp. p) to 20, so the inner
; subscript only stays inside a row under constraints on the parameters; the
; expected assumed context above states those constraints.
define void @foo([20 x float]* noalias %A, [20 x float]* noalias %B, i64 %n, i64 %m, i64 %p) {
entry:
  br label %for.cond

; First nest, outer loop header: %i.0 starts at 0 and the loop runs while
; %i.0 < %n (signed compare).
for.cond:                                         ; preds = %for.inc5, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
  %cmp = icmp slt i64 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end7

for.body:                                         ; preds = %for.cond
  br label %for.cond1

; First nest, inner loop header: %j.0 starts at 0 and the loop runs while
; %j.0 < %m (signed compare).
for.cond1:                                        ; preds = %for.inc, %for.body
  %j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]
  %cmp2 = icmp slt i64 %j.0, %m
  br i1 %cmp2, label %for.body3, label %for.end

; A[i][j] = (float)(i + j). The last GEP index (%j.0) selects the element
; within the 20-element row chosen by %i.0.
for.body3:                                        ; preds = %for.cond1
  %add = add nsw i64 %i.0, %j.0
  %conv = sitofp i64 %add to float
  %arrayidx4 = getelementptr inbounds [20 x float], [20 x float]* %A, i64 %i.0, i64 %j.0
  store float %conv, float* %arrayidx4, align 4
  br label %for.inc

; j++
for.inc:                                          ; preds = %for.body3
  %inc = add nsw i64 %j.0, 1
  br label %for.cond1

for.end:                                          ; preds = %for.cond1
  br label %for.inc5

; i++
for.inc5:                                         ; preds = %for.end
  %inc6 = add nsw i64 %i.0, 1
  br label %for.cond

; The first nest is done; continue with the second nest.
for.end7:                                         ; preds = %for.cond
  br label %for.cond9

; Second nest, outer loop header: %i8.0 starts at 0 and the loop runs while
; %i8.0 < %m (signed compare). Note that %m bounds the outer loop here, whereas
; it bounds the inner loop in the first nest.
for.cond9:                                        ; preds = %for.inc25, %for.end7
  %i8.0 = phi i64 [ 0, %for.end7 ], [ %inc26, %for.inc25 ]
  %cmp10 = icmp slt i64 %i8.0, %m
  br i1 %cmp10, label %for.body12, label %for.end27

for.body12:                                       ; preds = %for.cond9
  br label %for.cond14

; Second nest, inner loop header: %j13.0 starts at 0 and the loop runs while
; %j13.0 < %p (signed compare).
for.cond14:                                       ; preds = %for.inc22, %for.body12
  %j13.0 = phi i64 [ 0, %for.body12 ], [ %inc23, %for.inc22 ]
  %cmp15 = icmp slt i64 %j13.0, %p
  br i1 %cmp15, label %for.body17, label %for.end24

; B[i][j] = (float)(i + j). The last GEP index (%j13.0) selects the element
; within the 20-element row chosen by %i8.0.
for.body17:                                       ; preds = %for.cond14
  %add18 = add nsw i64 %i8.0, %j13.0
  %conv19 = sitofp i64 %add18 to float
  %arrayidx21 = getelementptr inbounds [20 x float], [20 x float]* %B, i64 %i8.0, i64 %j13.0
  store float %conv19, float* %arrayidx21, align 4
  br label %for.inc22

; j++
for.inc22:                                        ; preds = %for.body17
  %inc23 = add nsw i64 %j13.0, 1
  br label %for.cond14

for.end24:                                        ; preds = %for.cond14
  br label %for.inc25

; i++
for.inc25:                                        ; preds = %for.end24
  %inc26 = add nsw i64 %i8.0, 1
  br label %for.cond9

; Both nests are done.
for.end27:                                        ; preds = %for.cond9
  ret void
}
tests: Drop -polly-detect-unprofitable and -polly-no-early-exit These flags are now always passed to all tests and need to be disabled if not needed. Disabling these flags, rather than passing them to almost all tests, significantly simplifies our RUN: lines. llvm-svn: 249422 2015-10-06 23:36:44 +08:00			`; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s \| FileCheck %s`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is larger than 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00			`;`
			`; void foo(float A[restrict][20], float B[restrict][20], long n, long m,`
			`; long p) {`
			`; for (long i = 0; i < n; i++)`
			`; for (long j = 0; j < m; j++)`
			`; A[i][j] = i + j;`
			`; for (long i = 0; i < m; i++)`
			`; for (long j = 0; j < p; j++)`
			`; B[i][j] = i + j;`
			`; }`

			`; This code is within bounds either if m and p are smaller than the array sizes,`
			`; but also if only p is smaller than the size of the second B dimension and n`
			`; is such that the first loop is never executed and consequently A is never`
			`; accessed. In this case the value of m does not matter.`

Prepare unit tests for update to ISL 0.16 ISL 0.16 will change how sets are printed which breaks 117 unit tests that text-compare printed sets. This patch re-formats most of these unit tests using a script and small manual editing on top of that. When actually updating ISL, most work is done by just re-running the script to adapt to the changed output. Some tests that compare IR and tests with single CHECK-lines that can be easily updated manually are not included here. The re-format script will also be committed afterwards. The per-test formatter invocation command lines options will not be added in the near future because it is ad hoc and would overwrite the manual edits. Ideally it also shouldn't be required anymore because ISL's set printing has become more stable in 0.16. Differential Revision: http://reviews.llvm.org/D16095 llvm-svn: 257851 2016-01-15 08:48:42 +08:00			`; CHECK: Assumed Context:`
Update to ISL 0.16.1 llvm-svn: 257898 2016-01-15 23:54:45 +08:00			`; CHECK-NEXT: [n, m, p] -> { : p <= 20 and (n <= 0 or (n > 0 and m <= 20)) }`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is larger than 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00
			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`

			`define void @foo([20 x float]* noalias %A, [20 x float]* noalias %B, i64 %n, i64 %m, i64 %p) {`
			`entry:`
			`br label %for.cond`

			`for.cond: ; preds = %for.inc5, %entry`
			`%i.0 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]`
			`%cmp = icmp slt i64 %i.0, %n`
			`br i1 %cmp, label %for.body, label %for.end7`

			`for.body: ; preds = %for.cond`
			`br label %for.cond1`

			`for.cond1: ; preds = %for.inc, %for.body`
			`%j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]`
			`%cmp2 = icmp slt i64 %j.0, %m`
			`br i1 %cmp2, label %for.body3, label %for.end`

			`for.body3: ; preds = %for.cond1`
			`%add = add nsw i64 %i.0, %j.0`
			`%conv = sitofp i64 %add to float`
Update Polly tests to handle explicitly typed gep changes in LLVM llvm-svn: 230784 2015-02-28 03:20:19 +08:00			`%arrayidx4 = getelementptr inbounds [20 x float], [20 x float]* %A, i64 %i.0, i64 %j.0`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is larger than 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00			`store float %conv, float* %arrayidx4, align 4`
			`br label %for.inc`

			`for.inc: ; preds = %for.body3`
			`%inc = add nsw i64 %j.0, 1`
			`br label %for.cond1`

			`for.end: ; preds = %for.cond1`
			`br label %for.inc5`

			`for.inc5: ; preds = %for.end`
			`%inc6 = add nsw i64 %i.0, 1`
			`br label %for.cond`

			`for.end7: ; preds = %for.cond`
			`br label %for.cond9`

			`for.cond9: ; preds = %for.inc25, %for.end7`
			`%i8.0 = phi i64 [ 0, %for.end7 ], [ %inc26, %for.inc25 ]`
			`%cmp10 = icmp slt i64 %i8.0, %m`
			`br i1 %cmp10, label %for.body12, label %for.end27`

			`for.body12: ; preds = %for.cond9`
			`br label %for.cond14`

			`for.cond14: ; preds = %for.inc22, %for.body12`
			`%j13.0 = phi i64 [ 0, %for.body12 ], [ %inc23, %for.inc22 ]`
			`%cmp15 = icmp slt i64 %j13.0, %p`
			`br i1 %cmp15, label %for.body17, label %for.end24`

			`for.body17: ; preds = %for.cond14`
			`%add18 = add nsw i64 %i8.0, %j13.0`
			`%conv19 = sitofp i64 %add18 to float`
Update Polly tests to handle explicitly typed gep changes in LLVM llvm-svn: 230784 2015-02-28 03:20:19 +08:00			`%arrayidx21 = getelementptr inbounds [20 x float], [20 x float]* %B, i64 %i8.0, i64 %j13.0`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is larger than 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00			`store float %conv19, float* %arrayidx21, align 4`
			`br label %for.inc22`

			`for.inc22: ; preds = %for.body17`
			`%inc23 = add nsw i64 %j13.0, 1`
			`br label %for.cond14`

			`for.end24: ; preds = %for.cond14`
			`br label %for.inc25`

			`for.inc25: ; preds = %for.end24`
			`%inc26 = add nsw i64 %i8.0, 1`
			`br label %for.cond9`

			`for.end27: ; preds = %for.cond9`
			`ret void`
			`}`