llvm-project/llvm/test/Transforms/LoopInterchange/currentLimitation.ll

; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
; RUN:   -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s

; Target configuration shared by every function in this test: x86-64 Linux.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
 
; Zero-initialized global arrays used as the memory the loop nests operate on.
; @A (100x100 i32) is accessed by @interchange_01.
@A = common global [100 x [100 x i32]] zeroinitializer
; @B (100x100x100 i32) is not referenced by the functions visible in this
; file; presumably kept from an earlier revision of the test -- confirm before
; removing.
@B = common global [100 x [100 x [100 x i32]]] zeroinitializer
; @C (100x100 i64) is accessed by @interchange_02.
@C = common global [100 x [100 x i64]] zeroinitializer
 
;;--------------------------------------Test case 01------------------------------------
;; [FIXME] This loop, though valid, is currently not interchanged due to the limitation that we cannot split the inner loop latch because of multiple uses of the inner induction
;; variable (used to increment the loop counter and to access A[j+1][i+1]).
;;  for(int i=0;i<N-1;i++)
;;    for(int j=1;j<N-1;j++)
;;      A[j+1][i+1] = A[j+1][i+1] + k;

; FIXME: Currently fails because of DA changes.
; IR-LABEL: @interchange_01
; IR-NOT: split

; CHECK:      Name:            Dependence
; CHECK-NEXT: Function:        interchange_01

; Two-deep loop nest that adds %k to A[j+1][i+1], with i as the outer and j as
; the inner induction variable. %N determines both trip counts.
define void @interchange_01(i32 %k, i32 %N) {
 entry:
   ; %sub = N - 1. The whole nest is skipped unless N > 1.
   %sub = add nsw i32 %N, -1
   %cmp26 = icmp sgt i32 %N, 1
   br i1 %cmp26, label %for.cond1.preheader.lr.ph, label %for.end17
 
 for.cond1.preheader.lr.ph:
   ; Loop-invariant values computed once before entering the outer loop:
   ;   %cmp324 - guard for the inner loop, (N - 1) > 1
   ;   %0      - N - 2, the value the inner counter is compared against on exit
   ;   %1      - N - 1 widened to i64, the outer loop bound
   %cmp324 = icmp sgt i32 %sub, 1
   %0 = add i32 %N, -2
   %1 = sext i32 %sub to i64
   br label %for.cond1.preheader
 
 for.cond.loopexit:
   ; Outer loop latch: continue while (i + 1) < (N - 1). Reached either from
   ; the inner loop's exit or directly from the outer header when the inner
   ; loop is guarded off.
   %cmp = icmp slt i64 %indvars.iv.next29, %1
   br i1 %cmp, label %for.cond1.preheader, label %for.end17
 
 for.cond1.preheader:
   ; Outer loop header: i starts at 0; i + 1 is computed here and is used both
   ; as the next value of i and as the column subscript in the inner loop.
   %indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.cond.loopexit ]
   %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
   br i1 %cmp324, label %for.body4, label %for.cond.loopexit
 
 for.body4:
   ; Inner loop (single block, so it is header, body and latch): j starts at 1.
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.cond1.preheader ]
   ; j + 1 has two uses: it feeds the phi above as the next j, and it is the
   ; row subscript of the access below.
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   ; Address of A[j+1][i+1].
   %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
   ; A[j+1][i+1] = A[j+1][i+1] + k
   %2 = load i32, i32* %arrayidx7
   %add8 = add nsw i32 %2, %k
   store i32 %add8, i32* %arrayidx7
   ; Exit test uses the current j (not j + 1), truncated to i32, against N - 2.
   %lftr.wideiv = trunc i64 %indvars.iv to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %0
   br i1 %exitcond, label %for.cond.loopexit, label %for.body4
 
 for.end17: 
   ret void
}

; We currently cannot interchange this loop, because the transform currently
; expects the latches to be the exiting blocks too.

; IR-LABEL: @interchange_02
; IR-NOT: split
;
; CHECK:      Name:            ExitingNotLatch
; CHECK-NEXT: Function:        interchange_02
; Two-deep loop nest that adds %k to C[j][j23], where %j23 is the outer and %j
; the inner induction variable; both run from 0 through 99. The inner loop's
; exit test sits in its header block (for2), while its latch block only
; increments and branches back, so the inner latch is not the exiting block.
; The %N parameter is not used in the body.
define void @interchange_02(i64 %k, i64 %N) {
entry:
  br label %for1.header

for1.header:
  ; Outer loop header: %j23 starts at 0 and is advanced in for1.inc10.
  %j23 = phi i64 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
  br label %for2

for2:
  ; Inner loop header: %j starts at 0 and is advanced in the latch block.
  %j = phi i64 [ %j.next, %latch ], [ 0, %for1.header ]
  ; C[j][j23] = C[j][j23] + k
  %arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @C, i64 0, i64 %j, i64 %j23
  %lv = load i64, i64* %arrayidx5
  %add = add nsw i64 %lv, %k
  store i64 %add, i64* %arrayidx5
  ; The inner loop exits from here (the header), not from the latch.
  %exitcond = icmp eq i64 %j, 99
  br i1 %exitcond, label %for1.inc10, label %latch
latch:
  ; Inner loop latch: increment only, with an unconditional back edge.
  %j.next = add nuw nsw i64 %j, 1
  br label %for2

for1.inc10:
  ; Outer loop latch and exiting block: leave the nest once %j23 reaches 99.
  %j.next24 = add nuw nsw i64 %j23, 1
  %exitcond26 = icmp eq i64 %j23, 99
  br i1 %exitcond26, label %for.end12, label %for1.header

for.end12:
  ret void
}
[LoopInterchange] Use getExitBlock()/getExitingBlock instead of manual impl. This also means we have to check if the latch is the exiting block now, as `transform` expects the latches to be the exiting blocks too. https://bugs.llvm.org/show_bug.cgi?id=36586 Reviewers: efriedma, davide, karthikthecool Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D45279 llvm-svn: 330806 2018-04-25 17:35:54 +08:00			`; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \`
			`; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S \| FileCheck -check-prefix=IR %s`
			`; RUN: FileCheck --input-file=%t %s`
Add a new pass "Loop Interchange" This pass interchanges loops to provide a more cache-friendly memory access. For e.g. given a loop like - for(int i=0;i<N;i++) for(int j=0;j<N;j++) A[j][i] = A[j][i]+B[j][i]; is interchanged to - for(int j=0;j<N;j++) for(int i=0;i<N;i++) A[j][i] = A[j][i]+B[j][i]; This pass is currently disabled by default. To give a brief introduction it consists of 3 stages- LoopInterchangeLegality : Checks the legality of loop interchange based on Dependency matrix. LoopInterchangeProfitability: A very basic heuristic has been added to check for profitibility. This will evolve over time. LoopInterchangeTransform : Which does the actual transform. LNT Performance tests shows improvement in Polybench/linear-algebra/kernels/mvt and Polybench/linear-algebra/kernels/gemver becnmarks. TODO: 1) Add support for reductions and lcssa phi. 2) Improve profitability model. 3) Improve loop selection algorithm to select best loop for interchange. Currently the innermost loop is selected for interchange. 4) Improve compile time regression found in llvm lnt due to this pass. 5) Fix issues in Dependency Analysis module. A special thanks to Hal for reviewing this code. Review: http://reviews.llvm.org/D7499 llvm-svn: 231458 2015-03-06 18:11:25 +08:00
			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`
			`target triple = "x86_64-unknown-linux-gnu"`

			`@A = common global [100 x [100 x i32]] zeroinitializer`
			`@B = common global [100 x [100 x [100 x i32]]] zeroinitializer`
[LoopInterchange] Use getExitBlock()/getExitingBlock instead of manual impl. This also means we have to check if the latch is the exiting block now, as `transform` expects the latches to be the exiting blocks too. https://bugs.llvm.org/show_bug.cgi?id=36586 Reviewers: efriedma, davide, karthikthecool Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D45279 llvm-svn: 330806 2018-04-25 17:35:54 +08:00			`@C = common global [100 x [100 x i64]] zeroinitializer`
Add a new pass "Loop Interchange" This pass interchanges loops to provide a more cache-friendly memory access. For e.g. given a loop like - for(int i=0;i<N;i++) for(int j=0;j<N;j++) A[j][i] = A[j][i]+B[j][i]; is interchanged to - for(int j=0;j<N;j++) for(int i=0;i<N;i++) A[j][i] = A[j][i]+B[j][i]; This pass is currently disabled by default. To give a brief introduction it consists of 3 stages- LoopInterchangeLegality : Checks the legality of loop interchange based on Dependency matrix. LoopInterchangeProfitability: A very basic heuristic has been added to check for profitibility. This will evolve over time. LoopInterchangeTransform : Which does the actual transform. LNT Performance tests shows improvement in Polybench/linear-algebra/kernels/mvt and Polybench/linear-algebra/kernels/gemver becnmarks. TODO: 1) Add support for reductions and lcssa phi. 2) Improve profitability model. 3) Improve loop selection algorithm to select best loop for interchange. Currently the innermost loop is selected for interchange. 4) Improve compile time regression found in llvm lnt due to this pass. 5) Fix issues in Dependency Analysis module. A special thanks to Hal for reviewing this code. Review: http://reviews.llvm.org/D7499 llvm-svn: 231458 2015-03-06 18:11:25 +08:00
			`;;--------------------------------------Test case 01------------------------------------`
			`;; [FIXME] This loop though valid is currently not interchanged due to the limitation that we cannot split the inner loop latch due to multiple use of inner induction`
			`;; variable.(used to increment the loop counter and to access A[j+1][i+1]`
			`;; for(int i=0;i<N-1;i++)`
			`;; for(int j=1;j<N-1;j++)`
			`;; A[j+1][i+1] = A[j+1][i+1] + k;`

[LoopInterchange] Use getExitBlock()/getExitingBlock instead of manual impl. This also means we have to check if the latch is the exiting block now, as `transform` expects the latches to be the exiting blocks too. https://bugs.llvm.org/show_bug.cgi?id=36586 Reviewers: efriedma, davide, karthikthecool Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D45279 llvm-svn: 330806 2018-04-25 17:35:54 +08:00			`; FIXME: Currently fails because of DA changes.`
			`; IR-LABEL: @interchange_01`
			`; IR-NOT: split`

			`; CHECK: Name: Dependence`
			`; CHECK-NEXT: Function: interchange_01`
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3i - 6] = i; ;; B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. 
Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837 2018-03-07 05:55:59 +08:00
Add a new pass "Loop Interchange" This pass interchanges loops to provide a more cache-friendly memory access. For e.g. given a loop like - for(int i=0;i<N;i++) for(int j=0;j<N;j++) A[j][i] = A[j][i]+B[j][i]; is interchanged to - for(int j=0;j<N;j++) for(int i=0;i<N;i++) A[j][i] = A[j][i]+B[j][i]; This pass is currently disabled by default. To give a brief introduction it consists of 3 stages- LoopInterchangeLegality : Checks the legality of loop interchange based on Dependency matrix. LoopInterchangeProfitability: A very basic heuristic has been added to check for profitibility. This will evolve over time. LoopInterchangeTransform : Which does the actual transform. LNT Performance tests shows improvement in Polybench/linear-algebra/kernels/mvt and Polybench/linear-algebra/kernels/gemver becnmarks. TODO: 1) Add support for reductions and lcssa phi. 2) Improve profitability model. 3) Improve loop selection algorithm to select best loop for interchange. Currently the innermost loop is selected for interchange. 4) Improve compile time regression found in llvm lnt due to this pass. 5) Fix issues in Dependency Analysis module. A special thanks to Hal for reviewing this code. Review: http://reviews.llvm.org/D7499 llvm-svn: 231458 2015-03-06 18:11:25 +08:00			`define void @interchange_01(i32 %k, i32 %N) {`
			`entry:`
			`%sub = add nsw i32 %N, -1`
			`%cmp26 = icmp sgt i32 %N, 1`
			`br i1 %cmp26, label %for.cond1.preheader.lr.ph, label %for.end17`

			`for.cond1.preheader.lr.ph:`
			`%cmp324 = icmp sgt i32 %sub, 1`
			`%0 = add i32 %N, -2`
			`%1 = sext i32 %sub to i64`
			`br label %for.cond1.preheader`

			`for.cond.loopexit:`
			`%cmp = icmp slt i64 %indvars.iv.next29, %1`
			`br i1 %cmp, label %for.cond1.preheader, label %for.end17`

			`for.cond1.preheader:`
			`%indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.cond.loopexit ]`
			`%indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1`
			`br i1 %cmp324, label %for.body4, label %for.cond.loopexit`

			`for.body4:`
			`%indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.cond1.preheader ]`
			`%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1`
			`%arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29`
			`%2 = load i32, i32* %arrayidx7`
			`%add8 = add nsw i32 %2, %k`
			`store i32 %add8, i32* %arrayidx7`
			`%lftr.wideiv = trunc i64 %indvars.iv to i32`
			`%exitcond = icmp eq i32 %lftr.wideiv, %0`
			`br i1 %exitcond, label %for.cond.loopexit, label %for.body4`

			`for.end17:`
			`ret void`
			`}`
[LoopInterchange] Use getExitBlock()/getExitingBlock instead of manual impl. This also means we have to check if the latch is the exiting block now, as `transform` expects the latches to be the exiting blocks too. https://bugs.llvm.org/show_bug.cgi?id=36586 Reviewers: efriedma, davide, karthikthecool Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D45279 llvm-svn: 330806 2018-04-25 17:35:54 +08:00
			`; When currently cannot interchange this loop, because transform currently`
			`; expects the latches to be the exiting blocks too.`

			`; IR-LABEL: @interchange_02`
			`; IR-NOT: split`
			`;`
			`; CHECK: Name: ExitingNotLatch`
			`; CHECK-NEXT: Function: interchange_02`
			`define void @interchange_02(i64 %k, i64 %N) {`
			`entry:`
			`br label %for1.header`

			`for1.header:`
			`%j23 = phi i64 [ 0, %entry ], [ %j.next24, %for1.inc10 ]`
			`br label %for2`

			`for2:`
			`%j = phi i64 [ %j.next, %latch ], [ 0, %for1.header ]`
			`%arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @C, i64 0, i64 %j, i64 %j23`
			`%lv = load i64, i64* %arrayidx5`
			`%add = add nsw i64 %lv, %k`
			`store i64 %add, i64* %arrayidx5`
			`%exitcond = icmp eq i64 %j, 99`
			`br i1 %exitcond, label %for1.inc10, label %latch`
			`latch:`
			`%j.next = add nuw nsw i64 %j, 1`
			`br label %for2`

			`for1.inc10:`
			`%j.next24 = add nuw nsw i64 %j23, 1`
			`%exitcond26 = icmp eq i64 %j23, 99`
			`br i1 %exitcond26, label %for.end12, label %for1.header`

			`for.end12:`
			`ret void`
			`}`