llvm-project/polly/test/ScopInfo/multidim_fortran_2d_params.ll

; RUN: opt %loadPolly -polly-scops -analyze \
; RUN: -polly-precise-fold-accesses \
; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s

;   subroutine init_array(ni, nj, pi, pj, a)
;   implicit none

;   double precision, dimension(nj, ni) :: a
;   integer*8 :: ni, nj
;   integer*8 :: pi, pj
;   integer*8 :: i, j

;   do i = 1, ni
;     do j = 1, nj
;       a(j + pi, i + pj) = i + j
;     end do
;   end do
;   end subroutine

; CHECK: [tmp9, nj_loaded2, tmp20, tmp19] -> { Stmt_bb17[i0, i1] -> MemRef_a[-1 + tmp20 + i0, nj_loaded2 + tmp19 + i1] : i1 < -tmp19; Stmt_bb17[i0, i1] -> MemRef_a[tmp20 + i0, tmp19 + i1] : i1 >= -tmp19 };


target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"

; @blam is the IR for the Fortran subroutine init_array shown in the comment
; at the top of this file: a doubly nested loop that stores double(i + j)
; into a linearized 2D array.
;
; Judging from how the loaded values are used below, the pointer arguments
; appear to correspond to the Fortran dummies as follows (by-reference
; passing): %arg -> ni, %nj -> nj, %arg2 -> pi, %arg3 -> pj, %a -> a.
;
; The store address is built as
;     (%tmp20 + i) * max(nj, 0) + (j + ~max(nj, 0) + %tmp19)
; i.e. the array-size term is wrapped in a "max with 0" (the icmp/select pair
; in %bb5) rather than being the plain loaded size.
; Function Attrs: nounwind uwtable
define void @blam(i64* noalias %arg, i64* noalias %nj, i64* noalias %arg2, i64* noalias %arg3, [0 x double]* noalias %a) unnamed_addr #0 {
bb:
  br label %bb5

; Preheader: compute the clamped row size and guard the outer loop.
bb5:                                              ; preds = %bb
  %nj_loaded = load i64, i64* %nj, align 8
  ; %tmp7 = max(%nj_loaded, 0), expressed as icmp slt + select.
  %tmp6 = icmp slt i64 %nj_loaded, 0
  %tmp7 = select i1 %tmp6, i64 0, i64 %nj_loaded
  ; xor with -1 is bitwise NOT, so %tmp8 = -%tmp7 - 1.
  %tmp8 = xor i64 %tmp7, -1
  ; Outer trip count; the loop nest is skipped entirely unless %tmp9 > 0.
  %tmp9 = load i64, i64* %arg, align 8
  %tmp10 = icmp sgt i64 %tmp9, 0
  br i1 %tmp10, label %bb11, label %bb36

bb11:                                             ; preds = %bb5
  br label %bb12

; Outer loop header: %tmp13 is the outer induction variable, starting at 1.
bb12:                                             ; preds = %bb32, %bb11
  %tmp13 = phi i64 [ %tmp34, %bb32 ], [ 1, %bb11 ]
  ; %nj is loaded a second time here; this copy bounds the inner loop, which
  ; is skipped unless it is > 0.
  %nj_loaded2 = load i64, i64* %nj, align 8
  %tmp15 = icmp sgt i64 %nj_loaded2, 0
  br i1 %tmp15, label %bb16, label %bb32

bb16:                                             ; preds = %bb12
  br label %bb17

; Inner loop body: %tmp18 is the inner induction variable, starting at 1.
bb17:                                             ; preds = %bb17, %bb16
  %tmp18 = phi i64 [ %tmp30, %bb17 ], [ 1, %bb16 ]
  ; The two offsets are reloaded on every inner iteration.
  ; NOTE(review): presumably this is why the test enables
  ; -polly-invariant-load-hoisting; confirm against the pass behaviour.
  %tmp19 = load i64, i64* %arg2, align 8
  %tmp20 = load i64, i64* %arg3, align 8
  ; Linearized index:
  ;   %tmp22 = (%tmp20 + %tmp13) * %tmp7
  ;   %tmp24 = %tmp18 + %tmp8 + %tmp19
  ;   %tmp25 = %tmp24 + %tmp22
  %tmp21 = add i64 %tmp20, %tmp13
  %tmp22 = mul i64 %tmp21, %tmp7
  %tmp23 = add i64 %tmp18, %tmp8
  %tmp24 = add i64 %tmp23, %tmp19
  %tmp25 = add i64 %tmp24, %tmp22
  ; Stored value: the sum of both induction variables, converted to double.
  %tmp26 = add i64 %tmp18, %tmp13
  %tmp27 = sitofp i64 %tmp26 to double
  %tmp28 = getelementptr [0 x double], [0 x double]* %a, i64 0, i64 %tmp25
  store double %tmp27, double* %tmp28, align 8
  ; Leave the inner loop after the iteration where %tmp18 == %nj_loaded2.
  %tmp29 = icmp eq i64 %tmp18, %nj_loaded2
  %tmp30 = add i64 %tmp18, 1
  br i1 %tmp29, label %bb31, label %bb17

bb31:                                             ; preds = %bb17
  br label %bb32

; Outer loop latch: leave after the iteration where %tmp13 == %tmp9.
bb32:                                             ; preds = %bb31, %bb12
  %tmp33 = icmp eq i64 %tmp13, %tmp9
  %tmp34 = add i64 %tmp13, 1
  br i1 %tmp33, label %bb35, label %bb12

bb35:                                             ; preds = %bb32
  br label %bb36

bb36:                                             ; preds = %bb35, %bb5
  ret void
}

attributes #0 = { nounwind uwtable }
[tests] Force invariant load hoisting for test cases that need it -- III llvm-svn: 278673 2016-08-15 23:56:24 +08:00			`; RUN: opt %loadPolly -polly-scops -analyze \`
[ScopInfo] Disable memory folding in case it results in multi-disjunct relations Multi-disjunct access maps can easily result in inbound assumptions which explode in case of many memory accesses and many parameters. This change reduces compilation time of some larger kernel from over 15 minutes to less than 16 seconds. Interesting is the test case test/ScopInfo/multidim_param_in_subscript.ll which has a memory access [n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0, -1 + n - i1] } which requires folding, but where only a single disjunct remains. We can still model this test case even when only using limited memory folding. For people only reading commit messages, here the comment that explains what memory folding is: To recover memory accesses with array size parameters in the subscript expression we post-process the delinearization results. We would normally recover from an access A[exp0(i) * N + exp1(i)] into an array A[][N] the 2D access A[exp0(i)][exp1(i)]. However, another valid delinearization is A[exp0(i) - 1][exp1(i) + N] which - depending on the range of exp1(i) - may be preferable. Specifically, for cases where we know exp1(i) is negative, we want to choose the latter expression. As we commonly do not have any information about the range of exp1(i), we do not choose one of the two options, but instead create a piecewise access function that adds the (-1, N) offsets as soon as exp1(i) becomes negative. For a 2D array such an access function is created by applying the piecewise map: [i,j] -> [i, j] : j >= 0 [i,j] -> [i-1, j+N] : j < 0 After this patch we generate only the first case, except for situations where we can prove the first case to be invalid and can consequently select the second without introducing disjuncts. llvm-svn: 296679 2017-03-02 05:11:27 +08:00			`; RUN: -polly-precise-fold-accesses \`
[tests] Force invariant load hoisting for test cases that need it -- III llvm-svn: 278673 2016-08-15 23:56:24 +08:00			`; RUN: -polly-invariant-load-hoisting=true < %s \| FileCheck %s`
ScopInfo: Add support for delinearizing fortran arrays gfortran (and fortran in general?) does not compute the address of an array element directly from the array sizes (e.g., %s0, %s1), but takes first the maximum of the sizes and 0 (e.g., max(0, %s0)) before multiplying the resulting value with the per-dimension array subscript expressions. To successfully delinearize index expressions as we see them in fortran, we first filter 'smax' expressions out of the SCEV expression, use them to guess array size parameters and only then continue with the existing delinearization. llvm-svn: 253995 2015-11-25 01:06:38 +08:00
			`; subroutine init_array(ni, nj, pi, pj, a)`
			`; implicit none`

			`; double precision, dimension(nj, ni) :: a`
			`; integer*8 :: ni, nj`
			`; integer*8 :: pi, pj`
			`; integer*8 :: i, j`

			`; do i = 1, ni`
			`; do j = 1, nj`
			`; a(j + pi, i + pj) = i + j`
			`; end do`
			`; end do`
			`; end subroutine`

Update to ISL 0.16.1 llvm-svn: 257898 2016-01-15 23:54:45 +08:00			`; CHECK: [tmp9, nj_loaded2, tmp20, tmp19] -> { Stmt_bb17[i0, i1] -> MemRef_a[-1 + tmp20 + i0, nj_loaded2 + tmp19 + i1] : i1 < -tmp19; Stmt_bb17[i0, i1] -> MemRef_a[tmp20 + i0, tmp19 + i1] : i1 >= -tmp19 };`
ScopInfo: Add support for delinearizing fortran arrays gfortran (and fortran in general?) does not compute the address of an array element directly from the array sizes (e.g., %s0, %s1), but takes first the maximum of the sizes and 0 (e.g., max(0, %s0)) before multiplying the resulting value with the per-dimension array subscript expressions. To successfully delinearize index expressions as we see them in fortran, we first filter 'smax' expressions out of the SCEV expression, use them to guess array size parameters and only then continue with the existing delinearization. llvm-svn: 253995 2015-11-25 01:06:38 +08:00

			`target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"`
			`target triple = "x86_64-unknown-linux-gnu"`

			`module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"`

			`; Function Attrs: nounwind uwtable`
			`define void @blam(i64* noalias %arg, i64* noalias %nj, i64* noalias %arg2, i64* noalias %arg3, [0 x double]* noalias %a) unnamed_addr #0 {`
			`bb:`
			`br label %bb5`

			`bb5: ; preds = %bb`
			`%nj_loaded = load i64, i64* %nj, align 8`
			`%tmp6 = icmp slt i64 %nj_loaded, 0`
			`%tmp7 = select i1 %tmp6, i64 0, i64 %nj_loaded`
			`%tmp8 = xor i64 %tmp7, -1`
			`%tmp9 = load i64, i64* %arg, align 8`
			`%tmp10 = icmp sgt i64 %tmp9, 0`
			`br i1 %tmp10, label %bb11, label %bb36`

			`bb11: ; preds = %bb5`
			`br label %bb12`

			`bb12: ; preds = %bb32, %bb11`
			`%tmp13 = phi i64 [ %tmp34, %bb32 ], [ 1, %bb11 ]`
			`%nj_loaded2 = load i64, i64* %nj, align 8`
			`%tmp15 = icmp sgt i64 %nj_loaded2, 0`
			`br i1 %tmp15, label %bb16, label %bb32`

			`bb16: ; preds = %bb12`
			`br label %bb17`

			`bb17: ; preds = %bb17, %bb16`
			`%tmp18 = phi i64 [ %tmp30, %bb17 ], [ 1, %bb16 ]`
			`%tmp19 = load i64, i64* %arg2, align 8`
			`%tmp20 = load i64, i64* %arg3, align 8`
			`%tmp21 = add i64 %tmp20, %tmp13`
			`%tmp22 = mul i64 %tmp21, %tmp7`
			`%tmp23 = add i64 %tmp18, %tmp8`
			`%tmp24 = add i64 %tmp23, %tmp19`
			`%tmp25 = add i64 %tmp24, %tmp22`
			`%tmp26 = add i64 %tmp18, %tmp13`
			`%tmp27 = sitofp i64 %tmp26 to double`
			`%tmp28 = getelementptr [0 x double], [0 x double]* %a, i64 0, i64 %tmp25`
			`store double %tmp27, double* %tmp28, align 8`
			`%tmp29 = icmp eq i64 %tmp18, %nj_loaded2`
			`%tmp30 = add i64 %tmp18, 1`
			`br i1 %tmp29, label %bb31, label %bb17`

			`bb31: ; preds = %bb17`
			`br label %bb32`

			`bb32: ; preds = %bb31, %bb12`
			`%tmp33 = icmp eq i64 %tmp13, %tmp9`
			`%tmp34 = add i64 %tmp13, 1`
			`br i1 %tmp33, label %bb35, label %bb12`

			`bb35: ; preds = %bb32`
			`br label %bb36`

			`bb36: ; preds = %bb35, %bb5`
			`ret void`
			`}`

			`attributes #0 = { nounwind uwtable }`