[ScopInfo] Disable memory folding in case it results in multi-disjunct relations
Multi-disjunct access maps can easily result in inbound assumptions which
explode in case of many memory accesses and many parameters. This change reduces
compilation time of some larger kernel from over 15 minutes to less than 16
seconds.
Interesting is the test case test/ScopInfo/multidim_param_in_subscript.ll
which has a memory access
[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0, -1 + n - i1] }
which requires folding, but where only a single disjunct remains. We can still
model this test case even when only using limited memory folding.
For people only reading commit messages, here is the comment that explains what
memory folding is:
To recover memory accesses with array size parameters in the subscript
expression we post-process the delinearization results.
We would normally recover from an access A[exp0(i) * N + exp1(i)] into an
array A[][N] the 2D access A[exp0(i)][exp1(i)]. However, another valid
delinearization is A[exp0(i) - 1][exp1(i) + N] which - depending on the
range of exp1(i) - may be preferable. Specifically, for cases where we
know exp1(i) is negative, we want to choose the latter expression.
As we commonly do not have any information about the range of exp1(i),
we do not choose one of the two options, but instead create a piecewise
access function that adds the (-1, N) offsets as soon as exp1(i) becomes
negative. For a 2D array such an access function is created by applying
the piecewise map:
[i,j] -> [i, j] : j >= 0
[i,j] -> [i-1, j+N] : j < 0
After this patch we generate only the first case, except for situations where
we can prove the first case to be invalid and can consequently select the
second without introducing disjuncts.
llvm-svn: 296679
2017-03-02 05:11:27 +08:00
|
|
|
; RUN: opt %loadPolly -polly-scops -analyze < %s \
|
|
|
|
; RUN: -polly-precise-fold-accesses | FileCheck %s
|
2012-09-11 22:03:19 +08:00
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
|
|
|
|
|
|
; void foo(long n, long m, long o, double A[n][m][o], long p, long q, long r) {
|
|
|
|
;
|
|
|
|
; for (long i = 0; i < n; i++)
|
|
|
|
; for (long j = 0; j < m; j++)
|
|
|
|
; for (long k = 0; k < o; k++)
|
|
|
|
; A[i+p][j+q][k+r] = 1.0;
|
|
|
|
; }
|
|
|
|
;
|
|
|
|
; Access function:
|
|
|
|
; {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,
|
|
|
|
; (8 * %o)}<%for.j>,+,8}<%for.k>
|
2014-07-03 01:47:48 +08:00
|
|
|
|
2016-01-15 08:48:42 +08:00
|
|
|
; CHECK: Assumed Context:
|
2016-01-15 23:54:45 +08:00
|
|
|
; CHECK-NEXT: [o, m, n, p, q, r] -> { : -m <= q <= 1 and ((-m < q <= 0 and -o < r < 0) or (r = 0 and q <= 0) or (r = -o and q > -m)) }
|
2012-09-11 22:03:19 +08:00
|
|
|
;
|
2016-01-15 08:48:42 +08:00
|
|
|
; CHECK: p0: %o
|
|
|
|
; CHECK-NEXT: p1: %m
|
|
|
|
; CHECK-NEXT: p2: %n
|
|
|
|
; CHECK-NEXT: p3: %p
|
|
|
|
; CHECK-NEXT: p4: %q
|
|
|
|
; CHECK-NEXT: p5: %r
|
|
|
|
; CHECK-NOT: p6
|
2014-04-09 05:20:44 +08:00
|
|
|
;
|
2016-01-15 08:48:42 +08:00
|
|
|
; CHECK: Statements {
|
|
|
|
; CHECK-NEXT: Stmt_for_k
|
|
|
|
; CHECK-NEXT: Domain :=
|
2016-01-15 23:54:45 +08:00
|
|
|
; CHECK-NEXT: [o, m, n, p, q, r] -> { Stmt_for_k[i0, i1, i2] : 0 <= i0 < n and 0 <= i1 < m and 0 <= i2 < o };
|
2016-01-15 08:48:42 +08:00
|
|
|
; CHECK-NEXT: Schedule :=
|
|
|
|
; CHECK-NEXT: [o, m, n, p, q, r] -> { Stmt_for_k[i0, i1, i2] -> [i0, i1, i2] };
|
|
|
|
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
2016-01-15 23:54:45 +08:00
|
|
|
; CHECK-NEXT: [o, m, n, p, q, r] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[-1 + p + i0, -1 + m + q + i1, o + r + i2] : i1 <= -q and i2 < -r; Stmt_for_k[i0, i1, i2] -> MemRef_A[p + i0, -1 + q + i1, o + r + i2] : i1 > -q and i2 < -r; Stmt_for_k[i0, i1, i2] -> MemRef_A[-1 + p + i0, m + q + i1, r + i2] : i1 < -q and i2 >= -r; Stmt_for_k[i0, i1, i2] -> MemRef_A[p + i0, q + i1, r + i2] : i1 >= -q and i2 >= -r };
|
2016-01-15 08:48:42 +08:00
|
|
|
; CHECK-NEXT: }
|
2012-09-11 22:03:19 +08:00
|
|
|
|
|
|
|
; @foo: three nested counted loops (for.i / for.j / for.k) that store 1.0 into
; the flat double array %A at the linearized element index
;   ((i + p) * m + (j + q)) * o + (k + r)
; Each loop compares its incremented counter against %n, %m and %o respectively
; at the bottom of the loop, so every body runs at least once when entered.
define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
|
|
|
|
entry:
|
|
|
|
br label %for.i
|
|
|
|
|
|
|
|
; Outermost loop header: %i starts at 0 and is advanced in for.i.inc.
for.i:
|
|
|
|
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
|
|
|
|
br label %for.j
|
|
|
|
|
|
|
|
; Middle loop header: %j restarts at 0 each time it is entered from for.i.
for.j:
|
|
|
|
%j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
|
|
|
|
br label %for.k
|
|
|
|
|
|
|
|
; Innermost loop: computes the linearized subscript and performs the store.
for.k:
|
|
|
|
%k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
|
|
|
|
; offset0 = i + p
%offset0 = add nsw i64 %i, %p
|
|
|
|
; subscript0 = (i + p) * m
%subscript0 = mul i64 %offset0, %m
|
|
|
|
; offset1 = j + q
%offset1 = add nsw i64 %j, %q
|
|
|
|
; subscript1 = (i + p) * m + (j + q)
%subscript1 = add i64 %offset1, %subscript0
|
|
|
|
; subscript2 = ((i + p) * m + (j + q)) * o
%subscript2 = mul i64 %subscript1, %o
|
|
|
|
; offset2 = k + r
%offset2 = add nsw i64 %k, %r
|
|
|
|
; subscript = ((i + p) * m + (j + q)) * o + (k + r)
%subscript = add i64 %subscript2, %offset2
|
2015-02-28 03:20:19 +08:00
|
|
|
; Address of element %subscript of %A, indexed in units of double.
%idx = getelementptr inbounds double, double* %A, i64 %subscript
|
2012-09-11 22:03:19 +08:00
|
|
|
store double 1.0, double* %idx
|
|
|
|
br label %for.k.inc
|
|
|
|
|
|
|
|
; Innermost latch: k++ and leave the loop once the incremented k equals %o.
for.k.inc:
|
|
|
|
%k.inc = add nsw i64 %k, 1
|
|
|
|
%k.exitcond = icmp eq i64 %k.inc, %o
|
|
|
|
br i1 %k.exitcond, label %for.j.inc, label %for.k
|
|
|
|
|
|
|
|
; Middle latch: j++ and leave the loop once the incremented j equals %m.
for.j.inc:
|
|
|
|
%j.inc = add nsw i64 %j, 1
|
|
|
|
%j.exitcond = icmp eq i64 %j.inc, %m
|
|
|
|
br i1 %j.exitcond, label %for.i.inc, label %for.j
|
|
|
|
|
|
|
|
; Outermost latch: i++ and leave the loop once the incremented i equals %n.
for.i.inc:
|
|
|
|
%i.inc = add nsw i64 %i, 1
|
|
|
|
%i.exitcond = icmp eq i64 %i.inc, %n
|
|
|
|
br i1 %i.exitcond, label %end, label %for.i
|
|
|
|
|
|
|
|
end:
|
|
|
|
ret void
|
|
|
|
}
|