2015-06-08 18:27:06 +08:00
|
|
|
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
|
2016-07-07 09:01:53 +08:00
|
|
|
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
|
2015-06-08 18:27:06 +08:00
|
|
|
|
|
|
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
|
|
|
target triple = "aarch64--linux-gnueabi"
|
|
|
|
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; 3 reads and 3 writes should need 12 memchecks
|
|
|
|
; CHECK: function 'testf':
|
2015-06-08 18:27:06 +08:00
|
|
|
; CHECK: Memory dependences are safe with run-time checks
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
|
|
|
|
; Memory dependencies have labels starting from 0, so in
|
2015-06-08 18:27:06 +08:00
|
|
|
; order to verify that we have n checks, we look for
|
|
|
|
; (n-1): and not n:.
|
|
|
|
|
|
|
|
; CHECK: Run-time memory checks:
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: Check 0:
|
|
|
|
; CHECK: Check 11:
|
|
|
|
; CHECK-NOT: Check 12:
|
2015-06-08 18:27:06 +08:00
|
|
|
|
|
|
|
define void @testf(i16* %a,
|
|
|
|
i16* %b,
|
|
|
|
i16* %c,
|
|
|
|
i16* %d,
|
|
|
|
i16* %e,
|
|
|
|
i16* %f) {
|
|
|
|
entry:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
|
|
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
|
|
|
|
|
|
%add = add nuw nsw i64 %ind, 1
|
|
|
|
|
|
|
|
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
|
|
|
|
%loadA = load i16, i16* %arrayidxA, align 2
|
|
|
|
|
|
|
|
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
|
|
|
%loadB = load i16, i16* %arrayidxB, align 2
|
|
|
|
|
|
|
|
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
|
|
|
|
%loadC = load i16, i16* %arrayidxC, align 2
|
|
|
|
|
|
|
|
%mul = mul i16 %loadB, %loadA
|
|
|
|
%mul1 = mul i16 %mul, %loadC
|
|
|
|
|
|
|
|
%arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
|
|
|
|
store i16 %mul1, i16* %arrayidxD, align 2
|
|
|
|
|
|
|
|
%arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
|
|
|
|
store i16 %mul, i16* %arrayidxE, align 2
|
|
|
|
|
|
|
|
%arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
|
|
|
|
store i16 %mul1, i16* %arrayidxF, align 2
|
|
|
|
|
|
|
|
%exitcond = icmp eq i64 %add, 20
|
|
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
|
|
|
|
for.end: ; preds = %for.body
|
|
|
|
ret void
|
|
|
|
}
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
|
|
|
|
; The following (testg and testh) check that we can group
|
|
|
|
; memory checks of accesses which differ by a constant value.
|
|
|
|
; Both tests are based on the following C code:
|
|
|
|
;
|
|
|
|
; void testh(short *a, short *b, short *c) {
|
|
|
|
; unsigned long ind = 0;
|
|
|
|
; for (unsigned long ind = 0; ind < 20; ++ind) {
|
|
|
|
; c[2 * ind] = a[ind] * a[ind + 1];
|
|
|
|
; c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
;
|
|
|
|
; It is sufficient to check the intervals
|
|
|
|
; [a, a + 21], [b, b + 20] against [c, c + 41].
|
|
|
|
|
|
|
|
; 3 reads and 2 writes - two of the reads can be merged,
|
|
|
|
; and the writes can be merged as well. This gives us a
|
|
|
|
; total of 2 memory checks.
|
|
|
|
|
|
|
|
; CHECK: function 'testg':
|
|
|
|
|
|
|
|
; CHECK: Run-time memory checks:
|
|
|
|
; CHECK-NEXT: Check 0:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[ONE:.+]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: Check 1:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[TWO:.+]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: Grouped accesses:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
[SCEV] No-wrap flags are not propagated when folding "{S,+,X}+T ==> {S+T,+,X}"
Summary:
**Description**
This makes `WidenIV::widenIVUse` (IndVarSimplify.cpp) fail to widen narrow IV uses in some cases. The latter affects IndVarSimplify which may not eliminate narrow IV's when there actually exists such a possibility, thereby producing ineffective code.
When `WidenIV::widenIVUse` gets a NarrowUse such as `{(-2 + %inc.lcssa),+,1}<nsw><%for.body3>`, it first tries to get a wide recurrence for it via the `getWideRecurrence` call.
`getWideRecurrence` returns recurrence like this: `{(sext i32 (-2 + %inc.lcssa) to i64),+,1}<nsw><%for.body3>`.
Then a wide use operation is generated by `cloneIVUser`. The generated wide use is evaluated to `{(-2 + (sext i32 %inc.lcssa to i64))<nsw>,+,1}<nsw><%for.body3>`, which is different from the `getWideRecurrence` result. `cloneIVUser` sees the difference and returns nullptr.
This patch also fixes the broken LLVM tests by adding missing <nsw> entries introduced by the correction.
**Minimal reproducer:**
```
int foo(int a, int b, int c);
int baz();
void bar()
{
int arr[20];
int i = 0;
for (i = 0; i < 4; ++i)
arr[i] = baz();
for (; i < 20; ++i)
arr[i] = foo(arr[i - 4], arr[i - 3], arr[i - 2]);
}
```
**Clang command line:**
```
clang++ -mllvm -debug -S -emit-llvm -O3 --target=aarch64-linux-elf test.cpp -o test.ir
```
**Expected result:**
The ` -mllvm -debug` log shows that all the IV's for the second `for` loop have been eliminated.
Reviewers: sanjoy
Subscribers: atrick, asl, aemerson, mzolotukhin, llvm-commits
Differential Revision: http://reviews.llvm.org/D20058
llvm-svn: 270695
2016-05-25 21:01:33 +08:00
|
|
|
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: Member: {%c,+,4}
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
[SCEV] No-wrap flags are not propagated when folding "{S,+,X}+T ==> {S+T,+,X}"
Summary:
**Description**
This makes `WidenIV::widenIVUse` (IndVarSimplify.cpp) fail to widen narrow IV uses in some cases. The latter affects IndVarSimplify which may not eliminate narrow IV's when there actually exists such a possibility, thereby producing ineffective code.
When `WidenIV::widenIVUse` gets a NarrowUse such as `{(-2 + %inc.lcssa),+,1}<nsw><%for.body3>`, it first tries to get a wide recurrence for it via the `getWideRecurrence` call.
`getWideRecurrence` returns recurrence like this: `{(sext i32 (-2 + %inc.lcssa) to i64),+,1}<nsw><%for.body3>`.
Then a wide use operation is generated by `cloneIVUser`. The generated wide use is evaluated to `{(-2 + (sext i32 %inc.lcssa to i64))<nsw>,+,1}<nsw><%for.body3>`, which is different from the `getWideRecurrence` result. `cloneIVUser` sees the difference and returns nullptr.
This patch also fixes the broken LLVM tests by adding missing <nsw> entries introduced by the correction.
**Minimal reproducer:**
```
int foo(int a, int b, int c);
int baz();
void bar()
{
int arr[20];
int i = 0;
for (i = 0; i < 4; ++i)
arr[i] = baz();
for (; i < 20; ++i)
arr[i] = foo(arr[i - 4], arr[i - 3], arr[i - 2]);
}
```
**Clang command line:**
```
clang++ -mllvm -debug -S -emit-llvm -O3 --target=aarch64-linux-elf test.cpp -o test.ir
```
**Expected result:**
The ` -mllvm -debug` log shows that all the IV's for the second `for` loop have been eliminated.
Reviewers: sanjoy
Subscribers: atrick, asl, aemerson, mzolotukhin, llvm-commits
Differential Revision: http://reviews.llvm.org/D20058
llvm-svn: 270695
2016-05-25 21:01:33 +08:00
|
|
|
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: Member: {%a,+,2}
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: Member: {%b,+,2}
|
|
|
|
|
|
|
|
define void @testg(i16* %a,
|
|
|
|
i16* %b,
|
|
|
|
i16* %c) {
|
|
|
|
entry:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
|
|
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
|
|
%store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
|
|
|
|
|
|
|
|
%add = add nuw nsw i64 %ind, 1
|
|
|
|
%store_ind_inc = add nuw nsw i64 %store_ind, 1
|
|
|
|
%store_ind_next = add nuw nsw i64 %store_ind_inc, 1
|
|
|
|
|
|
|
|
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
|
|
|
|
%loadA = load i16, i16* %arrayidxA, align 2
|
|
|
|
|
|
|
|
%arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
|
|
|
|
%loadA1 = load i16, i16* %arrayidxA1, align 2
|
|
|
|
|
|
|
|
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
|
|
|
%loadB = load i16, i16* %arrayidxB, align 2
|
|
|
|
|
|
|
|
%mul = mul i16 %loadA, %loadA1
|
|
|
|
%mul1 = mul i16 %mul, %loadB
|
|
|
|
|
|
|
|
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
|
|
|
store i16 %mul1, i16* %arrayidxC, align 2
|
|
|
|
|
|
|
|
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
|
|
|
store i16 %mul, i16* %arrayidxC1, align 2
|
|
|
|
|
|
|
|
%exitcond = icmp eq i64 %add, 20
|
|
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
|
|
|
|
for.end: ; preds = %for.body
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; 3 reads and 2 writes - the writes can be merged into a single
|
|
|
|
; group, but the GEPs used for the reads are not marked as inbounds.
|
|
|
|
; We can still merge them because we are using a unit stride for
|
|
|
|
; accesses, so we cannot overflow the GEPs.
|
|
|
|
|
|
|
|
; CHECK: function 'testh':
|
|
|
|
; CHECK: Run-time memory checks:
|
|
|
|
; CHECK-NEXT: Check 0:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[ONE:.+]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: Check 1:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[TWO:.+]]):
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: Grouped accesses:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
[SCEV] No-wrap flags are not propagated when folding "{S,+,X}+T ==> {S+T,+,X}"
Summary:
**Description**
This makes `WidenIV::widenIVUse` (IndVarSimplify.cpp) fail to widen narrow IV uses in some cases. The latter affects IndVarSimplify which may not eliminate narrow IV's when there actually exists such a possibility, thereby producing ineffective code.
When `WidenIV::widenIVUse` gets a NarrowUse such as `{(-2 + %inc.lcssa),+,1}<nsw><%for.body3>`, it first tries to get a wide recurrence for it via the `getWideRecurrence` call.
`getWideRecurrence` returns recurrence like this: `{(sext i32 (-2 + %inc.lcssa) to i64),+,1}<nsw><%for.body3>`.
Then a wide use operation is generated by `cloneIVUser`. The generated wide use is evaluated to `{(-2 + (sext i32 %inc.lcssa to i64))<nsw>,+,1}<nsw><%for.body3>`, which is different from the `getWideRecurrence` result. `cloneIVUser` sees the difference and returns nullptr.
This patch also fixes the broken LLVM tests by adding missing <nsw> entries introduced by the correction.
**Minimal reproducer:**
```
int foo(int a, int b, int c);
int baz();
void bar()
{
int arr[20];
int i = 0;
for (i = 0; i < 4; ++i)
arr[i] = baz();
for (; i < 20; ++i)
arr[i] = foo(arr[i - 4], arr[i - 3], arr[i - 2]);
}
```
**Clang command line:**
```
clang++ -mllvm -debug -S -emit-llvm -O3 --target=aarch64-linux-elf test.cpp -o test.ir
```
**Expected result:**
The ` -mllvm -debug` log shows that all the IV's for the second `for` loop have been eliminated.
Reviewers: sanjoy
Subscribers: atrick, asl, aemerson, mzolotukhin, llvm-commits
Differential Revision: http://reviews.llvm.org/D20058
llvm-svn: 270695
2016-05-25 21:01:33 +08:00
|
|
|
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: Member: {%c,+,4}
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
2015-07-09 23:18:25 +08:00
|
|
|
; CHECK-NEXT: Member: {(2 + %a),+,2}
|
2015-07-13 22:48:24 +08:00
|
|
|
; CHECK-NEXT: Member: {%a,+,2}
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
[LAA] Merge memchecks for accesses separated by a constant offset
Summary:
Often filter-like loops will do memory accesses that are
separated by constant offsets. In these cases it is
common that we will exceed the threshold for the
allowable number of checks.
However, it should be possible to merge such checks,
sice a check of any interval againt two other intervals separated
by a constant offset (a,b), (a+c, b+c) will be equivalent with
a check againt (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
Assuming the loop will be executed for a sufficient number of
iterations, this will be true. If not true, checking against
(a, b+c) is still safe (although not equivalent).
As long as there are no dependencies between two accesses,
we can merge their checks into a single one. We use this
technique to construct groups of accesses, and then check
the intervals associated with the groups instead of
checking the accesses directly.
Reviewers: anemet
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D10386
llvm-svn: 241673
2015-07-08 17:16:33 +08:00
|
|
|
; CHECK-NEXT: Member: {%b,+,2}
|
|
|
|
|
|
|
|
define void @testh(i16* %a,
|
|
|
|
i16* %b,
|
|
|
|
i16* %c) {
|
|
|
|
entry:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
|
|
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
|
|
%store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
|
|
|
|
|
|
|
|
%add = add nuw nsw i64 %ind, 1
|
|
|
|
%store_ind_inc = add nuw nsw i64 %store_ind, 1
|
|
|
|
%store_ind_next = add nuw nsw i64 %store_ind_inc, 1
|
|
|
|
|
|
|
|
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
|
|
|
|
%loadA = load i16, i16* %arrayidxA, align 2
|
|
|
|
|
|
|
|
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
|
|
|
|
%loadA1 = load i16, i16* %arrayidxA1, align 2
|
|
|
|
|
|
|
|
%arrayidxB = getelementptr i16, i16* %b, i64 %ind
|
|
|
|
%loadB = load i16, i16* %arrayidxB, align 2
|
|
|
|
|
|
|
|
%mul = mul i16 %loadA, %loadA1
|
|
|
|
%mul1 = mul i16 %mul, %loadB
|
|
|
|
|
|
|
|
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
|
|
|
|
store i16 %mul1, i16* %arrayidxC, align 2
|
|
|
|
|
|
|
|
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
|
|
|
|
store i16 %mul, i16* %arrayidxC1, align 2
|
|
|
|
|
|
|
|
%exitcond = icmp eq i64 %add, 20
|
|
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
|
|
|
|
for.end: ; preds = %for.body
|
|
|
|
ret void
|
|
|
|
}
|
2015-07-10 00:40:25 +08:00
|
|
|
|
2015-07-28 21:44:08 +08:00
|
|
|
; Don't merge pointers if we need to perform a check against a pointer
|
|
|
|
; to the same underlying object (doing so would emit a check that could be
|
|
|
|
; falsely invalidated) For example, in the following loop:
|
2015-07-10 00:40:25 +08:00
|
|
|
;
|
|
|
|
; for (i = 0; i < 5000; ++i)
|
|
|
|
; a[i + offset] = a[i] + a[i + 10000]
|
|
|
|
;
|
|
|
|
; we should not merge the intervals associated with the reads (0,5000) and
|
|
|
|
; (10000, 15000) into (0, 15000) as this will pottentially fail the check
|
|
|
|
; against the interval associated with the write.
|
2015-07-28 21:44:08 +08:00
|
|
|
;
|
|
|
|
; We cannot have this check unless ShouldRetryWithRuntimeCheck is set,
|
|
|
|
; and therefore the grouping algorithm would create a separate group for
|
|
|
|
; each pointer.
|
2015-07-10 00:40:25 +08:00
|
|
|
|
|
|
|
; CHECK: function 'testi':
|
|
|
|
; CHECK: Run-time memory checks:
|
|
|
|
; CHECK-NEXT: Check 0:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[ONE:.+]]):
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
|
|
|
|
; CHECK-NEXT: Check 1:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Against group ([[TWO:.+]]):
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
|
|
|
|
; CHECK-NEXT: Grouped accesses:
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
|
[SCEV] No-wrap flags are not propagated when folding "{S,+,X}+T ==> {S+T,+,X}"
Summary:
**Description**
This makes `WidenIV::widenIVUse` (IndVarSimplify.cpp) fail to widen narrow IV uses in some cases. The latter affects IndVarSimplify which may not eliminate narrow IV's when there actually exists such a possibility, thereby producing ineffective code.
When `WidenIV::widenIVUse` gets a NarrowUse such as `{(-2 + %inc.lcssa),+,1}<nsw><%for.body3>`, it first tries to get a wide recurrence for it via the `getWideRecurrence` call.
`getWideRecurrence` returns recurrence like this: `{(sext i32 (-2 + %inc.lcssa) to i64),+,1}<nsw><%for.body3>`.
Then a wide use operation is generated by `cloneIVUser`. The generated wide use is evaluated to `{(-2 + (sext i32 %inc.lcssa to i64))<nsw>,+,1}<nsw><%for.body3>`, which is different from the `getWideRecurrence` result. `cloneIVUser` sees the difference and returns nullptr.
This patch also fixes the broken LLVM tests by adding missing <nsw> entries introduced by the correction.
**Minimal reproducer:**
```
int foo(int a, int b, int c);
int baz();
void bar()
{
int arr[20];
int i = 0;
for (i = 0; i < 4; ++i)
arr[i] = baz();
for (; i < 20; ++i)
arr[i] = foo(arr[i - 4], arr[i - 3], arr[i - 2]);
}
```
**Clang command line:**
```
clang++ -mllvm -debug -S -emit-llvm -O3 --target=aarch64-linux-elf test.cpp -o test.ir
```
**Expected result:**
The ` -mllvm -debug` log shows that all the IV's for the second `for` loop have been eliminated.
Reviewers: sanjoy
Subscribers: atrick, asl, aemerson, mzolotukhin, llvm-commits
Differential Revision: http://reviews.llvm.org/D20058
llvm-svn: 270695
2016-05-25 21:01:33 +08:00
|
|
|
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: %a High: (10000 + %a))
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
|
[LAA] Split out a helper to print a collection of memchecks
This is effectively an NFC but we can no longer print the index of the
pointer group so instead I print its address. This still lets us
cross-check the section that list the checks against the section that
list the groups (see how I modified the test).
E.g. before we printed this:
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
Against group 1:
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
...
Grouped accesses:
Group 0:
(Low: %c High: (78 + %c))
Member: {%c,+,4}<%for.body>
Member: {(2 + %c),+,4}<%for.body>
Now we print this (changes are underlined):
Run-time memory checks:
Check 0:
Comparing group (0x7f9c6040c320):
~~~~~~~~~~~~~~
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Against group (0x7f9c6040c358):
~~~~~~~~~~~~~~
%arrayidxA1 = getelementptr i16, i16* %a, i64 %add
%arrayidxA = getelementptr i16, i16* %a, i64 %ind
...
Grouped accesses:
Group 0x7f9c6040c320:
~~~~~~~~~~~~~~
(Low: %c High: (78 + %c))
Member: {(2 + %c),+,4}<%for.body>
Member: {%c,+,4}<%for.body>
llvm-svn: 243354
2015-07-28 07:54:41 +08:00
|
|
|
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
2016-08-28 16:53:53 +08:00
|
|
|
; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
|
2015-07-10 00:40:25 +08:00
|
|
|
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
|
|
|
|
|
|
|
|
define void @testi(i16* %a,
|
|
|
|
i64 %offset) {
|
|
|
|
entry:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
|
|
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
|
|
%store_ind = phi i64 [ %offset, %entry ], [ %store_ind_inc, %for.body ]
|
|
|
|
|
|
|
|
%add = add nuw nsw i64 %ind, 1
|
|
|
|
%store_ind_inc = add nuw nsw i64 %store_ind, 1
|
|
|
|
|
|
|
|
%arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
|
|
|
|
%ind2 = add nuw nsw i64 %ind, 10000
|
|
|
|
%arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
|
|
|
|
|
|
|
|
%loadA1 = load i16, i16* %arrayidxA1, align 2
|
|
|
|
%loadA2 = load i16, i16* %arrayidxA2, align 2
|
|
|
|
|
|
|
|
%addres = add i16 %loadA1, %loadA2
|
|
|
|
|
|
|
|
%storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
|
|
|
|
store i16 %addres, i16* %storeidx, align 2
|
|
|
|
|
|
|
|
%exitcond = icmp eq i64 %add, 5000
|
|
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
|
|
|
|
for.end: ; preds = %for.body
|
|
|
|
ret void
|
|
|
|
}
|