[SCEV] Fix sorting order for AddRecExprs
The existing sorting order defined in CompareSCEVComplexity sorts AddRecExprs
by loop depth, but does not pay attention to dominance of loops. This can
lead us to the following buggy situation:
for (...) { // loop1
op1 = {A,+,B}
}
for (...) { // loop2
op2 = {A,+,B}
S = add op1, op2
}
In this case there is no guarantee that in operand list of S the op2 comes
before op1 (loop depth is the same, so they will be sorted just
lexicographically), so we can incorrectly treat S as a recurrence of loop1,
which is wrong.
This patch changes the sorting logic so that it places the dominated recs
before the dominating recs. This ensures that when we pick the first recurrence
in the operands order, it will be the bottom-most in terms of the dominator tree.
The attached test set includes some tests that produce incorrect SCEV
estimations and crashes with the old logic.
Reviewers: sanjoy, reames, apilipenko, anna
Reviewed By: sanjoy
Subscribers: llvm-commits, mzolotukhin
Differential Revision: https://reviews.llvm.org/D33121
llvm-svn: 303148
2017-05-16 15:27:06 +08:00
|
|
|
; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
|
|
|
|
|
|
|
|
; This test set ensures that we can correctly operate with recurrences from
; different loops.
|
|
|
|
|
|
|
|
; Check that we can evaluate a sum of phis from two different loops in any
; order.
|
|
|
|
|
|
|
|
define void @test_00() {
; Two loops run back to back: %loop1 exits straight into %loop2. Each loop
; has three induction phis with constant starts and steps 1, 2 and 3. The adds
; in %exit mix values from both loops, with the loop1 operand first in some
; and the loop2 operand first in others.

; CHECK-LABEL: Classifying expressions for: @test_00
; CHECK: %sum1 = add i32 %phi1, %phi2
; CHECK-NEXT: --> {14,+,3}<%loop1>
; CHECK: %sum2 = add i32 %sum1, %phi3
; CHECK-NEXT: --> {20,+,6}<%loop1>
; CHECK: %sum3 = add i32 %phi4, %phi5
; CHECK-NEXT: --> {116,+,3}<%loop2>
; CHECK: %sum4 = add i32 %sum3, %phi6
; CHECK-NEXT: --> {159,+,6}<%loop2>
; CHECK: %s1 = add i32 %phi1, %phi4
; CHECK-NEXT: --> {{{{}}73,+,1}<%loop1>,+,1}<%loop2>
; CHECK: %s2 = add i32 %phi5, %phi2
; CHECK-NEXT: --> {{{{}}57,+,2}<%loop1>,+,2}<%loop2>
; CHECK: %s3 = add i32 %sum1, %sum3
; CHECK-NEXT: --> {{{{}}130,+,3}<%loop1>,+,3}<%loop2>
; CHECK: %s4 = add i32 %sum4, %sum2
; CHECK-NEXT: --> {{{{}}179,+,6}<%loop1>,+,6}<%loop2>
; CHECK: %s5 = add i32 %phi3, %sum3
; CHECK-NEXT: --> {{{{}}122,+,3}<%loop1>,+,3}<%loop2>
; CHECK: %s6 = add i32 %sum2, %phi6
; CHECK-NEXT: --> {{{{}}63,+,6}<%loop1>,+,3}<%loop2>

entry:
  br label %loop1

loop1:
  %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1 ]
  %phi2 = phi i32 [ 4, %entry ], [ %phi2.inc, %loop1 ]
  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
  %phi1.inc = add i32 %phi1, 1
  %phi2.inc = add i32 %phi2, 2
  %phi3.inc = add i32 %phi3, 3
  %sum1 = add i32 %phi1, %phi2
  %sum2 = add i32 %sum1, %phi3
  %cond1 = icmp ult i32 %sum2, 1000
  br i1 %cond1, label %loop1, label %loop2

loop2:
  %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ]
  %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ]
  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
  %phi4.inc = add i32 %phi4, 1
  %phi5.inc = add i32 %phi5, 2
  %phi6.inc = add i32 %phi6, 3
  %sum3 = add i32 %phi4, %phi5
  %sum4 = add i32 %sum3, %phi6
  %cond2 = icmp ult i32 %sum4, 1000
  br i1 %cond2, label %loop2, label %exit

exit:
  %s1 = add i32 %phi1, %phi4
  %s2 = add i32 %phi5, %phi2
  %s3 = add i32 %sum1, %sum3
  %s4 = add i32 %sum4, %sum2
  %s5 = add i32 %phi3, %sum3
  %s6 = add i32 %sum2, %phi6
  ret void
}
|
|
|
|
|
|
|
|
; Check that we can evaluate a sum of phis+invariants from two different loops
; in any order.
|
|
|
|
|
|
|
|
define void @test_01(i32 %a, i32 %b) {
; Same two-loop shape as a pair of back-to-back loops, but %phi1 and %phi2
; start at the arguments %a and %b, and the arguments are also added to the
; sums (%is1 in %loop1, %is2 in %loop2). %ec2 combines %is1 and %is2 inside
; %loop2 and drives its exit condition.

; CHECK-LABEL: Classifying expressions for: @test_01
; CHECK: %sum1 = add i32 %phi1, %phi2
; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1>
; CHECK: %sum2 = add i32 %sum1, %phi3
; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1>
; CHECK: %is1 = add i32 %sum2, %a
; CHECK-NEXT: --> {(6 + (2 * %a) + %b),+,6}<%loop1>
; CHECK: %sum3 = add i32 %phi4, %phi5
; CHECK-NEXT: --> {116,+,3}<%loop2>
; CHECK: %sum4 = add i32 %sum3, %phi6
; CHECK-NEXT: --> {159,+,6}<%loop2>
; CHECK: %is2 = add i32 %sum4, %b
; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2>
; CHECK: %ec2 = add i32 %is1, %is2
; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
; CHECK: %s1 = add i32 %phi1, %is1
; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1>
; CHECK: %s2 = add i32 %is2, %phi4
; CHECK-NEXT: --> {(222 + %b),+,7}<%loop2>
; CHECK: %s3 = add i32 %is1, %phi5
; CHECK-NEXT: --> {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2>
; CHECK: %s4 = add i32 %phi2, %is2
; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2>
; CHECK: %s5 = add i32 %is1, %is2
; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
; CHECK: %s6 = add i32 %is2, %is1
; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>

entry:
  br label %loop1

loop1:
  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
  %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ]
  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
  %phi1.inc = add i32 %phi1, 1
  %phi2.inc = add i32 %phi2, 2
  %phi3.inc = add i32 %phi3, 3
  %sum1 = add i32 %phi1, %phi2
  %sum2 = add i32 %sum1, %phi3
  %is1 = add i32 %sum2, %a
  %cond1 = icmp ult i32 %is1, 1000
  br i1 %cond1, label %loop1, label %loop2

loop2:
  %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ]
  %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ]
  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
  %phi4.inc = add i32 %phi4, 1
  %phi5.inc = add i32 %phi5, 2
  %phi6.inc = add i32 %phi6, 3
  %sum3 = add i32 %phi4, %phi5
  %sum4 = add i32 %sum3, %phi6
  %is2 = add i32 %sum4, %b
  %ec2 = add i32 %is1, %is2
  %cond2 = icmp ult i32 %ec2, 1000
  br i1 %cond2, label %loop2, label %exit

exit:
  %s1 = add i32 %phi1, %is1
  %s2 = add i32 %is2, %phi4
  %s3 = add i32 %is1, %phi5
  %s4 = add i32 %phi2, %is2
  %s5 = add i32 %is1, %is2
  %s6 = add i32 %is2, %is1
  ret void
}
|
|
|
|
|
|
|
|
; Check that we can correctly evaluate a sum of phis+variants from two different
; loops in any order.
|
|
|
|
|
|
|
|
define void @test_02(i32 %a, i32 %b, i32* %p) {
; Two back-to-back loops whose induction phis start at %a, %b and a constant.
; Each loop also loads a value from %p (%v1 in %loop1, %v2 in %loop2) and adds
; it to the sum of its phis. %loop2 additionally recomputes %sum2 + %v1 with
; swapped operands (%is3) and uses %is1 + %is3 as its exit value.

; CHECK-LABEL: Classifying expressions for: @test_02
; CHECK: %sum1 = add i32 %phi1, %phi2
; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1>
; CHECK: %sum2 = add i32 %sum1, %phi3
; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1>
; CHECK: %is1 = add i32 %sum2, %v1
; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1)
; CHECK: %sum3 = add i32 %phi4, %phi5
; CHECK-NEXT: --> {(%a + %b),+,3}<%loop2>
; CHECK: %sum4 = add i32 %sum3, %phi6
; CHECK-NEXT: --> {(43 + %a + %b),+,6}<%loop2>
; CHECK: %is2 = add i32 %sum4, %v2
; CHECK-NEXT: --> ({(43 + %a + %b),+,6}<%loop2> + %v2)
; CHECK: %is3 = add i32 %v1, %sum2
; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1)
; CHECK: %ec2 = add i32 %is1, %is3
; CHECK-NEXT: --> (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1))
; CHECK: %s1 = add i32 %phi1, %is1
; CHECK-NEXT: --> ({(6 + (2 * %a) + %b),+,7}<%loop1> + %v1)
; CHECK: %s2 = add i32 %is2, %phi4
; CHECK-NEXT: --> ({(43 + (2 * %a) + %b),+,7}<%loop2> + %v2)
; CHECK: %s3 = add i32 %is1, %phi5
; CHECK-NEXT: --> {({(6 + (2 * %b) + %a),+,6}<%loop1> + %v1),+,2}<%loop2>
; CHECK: %s4 = add i32 %phi2, %is2
; CHECK-NEXT: --> ({{{{}}(43 + (2 * %b) + %a),+,2}<%loop1>,+,6}<%loop2> + %v2)
; CHECK: %s5 = add i32 %is1, %is2
; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2)
; CHECK: %s6 = add i32 %is2, %is1
; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2)

entry:
  br label %loop1

loop1:
  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
  %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ]
  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
  %phi1.inc = add i32 %phi1, 1
  %phi2.inc = add i32 %phi2, 2
  %phi3.inc = add i32 %phi3, 3
  %v1 = load i32, i32* %p
  %sum1 = add i32 %phi1, %phi2
  %sum2 = add i32 %sum1, %phi3
  %is1 = add i32 %sum2, %v1
  %cond1 = icmp ult i32 %is1, 1000
  br i1 %cond1, label %loop1, label %loop2

loop2:
  %phi4 = phi i32 [ %a, %loop1 ], [ %phi4.inc, %loop2 ]
  %phi5 = phi i32 [ %b, %loop1 ], [ %phi5.inc, %loop2 ]
  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
  %phi4.inc = add i32 %phi4, 1
  %phi5.inc = add i32 %phi5, 2
  %phi6.inc = add i32 %phi6, 3
  %v2 = load i32, i32* %p
  %sum3 = add i32 %phi4, %phi5
  %sum4 = add i32 %sum3, %phi6
  %is2 = add i32 %sum4, %v2
  %is3 = add i32 %v1, %sum2
  %ec2 = add i32 %is1, %is3
  %cond2 = icmp ult i32 %ec2, 1000
  br i1 %cond2, label %loop2, label %exit

exit:
  %s1 = add i32 %phi1, %is1
  %s2 = add i32 %is2, %phi4
  %s3 = add i32 %is1, %phi5
  %s4 = add i32 %phi2, %is2
  %s5 = add i32 %is1, %is2
  %s6 = add i32 %is2, %is1
  ret void
}
|
|
|
|
|
|
|
|
; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as
; a recurrence of loop1 because of operands order if we pick recurrences in an
|
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start"
The patch rL303730 was reverted because test lsr-expand-quadratic.ll failed on
many non-X86 configs with this patch. The reason for this is that the patch
makes a correctness fix that changes the optimizer's behavior for this test.
Without the change, LSR was making an overconfident simplification based on a
wrong SCEV. Apparently it did not need the IV analysis to do this. With the
change, it chose a different way to simplify (that wasn't so confident), and
this way required the IV analysis. Now, following the right execution path,
LSR tries to make a transformation relying on IV Users analysis. This analysis
is target-dependent due to this code:
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
To make a proper transformation in this test case, the type i32 needs to be
legal for the specified data layout. When the test runs on some non-X86
configuration (e.g. pure ARM 64), opt gets confused by the specified target
and does not use it, rejecting the specified data layout as well. Instead,
it uses some default layout that does not treat i32 as a legal type
(currently the layout that is used when it is not specified does not have
legal types at all). As a result, the transformation we expect to happen does
not happen for this test.
This re-enabling patch does not have any source code changes compared to the
original patch rL303730. The only difference is that the failing test is
moved to X86 directory and now has requirement of running on x86 only to comply
with the specified target triple and data layout.
Differential Revision: https://reviews.llvm.org/D33543
llvm-svn: 303971
2017-05-26 14:47:04 +08:00
|
|
|
; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown)
; because we cannot prove for sure that it doesn't use Phis of loop 2.
|
[SCEV] Fix sorting order for AddRecExprs
The existing sorting order defined in CompareSCEVComplexity sorts AddRecExprs
by loop depth, but does not pay attention to dominance of loops. This can
lead us to the following buggy situation:
for (...) { // loop1
op1 = {A,+,B}
}
for (...) { // loop2
op2 = {A,+,B}
S = add op1, op2
}
In this case there is no guarantee that in operand list of S the op2 comes
before op1 (loop depth is the same, so they will be sorted just
lexicographically), so we can incorrectly treat S as a recurrence of loop1,
which is wrong.
This patch changes the sorting logic so that it places the dominated recs
before the dominating recs. This ensures that when we pick the first recurrence
in the operands order, it will be the bottom-most in terms of the dominator tree.
The attached test set includes some tests that produce incorrect SCEV
estimations and crashes with the old logic.
Reviewers: sanjoy, reames, apilipenko, anna
Reviewed By: sanjoy
Subscribers: llvm-commits, mzolotukhin
Differential Revision: https://reviews.llvm.org/D33121
llvm-svn: 303148
2017-05-16 15:27:06 +08:00
|
|
|
|
|
|
|
define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) {
; %loop1 counts %phi1 up from %a by 1; %loop2 follows it, counts %phi2 up from
; %a by 2, and loads %v1 from %p on every iteration. Inside %loop2 the chain
; %s1 -> %s2 -> %s3 adds %phi1, %v1, %b and %phi2 in that order. The expected
; expressions keep %v1 as a separate addend rather than folding it into the
; start of a recurrence.

; CHECK-LABEL: Classifying expressions for: @test_03
; CHECK: %v1 = load i32, i32* %p
; CHECK-NEXT: --> %v1
; CHECK: %s1 = add i32 %phi1, %v1
; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1)
; CHECK: %s2 = add i32 %s1, %b
; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1)
; CHECK: %s3 = add i32 %s2, %phi2
; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1)

entry:
  br label %loop1

loop1:
  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
  %phi1.inc = add i32 %phi1, 1
  %cond1 = icmp ult i32 %phi1, %c
  br i1 %cond1, label %loop1, label %loop2

loop2:
  %phi2 = phi i32 [ %a, %loop1 ], [ %phi2.inc, %loop2 ]
  %phi2.inc = add i32 %phi2, 2
  %v1 = load i32, i32* %p
  %s1 = add i32 %phi1, %v1
  %s2 = add i32 %s1, %b
  %s3 = add i32 %s2, %phi2
  %cond2 = icmp ult i32 %s3, %c
  br i1 %cond2, label %loop2, label %exit

exit:

  ret void
}
|
|
|
|
|
|
|
|
; Another mix of previous use cases that demonstrates that incorrect picking of
; a loop for a recurrence may cause a crash of SCEV analysis.
|
|
|
|
define void @test_04() {
; %loop1 (header %loop1, latch %bb3) carries the i64 induction phi %tmp and
; its truncation %tmp2. %loop2 is entered from %loop1 and carries its own
; induction phi %tmp7. %tmp14, computed in %loop2, subtracts the loop1 value
; %tmp2 from an expression built on %tmp7, so the result depends on
; recurrences from both loops.

; CHECK-LABEL: Classifying expressions for: @test_04
; CHECK: %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]
; CHECK-NEXT: --> {2,+,1}<nuw><nsw><%loop1>
; CHECK: %tmp2 = trunc i64 %tmp to i32
; CHECK-NEXT: --> {2,+,1}<%loop1>
; CHECK: %tmp4 = add nuw nsw i64 %tmp, 1
; CHECK-NEXT: --> {3,+,1}<nuw><%loop1>
; CHECK: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]
; CHECK-NEXT: --> {2,+,1}<nuw><nsw><%loop2>
; CHECK: %tmp10 = sub i64 %tmp9, %tmp7
; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<nw><%loop2>)
; CHECK: %tmp11 = add i64 %tmp10, undef
; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<nw><%loop2>)
; CHECK: %tmp13 = trunc i64 %tmp11 to i32
; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(-2 + (trunc i64 undef to i32)),+,-1}<%loop2>)
; CHECK: %tmp14 = sub i32 %tmp13, %tmp2
; `{{[{][{]}}` is the ugliness needed to match `{{`
; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{[{][{]}}(-4 + (trunc i64 undef to i32)),+,-1}<%loop1>,+,-1}<%loop2>)
; CHECK: %tmp15 = add nuw nsw i64 %tmp7, 1
; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%loop2>

bb:
  br label %loop1

loop1:
  %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]
  %tmp2 = trunc i64 %tmp to i32
  br i1 undef, label %loop2, label %bb3

bb3:
  %tmp4 = add nuw nsw i64 %tmp, 1
  br label %loop1

bb5:
  ret void

loop2:
  %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]
  %tmp8 = load i8, i8 addrspace(1)* undef, align 1
  %tmp9 = sext i8 %tmp8 to i64
  %tmp10 = sub i64 %tmp9, %tmp7
  %tmp11 = add i64 %tmp10, undef
  %tmp13 = trunc i64 %tmp11 to i32
  %tmp14 = sub i32 %tmp13, %tmp2
  %tmp15 = add nuw nsw i64 %tmp7, 1
  %tmp16 = icmp slt i64 %tmp15, %tmp
  br i1 %tmp16, label %loop2, label %bb5
}
|
|
|
|
|
|
|
|
; Global array of 1000 i32 values; indexed by the getelementptr in @test_05.
@A = weak global [1000 x i32] zeroinitializer, align 32
|
|
|
|
|
|
|
|
; Demonstrate a situation when we can add two recs with different degrees from
; the same loop.
|
|
|
|
define void @test_05(i32 %N) {
; Single loop with header %bb3 and body/latch %bb; %i.0 starts at 2 and is
; incremented by 1 in %bb. %SQ (%i.0 * %i.0) and %tmp4 (%i.0 * 2) are
; recurrences of different degree over that same loop, and %tmp5 is their
; difference, which also drives the loop exit test.

; CHECK-LABEL: Classifying expressions for: @test_05
; CHECK: %SQ = mul i32 %i.0, %i.0
; CHECK-NEXT: --> {4,+,5,+,2}<%bb3>
; CHECK: %tmp4 = mul i32 %i.0, 2
; CHECK-NEXT: --> {4,+,2}<nuw><nsw><%bb3>
; CHECK: %tmp5 = sub i32 %SQ, %tmp4
; CHECK-NEXT: --> {0,+,3,+,2}<%bb3>

entry:
  %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
  br label %bb3

bb: ; preds = %bb3
  %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %i.0 ; <i32*> [#uses=1]
  store i32 123, i32* %tmp
  %tmp2 = add i32 %i.0, 1 ; <i32> [#uses=1]
  br label %bb3

bb3: ; preds = %bb, %entry
  %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ] ; <i32> [#uses=3]
  %SQ = mul i32 %i.0, %i.0
  %tmp4 = mul i32 %i.0, 2
  %tmp5 = sub i32 %SQ, %tmp4
  %tmp3 = icmp sle i32 %tmp5, 9999 ; <i1> [#uses=1]
  br i1 %tmp3, label %bb, label %bb5

bb5: ; preds = %bb3
  br label %return

return: ; preds = %bb5
  ret void
}
|
|
|
|
|
|
|
|
; Check that we can add Phis from different loops with different nesting, nested
; loop comes first.
|
|
|
|
define void @test_06() {
; %loop2 is nested inside %loop1: %loop1 branches to %loop2, which exits to
; %loop1.exit, which branches back to %loop1. %loop3 is a separate loop
; entered after %loop1 finishes. %exit adds every pair of the three induction
; phis in both operand orders.

; CHECK-LABEL: Classifying expressions for: @test_06
; CHECK: %s1 = add i32 %phi1, %phi2
; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
; CHECK: %s2 = add i32 %phi2, %phi1
; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
; CHECK: %s3 = add i32 %phi1, %phi3
; CHECK-NEXT: --> {{{{}}40,+,1}<%loop1>,+,3}<%loop3>
; CHECK: %s4 = add i32 %phi3, %phi1
; CHECK-NEXT: --> {{{{}}40,+,1}<%loop1>,+,3}<%loop3>
; CHECK: %s5 = add i32 %phi2, %phi3
; CHECK-NEXT: --> {{{{}}50,+,2}<%loop2>,+,3}<%loop3>
; CHECK: %s6 = add i32 %phi3, %phi2
; CHECK-NEXT: --> {{{{}}50,+,2}<%loop2>,+,3}<%loop3>

entry:
  br label %loop1

loop1:
  %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1.exit ]
  br label %loop2

loop2:
  %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ]
  %phi2.inc = add i32 %phi2, 2
  %cond2 = icmp ult i32 %phi2.inc, 1000
  br i1 %cond2, label %loop2, label %loop1.exit

loop1.exit:
  %phi1.inc = add i32 %phi1, 1
  %cond1 = icmp ult i32 %phi1.inc, 1000
  br i1 %cond1, label %loop1, label %loop3

loop3:
  %phi3 = phi i32 [ 30, %loop1.exit ], [ %phi3.inc, %loop3 ]
  %phi3.inc = add i32 %phi3, 3
  %cond3 = icmp ult i32 %phi3.inc, 1000
  br i1 %cond3, label %loop3, label %exit

exit:
  %s1 = add i32 %phi1, %phi2
  %s2 = add i32 %phi2, %phi1
  %s3 = add i32 %phi1, %phi3
  %s4 = add i32 %phi3, %phi1
  %s5 = add i32 %phi2, %phi3
  %s6 = add i32 %phi3, %phi2
  ret void
}
|
|
|
|
|
|
|
|
; Check that we can add Phis from different loops with different nesting, nested
; loop comes second.
|
|
|
|
define void @test_07() {
; %loop3 runs first and exits into %loop1. %loop2 is nested inside %loop1:
; %loop1 branches to %loop2, which exits to %loop1.exit, which either leaves
; to %exit or branches back to %loop1. %exit adds every pair of the three
; induction phis in both operand orders.

; CHECK-LABEL: Classifying expressions for: @test_07
; CHECK: %s1 = add i32 %phi1, %phi2
; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
; CHECK: %s2 = add i32 %phi2, %phi1
; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
; CHECK: %s3 = add i32 %phi1, %phi3
; CHECK-NEXT: --> {{{{}}40,+,3}<%loop3>,+,1}<%loop1>
; CHECK: %s4 = add i32 %phi3, %phi1
; CHECK-NEXT: --> {{{{}}40,+,3}<%loop3>,+,1}<%loop1>
; CHECK: %s5 = add i32 %phi2, %phi3
; CHECK-NEXT: --> {{{{}}50,+,3}<%loop3>,+,2}<%loop2>
; CHECK: %s6 = add i32 %phi3, %phi2
; CHECK-NEXT: --> {{{{}}50,+,3}<%loop3>,+,2}<%loop2>

entry:
  br label %loop3

loop3:
  %phi3 = phi i32 [ 30, %entry ], [ %phi3.inc, %loop3 ]
  %phi3.inc = add i32 %phi3, 3
  %cond3 = icmp ult i32 %phi3.inc, 1000
  br i1 %cond3, label %loop3, label %loop1

loop1:
  %phi1 = phi i32 [ 10, %loop3 ], [ %phi1.inc, %loop1.exit ]
  br label %loop2

loop2:
  %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ]
  %phi2.inc = add i32 %phi2, 2
  %cond2 = icmp ult i32 %phi2.inc, 1000
  br i1 %cond2, label %loop2, label %loop1.exit

loop1.exit:
  %phi1.inc = add i32 %phi1, 1
  %cond1 = icmp ult i32 %phi1.inc, 1000
  br i1 %cond1, label %exit, label %loop1

exit:
  %s1 = add i32 %phi1, %phi2
  %s2 = add i32 %phi2, %phi1
  %s3 = add i32 %phi1, %phi3
  %s4 = add i32 %phi3, %phi1
  %s5 = add i32 %phi2, %phi3
  %s6 = add i32 %phi3, %phi2
  ret void
}
|
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start"
The patch rL303730 was reverted because test lsr-expand-quadratic.ll failed on
many non-X86 configs with this patch. The reason for this is that the patch
makes a correctness fix that changes the optimizer's behavior for this test.
Without the change, LSR was making an overconfident simplification based on a
wrong SCEV. Apparently it did not need the IV analysis to do this. With the
change, it chose a different way to simplify (that wasn't so confident), and
this way required the IV analysis. Now, following the right execution path,
LSR tries to make a transformation relying on IV Users analysis. This analysis
is target-dependent due to this code:
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
To make a proper transformation in this test case, the type i32 needs to be
legal for the specified data layout. When the test runs on some non-X86
configuration (e.g. pure ARM 64), opt gets confused by the specified target
and does not use it, rejecting the specified data layout as well. Instead,
it uses some default layout that does not treat i32 as a legal type
(currently the layout that is used when it is not specified does not have
legal types at all). As a result, the transformation we expect to happen does
not happen for this test.
This re-enabling patch does not have any source code changes compared to the
original patch rL303730. The only difference is that the failing test is
moved to X86 directory and now has requirement of running on x86 only to comply
with the specified target triple and data layout.
Differential Revision: https://reviews.llvm.org/D33543
llvm-svn: 303971
2017-05-26 14:47:04 +08:00
|
|
|
|
|
|
|
; Make sure that a complicated phi does not get folded into the start value of
|
|
|
|
; a recurrence that belongs to a loop above it.
|
|
|
|
; Control flow: %entry -> %loop_1. %loop_1_exit either leaves %loop_1 through
; %loop_2_preheader or returns to the header through %loop_1_back_branch, so
; %loop_2 is reached only after %loop_1 has been exited.
define void @test_08() {
|
|
|
|
|
|
|
|
; CHECK-LABEL: Classifying expressions for: @test_08
|
|
|
|
; Expected: the sum keeps %iv.2.1 as a separate operand next to the %loop_2
; recurrence of %iv.2.2.
; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1
|
|
|
|
; CHECK-NEXT: --> ({0,+,-1}<nsw><%loop_2> + %iv.2.1)
|
|
|
|
; CHECK: %tmp12 = trunc i64 %tmp11 to i32
|
2018-06-15 01:13:35 +08:00
|
|
|
; CHECK-NEXT: --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)
|
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start"
The patch rL303730 was reverted because test lsr-expand-quadratic.ll failed on
many non-X86 configs with this patch. The reason of this is that the patch
makes a correctless fix that changes optimizer's behavior for this test.
Without the change, LSR was making an overconfident simplification basing on a
wrong SCEV. Apparently it did not need the IV analysis to do this. With the
change, it chose a different way to simplify (that wasn't so confident), and
this way required the IV analysis. Now, following the right execution path,
LSR tries to make a transformation relying on IV Users analysis. This analysis
is target-dependent due to this code:
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
To make a proper transformation in this test case, the type i32 needs to be
legal for the specified data layout. When the test runs on some non-X86
configuration (e.g. pure ARM 64), opt gets confused by the specified target
and does not use it, rejecting the specified data layout as well. Instead,
it uses some default layout that does not treat i32 as a legal type
(currently the layout that is used when it is not specified does not have
legal types at all). As result, the transformation we expect to happen does
not happen for this test.
This re-enabling patch does not have any source code changes compared to the
original patch rL303730. The only difference is that the failing test is
moved to X86 directory and now has requirement of running on x86 only to comply
with the specified target triple and data layout.
Differential Revision: https://reviews.llvm.org/D33543
llvm-svn: 303971
2017-05-26 14:47:04 +08:00
|
|
|
; CHECK: %tmp14 = mul i32 %tmp12, %tmp7
|
2018-06-15 01:13:35 +08:00
|
|
|
; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>)
|
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start"
The patch rL303730 was reverted because test lsr-expand-quadratic.ll failed on
many non-X86 configs with this patch. The reason of this is that the patch
makes a correctless fix that changes optimizer's behavior for this test.
Without the change, LSR was making an overconfident simplification basing on a
wrong SCEV. Apparently it did not need the IV analysis to do this. With the
change, it chose a different way to simplify (that wasn't so confident), and
this way required the IV analysis. Now, following the right execution path,
LSR tries to make a transformation relying on IV Users analysis. This analysis
is target-dependent due to this code:
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
To make a proper transformation in this test case, the type i32 needs to be
legal for the specified data layout. When the test runs on some non-X86
configuration (e.g. pure ARM 64), opt gets confused by the specified target
and does not use it, rejecting the specified data layout as well. Instead,
it uses some default layout that does not treat i32 as a legal type
(currently the layout that is used when it is not specified does not have
legal types at all). As result, the transformation we expect to happen does
not happen for this test.
This re-enabling patch does not have any source code changes compared to the
original patch rL303730. The only difference is that the failing test is
moved to X86 directory and now has requirement of running on x86 only to comply
with the specified target triple and data layout.
Differential Revision: https://reviews.llvm.org/D33543
llvm-svn: 303971
2017-05-26 14:47:04 +08:00
|
|
|
; Expected: the product keeps %iv.2.1 as its own factor next to the %loop_1
; recurrence of %iv.1.1.
; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1
|
|
|
|
; CHECK-NEXT: --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1)
|
|
|
|
|
|
|
|
entry:
|
|
|
|
br label %loop_1
|
|
|
|
|
|
|
|
; Header of the first loop; both phis take their backedge values from
; %loop_1_back_branch.
loop_1:
|
|
|
|
%iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]
|
|
|
|
%iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ]
|
|
|
|
br label %loop_1_exit
|
|
|
|
|
|
|
|
; No branch in this function targets %dead; it only contributes an extra
; incoming edge to %loop_1_exit.
dead:
|
|
|
|
br label %loop_1_exit
|
|
|
|
|
|
|
|
loop_1_exit:
|
|
|
|
%tmp5 = icmp sgt i64 %iv.1.1, 2
|
|
|
|
; Leave %loop_1 once %iv.1.1 > 2, otherwise take the backedge.
br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch
|
|
|
|
|
|
|
|
loop_1_back_branch:
|
|
|
|
%iv.1.1.next = add nuw nsw i64 %iv.1.1, 1
|
|
|
|
%iv.1.2.next = add nsw i32 %iv.1.2, 1
|
|
|
|
br label %loop_1
|
|
|
|
|
|
|
|
; Runs once between the two loops; %tmp7 = trunc(1 - %iv.1.1) is later used as
; a multiplicand inside %loop_2.
loop_2_preheader:
|
|
|
|
%tmp6 = sub i64 1, %iv.1.1
|
|
|
|
%tmp7 = trunc i64 %tmp6 to i32
|
|
|
|
br label %loop_2
|
|
|
|
|
|
|
|
loop_2:
|
|
|
|
; The backedge value of this phi is %tmp16 = %iv.2.1 * %iv.1.1, i.e. the phi
; is multiplied by a value defined in %loop_1 on every iteration.
%iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ]
|
|
|
|
%iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ]
|
|
|
|
%iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ]
|
|
|
|
%tmp11 = add i64 %iv.2.2, %iv.2.1
|
|
|
|
%tmp12 = trunc i64 %tmp11 to i32
|
|
|
|
%tmp14 = mul i32 %tmp12, %tmp7
|
|
|
|
%tmp16 = mul i64 %iv.2.1, %iv.1.1
|
|
|
|
%iv.2.3.next = add nuw nsw i64 %iv.2.3, 1
|
|
|
|
%iv.2.2.next = add nsw i64 %iv.2.2, -1
|
|
|
|
; %loop_2 keeps iterating while %iv.2.3.next is (signed) below %iv.1.1.
%tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1
|
|
|
|
br i1 %tmp17, label %loop_2, label %exit
|
|
|
|
|
|
|
|
exit:
|
|
|
|
; %tmp10 has no users in this function.
%tmp10 = add i32 %iv.1.2, 3
|
|
|
|
ret void
|
|
|
|
}
|
[SCEV] Strengthen variance condition in calculateLoopDisposition
Given loops `L1` and `L2` with AddRecs `AR1` and `AR2` varying in them respectively.
When identifying loop disposition of `AR2` w.r.t. `L1`, we only say that it is varying if
`L1` contains `L2`. But there is also a possible situation where `L1` and `L2` are
consecutive sibling loops within the parent loop. In this case, `AR2` is also varying
w.r.t. `L1`, but we don't correctly identify it.
It can lead, for example, to an attempt at incorrect folding. Consider:
AR1 = {a,+,b}<L1>
AR2 = {c,+,d}<L2>
EXAR2 = sext(AR2)
MUL = mul AR1, EXAR2
If we incorrectly assume that `EXAR2` is invariant w.r.t. `L1`, we can end up trying to
construct something like: `{a * {c,+,d}<L2>,+,b * {c,+,d}<L2>}<L1>`, which is incorrect
because `AR2` is not available on entrance of `L1`.
Both situations "`L1` contains `L2`" and "`L1` precedes sibling loop `L2`" can be handled
with one check: "header of `L1` dominates header of `L2`". This patch replaces the old
insufficient check with this one.
Differential Revision: https://reviews.llvm.org/D39453
llvm-svn: 318819
2017-11-22 14:21:39 +08:00
|
|
|
|
|
|
|
; Two loops in sequence: %loop1 is left through %guarded into
; %loop2.preheader, and %loop2 runs afterwards. %ret multiplies %iv1 (a %loop1
; recurrence) by the sign-extended %iv2.next (a %loop2 recurrence); the
; expectation for %ret below keeps it as a plain product of the two.
define i64 @test_09(i32 %param) {
|
|
|
|
|
|
|
|
; CHECK-LABEL: Classifying expressions for: @test_09
|
|
|
|
; CHECK: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %outer.loop ]
|
|
|
|
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop1>
|
|
|
|
; CHECK: %iv1.trunc = trunc i64 %iv1 to i32
|
|
|
|
; CHECK-NEXT: --> {0,+,1}<%loop1>
|
|
|
|
; CHECK: %iv1.next = add nuw nsw i64 %iv1, 1
|
|
|
|
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop1>
|
|
|
|
; CHECK: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ]
|
|
|
|
; CHECK-NEXT: --> {%param,+,1}<%loop2>
|
|
|
|
; CHECK: %iv2.next = add i32 %iv2, 1
|
|
|
|
; CHECK-NEXT: --> {(1 + %param),+,1}<%loop2>
|
|
|
|
; CHECK: %iv2.ext = sext i32 %iv2.next to i64
|
|
|
|
; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64)
|
|
|
|
; CHECK: %ret = mul i64 %iv1, %iv2.ext
|
|
|
|
; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<nuw><nsw><%loop1>)
|
|
|
|
|
|
|
|
entry:
|
|
|
|
br label %outer.loop
|
|
|
|
|
|
|
|
; Despite its name, no branch in this function returns to %outer.loop; it is
; only entered from %entry.
outer.loop: ; preds = %entry
|
|
|
|
br label %loop1
|
|
|
|
|
|
|
|
loop1: ; preds = %guarded, %outer.loop
|
|
|
|
%iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %outer.loop ]
|
|
|
|
%iv1.trunc = trunc i64 %iv1 to i32
|
|
|
|
%cond1 = icmp ult i64 %iv1, 100
|
|
|
|
br i1 %cond1, label %guarded, label %deopt
|
|
|
|
|
|
|
|
guarded: ; preds = %loop1
|
|
|
|
%iv1.next = add nuw nsw i64 %iv1, 1
|
|
|
|
%tmp16 = icmp slt i32 %iv1.trunc, 2
|
|
|
|
; Backedge of %loop1 while %iv1.trunc < 2; otherwise fall into the second loop.
br i1 %tmp16, label %loop1, label %loop2.preheader
|
|
|
|
|
|
|
|
deopt: ; preds = %loop1
|
|
|
|
unreachable
|
|
|
|
|
|
|
|
loop2.preheader: ; preds = %guarded
|
|
|
|
br label %loop2
|
|
|
|
|
|
|
|
loop2: ; preds = %loop2, %loop2.preheader
|
|
|
|
%iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ]
|
|
|
|
%iv2.next = add i32 %iv2, 1
|
|
|
|
; The exit test of %loop2 reads %iv1.trunc, a value computed inside %loop1.
%cond2 = icmp slt i32 %iv2, %iv1.trunc
|
|
|
|
br i1 %cond2, label %loop2, label %exit
|
|
|
|
|
|
|
|
exit: ; preds = %loop2
|
|
|
|
%iv2.ext = sext i32 %iv2.next to i64
|
|
|
|
; Both operands are used after their defining loops have finished.
%ret = mul i64 %iv1, %iv2.ext
|
|
|
|
ret i64 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Loop structure: %loop1 is nested inside %uncle.loop (it is entered from the
; %uncle.loop header and its exit block %uncle.loop.backedge can branch back to
; that header). %loop2 is entered only from %uncle.loop.backedge via
; %loop2.preheader, i.e. after %uncle.loop has been left. %ret multiplies the
; %loop1 recurrence %iv1 by a sign-extended %loop2 recurrence; the expectation
; for %ret below keeps it as a plain product of the two.
define i64 @test_10(i32 %param) {
|
|
|
|
|
|
|
|
; CHECK-LABEL: Classifying expressions for: @test_10
|
|
|
|
; CHECK: %uncle = phi i64 [ %uncle.outer.next, %uncle.loop.backedge ], [ 0, %outer.loop ]
|
|
|
|
; CHECK-NEXT: --> {0,+,1}<%uncle.loop>
|
|
|
|
; CHECK: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %uncle.loop ]
|
|
|
|
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop1>
|
|
|
|
; CHECK: %iv1.trunc = trunc i64 %iv1 to i32
|
|
|
|
; CHECK-NEXT: --> {0,+,1}<%loop1>
|
|
|
|
; CHECK: %iv1.next = add nuw nsw i64 %iv1, 1
|
|
|
|
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop1>
|
|
|
|
; CHECK: %uncle.outer.next = add i64 %uncle, 1
|
|
|
|
; CHECK-NEXT: --> {1,+,1}<%uncle.loop>
|
|
|
|
; CHECK: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ]
|
|
|
|
; CHECK-NEXT: --> {%param,+,1}<%loop2>
|
|
|
|
; CHECK: %iv2.next = add i32 %iv2, 1
|
|
|
|
; CHECK-NEXT: --> {(1 + %param),+,1}<%loop2>
|
|
|
|
; CHECK: %iv2.ext = sext i32 %iv2.next to i64
|
|
|
|
; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64)
|
|
|
|
; CHECK: %ret = mul i64 %iv1, %iv2.ext
|
|
|
|
; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<nuw><nsw><%loop1>)
|
|
|
|
|
|
|
|
entry:
|
|
|
|
br label %outer.loop
|
|
|
|
|
|
|
|
; Despite its name, %outer.loop is only entered from %entry and has no
; backedge in this function.
outer.loop: ; preds = %entry
|
|
|
|
br label %uncle.loop
|
|
|
|
|
|
|
|
uncle.loop: ; preds = %uncle.loop.backedge, %outer.loop
|
|
|
|
%uncle = phi i64 [ %uncle.outer.next, %uncle.loop.backedge ], [ 0, %outer.loop ]
|
|
|
|
br label %loop1
|
|
|
|
|
|
|
|
loop1: ; preds = %guarded, %uncle.loop
|
|
|
|
%iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %uncle.loop ]
|
|
|
|
%iv1.trunc = trunc i64 %iv1 to i32
|
|
|
|
%cond1 = icmp ult i64 %iv1, 100
|
|
|
|
br i1 %cond1, label %guarded, label %deopt
|
|
|
|
|
|
|
|
guarded: ; preds = %loop1
|
|
|
|
%iv1.next = add nuw nsw i64 %iv1, 1
|
|
|
|
%tmp16 = icmp slt i32 %iv1.trunc, 2
|
|
|
|
; Backedge of %loop1 while %iv1.trunc < 2; otherwise continue in the
; enclosing %uncle.loop.
br i1 %tmp16, label %loop1, label %uncle.loop.backedge
|
|
|
|
|
|
|
|
uncle.loop.backedge: ; preds = %guarded
|
|
|
|
%uncle.outer.next = add i64 %uncle, 1
|
|
|
|
%cond.uncle = icmp ult i64 %uncle, 120
|
|
|
|
; Note the successor order: a true condition leaves %uncle.loop towards
; %loop2.preheader, a false one takes the backedge.
br i1 %cond.uncle, label %loop2.preheader, label %uncle.loop
|
|
|
|
|
|
|
|
deopt: ; preds = %loop1
|
|
|
|
unreachable
|
|
|
|
|
|
|
|
loop2.preheader: ; preds = %uncle.loop.backedge
|
|
|
|
br label %loop2
|
|
|
|
|
|
|
|
loop2: ; preds = %loop2, %loop2.preheader
|
|
|
|
%iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ]
|
|
|
|
%iv2.next = add i32 %iv2, 1
|
|
|
|
; The exit test of %loop2 reads %iv1.trunc, a value computed inside %loop1.
%cond2 = icmp slt i32 %iv2, %iv1.trunc
|
|
|
|
br i1 %cond2, label %loop2, label %exit
|
|
|
|
|
|
|
|
exit: ; preds = %loop2
|
|
|
|
%iv2.ext = sext i32 %iv2.next to i64
|
|
|
|
; Both operands are used after their defining loops have finished.
%ret = mul i64 %iv1, %iv2.ext
|
|
|
|
ret i64 %ret
|
|
|
|
}
|