2019-04-17 12:52:47 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
|
|
; RUN: opt < %s -chr -instcombine -simplifycfg -S | FileCheck %s
|
|
|
|
; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplify-cfg)' -S | FileCheck %s
|
|
|
|
|
|
|
|
declare void @foo()
|
|
|
|
declare void @bar()
|
|
|
|
|
|
|
|
; Simple case.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
define void @test_chr_1(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_1(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Simple case with a cold block.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) == 0) // Likely false
|
|
|
|
; bar()
|
|
|
|
; if ((t0 & 4) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 7) == 7) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) == 0)
|
|
|
|
; bar()
|
|
|
|
; if ((t0 & 4) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
define void @test_chr_1_1(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_1_1(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 7
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 7
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB5:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[BB2_NONCHR:%.*]], label [[BB3_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @bar()
|
|
|
|
; CHECK-NEXT: br label [[BB3_NONCHR]]
|
|
|
|
; CHECK: bb3.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 4
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb4.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB5]]
|
|
|
|
; CHECK: bb5:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb2, label %bb3, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @bar()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%5 = and i32 %0, 4
|
|
|
|
%6 = icmp eq i32 %5, 0
|
|
|
|
br i1 %6, label %bb5, label %bb4, !prof !15
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb5
|
|
|
|
|
|
|
|
bb5:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; With an aggregate bit check.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 255) != 0) // Likely true
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else if ((t0 & 255) != 0) {
|
|
|
|
; if ((t0 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
define void @test_chr_2(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_2(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB4:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 255
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB4]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[BB2_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb3.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB4]]
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB2_NONCHR]]
|
|
|
|
; CHECK: bb4:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 255
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb4, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
%3 = and i32 %0, 1
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb2, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%5 = and i32 %0, 2
|
|
|
|
%6 = icmp eq i32 %5, 0
|
|
|
|
br i1 %6, label %bb4, label %bb3, !prof !15
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb4
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Split case.
|
|
|
|
; Roughly,
|
|
|
|
; t1 = *i
|
|
|
|
; if ((t1 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t1 & 2) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; t2 = *i
|
|
|
|
; if ((t2 & 4) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t2 & 8) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; t1 = *i
|
|
|
|
; if ((t1 & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t1 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t1 & 2) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; t2 = *i
|
|
|
|
; if ((t2 & 12) == 12) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t2 & 4) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t2 & 8) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
define void @test_chr_3(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_3(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 12
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 12
|
|
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb4:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB7:%.*]]
|
|
|
|
; CHECK: bb3.split.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP6]], 4
|
|
|
|
; CHECK-NEXT: [[DOTNOT1:%.*]] = icmp eq i32 [[TMP9]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT1]], label [[BB5_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb4.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB5_NONCHR]]
|
|
|
|
; CHECK: bb5.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP6]], 8
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb6.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB7]]
|
|
|
|
; CHECK: bb7:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%5 = load i32, i32* %i
|
|
|
|
%6 = and i32 %5, 4
|
|
|
|
%7 = icmp eq i32 %6, 0
|
|
|
|
br i1 %7, label %bb5, label %bb4, !prof !15
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb5
|
|
|
|
|
|
|
|
bb5:
|
|
|
|
%8 = and i32 %5, 8
|
|
|
|
%9 = icmp eq i32 %8, 0
|
|
|
|
br i1 %9, label %bb7, label %bb6, !prof !15
|
|
|
|
|
|
|
|
bb6:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb7
|
|
|
|
|
|
|
|
bb7:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Selects.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; sum1 = (t0 & 1) == 0 ? sum0 : (sum0 + 42) // Likely false
|
|
|
|
; sum2 = (t0 & 2) == 0 ? sum1 : (sum1 + 43) // Likely false
|
|
|
|
; return sum2
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 3) == 3)
|
|
|
|
; return sum0 + 85
|
|
|
|
; else {
|
|
|
|
; sum1 = (t0 & 1) == 0 ? sum0 : (sum0 + 42)
|
|
|
|
; sum2 = (t0 & 2) == 0 ? sum1 : (sum1 + 43)
|
|
|
|
; return sum2
|
|
|
|
; }
|
|
|
|
define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_4(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: entry.split:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
|
|
|
|
; CHECK-NEXT: ret i32 [[TMP3]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 42
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
|
|
|
|
; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[DOTNOT]], i32 [[SUM0]], i32 [[TMP4]], !prof !16
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1_NONCHR]], 43
|
|
|
|
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM1_NONCHR]], i32 [[TMP8]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: ret i32 [[SUM2_NONCHR]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
%3 = add i32 %sum0, 42
|
|
|
|
%sum1 = select i1 %2, i32 %sum0, i32 %3, !prof !15
|
|
|
|
%4 = and i32 %0, 2
|
|
|
|
%5 = icmp eq i32 %4, 0
|
|
|
|
%6 = add i32 %sum1, 43
|
|
|
|
%sum2 = select i1 %5, i32 %sum1, i32 %6, !prof !15
|
|
|
|
ret i32 %sum2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Selects + Brs
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 255) != 0) { // Likely true
|
|
|
|
; sum = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false
|
|
|
|
; sum = (t0 & 2) ? sum : (sum + 43) // Likely false
|
|
|
|
; if ((t0 & 4) != 0) { // Likely true
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = (t0 & 8) ? sum3 : (sum3 + 44) // Likely false
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 15) == 15) { // Likely true
|
|
|
|
; sum = sum0 + 173
|
|
|
|
; } else if ((t0 & 255) != 0) {
|
|
|
|
; sum = (t0 & 1) ? sum0 : (sum0 + 42)
|
|
|
|
; sum = (t0 & 2) ? sum : (sum + 43)
|
|
|
|
; if ((t0 & 4) != 0) {
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = (t0 & 8) ? sum3 : (sum3 + 44)
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; test_chr_5: two selects, a nested branch and a third select that all test
; bits of the same loaded value %0. The assertions below expect the four bit
; tests (1, 2, 4, 8) to be merged into a single `(%0 & 15) == 15` guard whose
; hot path adds 42 + 43 + 44 + 44 = 173 to %sum0, while the original
; select/branch logic is kept on the ".nonchr" cold path.
define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 {
; CHECK-LABEL: @test_chr_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 15
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: bb0:
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: entry.split.nonchr:
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
; CHECK: bb0.nonchr:
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM0]], 42
; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM0]], i32 [[TMP8]], !prof !16
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[SUM1_NONCHR]], 43
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP10]], i32 [[SUM1_NONCHR]], i32 [[TMP11]], !prof !16
; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP0]], 8
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP15]], i32 44, i32 88
; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP13]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
; CHECK-NEXT: ret i32 [[SUM6]]
;
entry:
  %0 = load i32, i32* %i
  %1 = and i32 %0, 255
  %2 = icmp eq i32 %1, 0
  ; Skip all of the arithmetic when the low byte of %0 is zero.
  br i1 %2, label %bb3, label %bb0, !prof !15

bb0:
  ; Each select keeps its input when the tested bit is clear and adds the
  ; constant when the bit is set.
  %3 = and i32 %0, 1
  %4 = icmp eq i32 %3, 0
  %5 = add i32 %sum0, 42
  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
  %6 = and i32 %0, 2
  %7 = icmp eq i32 %6, 0
  %8 = add i32 %sum1, 43
  %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
  %9 = and i32 %0, 4
  %10 = icmp eq i32 %9, 0
  br i1 %10, label %bb2, label %bb1, !prof !15

bb1:
  ; Reached only when bit 4 of %0 is set.
  %sum3 = add i32 %sum2, 44
  %11 = and i32 %0, 8
  %12 = icmp eq i32 %11, 0
  %13 = add i32 %sum3, 44
  %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
  br label %bb2

bb2:
  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
  br label %bb3

bb3:
  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
  ret i32 %sum6
}
|
|
|
|
|
|
|
|
; Selects + Brs with a scope split in the middle
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 255) != 0) { // Likely true
|
|
|
|
; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
|
|
|
|
; sum = ((t0 & 2) == 0) ? sum : (sum + 43) // Likely false
|
|
|
|
; if ((sum0 & 4) != 0) { // Likely true. The condition uses sum0, not t0.
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((sum0 & 4) != 0 & (t0 & 11) == 11) { // Likely true
|
|
|
|
; sum = sum0 + 173
|
|
|
|
; } else if ((t0 & 255) != 0) {
|
|
|
|
; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
|
|
|
|
; sum = ((t0 & 2) == 0) ? sum : (sum + 43)
|
|
|
|
; if ((sum0 & 4) != 0) {
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44)
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; test_chr_5_1: same shape as a chain of selects around a nested branch, but
; the middle branch tests %sum0 instead of the loaded value %0. The assertions
; below expect the three bit tests on %0 (1, 2, 8) to be merged into
; `(%0 & 11) == 11` and and-ed with `(%sum0 & 4) != 0`; the hot path adds
; 42 + 43 + 44 + 44 = 173 to %sum0 and the original logic is kept on the
; ".nonchr" cold path.
define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 {
; CHECK-LABEL: @test_chr_5_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 11
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: bb0:
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SUM0]], 85
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM0]], 173
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: entry.split.nonchr:
; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
; CHECK: bb0.nonchr:
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[SUM0]], 42
; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP10]], i32 [[SUM0]], i32 [[TMP11]], !prof !16
; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM1_NONCHR]], 43
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP13]], i32 [[SUM1_NONCHR]], i32 [[TMP14]], !prof !16
; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[SUM0]], 4
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP0]], 8
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP17]], 0
; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP18]], i32 44, i32 88
; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP16]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
; CHECK-NEXT: ret i32 [[SUM6]]
;
entry:
  %0 = load i32, i32* %i
  %1 = and i32 %0, 255
  %2 = icmp eq i32 %1, 0
  ; Skip all of the arithmetic when the low byte of %0 is zero.
  br i1 %2, label %bb3, label %bb0, !prof !15

bb0:
  ; Each select keeps its input when the tested bit is clear and adds the
  ; constant when the bit is set.
  %3 = and i32 %0, 1
  %4 = icmp eq i32 %3, 0
  %5 = add i32 %sum0, 42
  %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
  %6 = and i32 %0, 2
  %7 = icmp eq i32 %6, 0
  %8 = add i32 %sum1, 43
  %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
  %9 = and i32 %sum0, 4 ; Split: this condition is based on %sum0, not %0
  %10 = icmp eq i32 %9, 0
  br i1 %10, label %bb2, label %bb1, !prof !15

bb1:
  ; Reached only when bit 4 of %sum0 is set.
  %sum3 = add i32 %sum2, 44
  %11 = and i32 %0, 8
  %12 = icmp eq i32 %11, 0
  %13 = add i32 %sum3, 44
  %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
  br label %bb2

bb2:
  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
  br label %bb3

bb3:
  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
  ret i32 %sum6
}
|
|
|
|
|
|
|
|
; Selects + Brs, non-matching bases
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; j0 = *j
|
|
|
|
; if ((i0 & 255) != 0) { // Likely true
|
|
|
|
; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; if ((j0 & 4) != 0) { // Likely true. The condition uses j0, not i0.
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; ->
|
|
|
|
; i0 = *i
|
|
|
|
; j0 = *j
|
|
|
|
; if ((j0 & 4) != 0 & (i0 & 10) == 10) { // Likely true
|
|
|
|
; sum = sum0 + 131
|
|
|
|
; } else if ((i0 & 255) != 0) {
|
|
|
|
; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
|
|
|
|
; if ((j0 & 4) != 0) {
|
|
|
|
; sum3 = sum + 44
|
|
|
|
; sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44)
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; test_chr_6: selects on bits of %i0 around a branch whose condition tests a
; different loaded value, %j0. The assertions below expect the two bit tests
; on %i0 (2, 8) to be merged into `(%i0 & 10) == 10` and and-ed with
; `(%j0 & 4) != 0`; the hot path adds 43 + 44 + 44 = 131 to %sum0 and the
; original logic is kept on the ".nonchr" cold path.
define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 {
; CHECK-LABEL: @test_chr_6(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
; CHECK-NEXT: [[V10:%.*]] = icmp ne i32 [[V9]], 0
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 10
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10
; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]]
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: bb0:
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
; CHECK-NEXT: [[V13:%.*]] = add i32 [[SUM0]], 131
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: entry.split.nonchr:
; CHECK-NEXT: [[V1:%.*]] = and i32 [[I0]], 255
; CHECK-NEXT: [[V2_NOT:%.*]] = icmp eq i32 [[V1]], 0
; CHECK-NEXT: br i1 [[V2_NOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
; CHECK: bb0.nonchr:
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i32 [[I0]], 2
; CHECK-NEXT: [[V4_NONCHR:%.*]] = icmp eq i32 [[V3_NONCHR]], 0
; CHECK-NEXT: [[V8_NONCHR:%.*]] = add i32 [[SUM0]], 43
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16
; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4
; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8
; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88
; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[V10_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
; CHECK-NEXT: ret i32 [[SUM6]]
;
entry:
  ; Both values are loaded up front, before any branch or select.
  %i0 = load i32, i32* %i
  %j0 = load i32, i32* %j
  %v1 = and i32 %i0, 255
  %v2 = icmp eq i32 %v1, 0
  ; Skip all of the arithmetic when the low byte of %i0 is zero.
  br i1 %v2, label %bb3, label %bb0, !prof !15

bb0:
  ; The select keeps %sum0 when bit 2 of %i0 is clear and adds 43 otherwise.
  %v3 = and i32 %i0, 2
  %v4 = icmp eq i32 %v3, 0
  %v8 = add i32 %sum0, 43
  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
  ; This branch condition is based on %j0, not %i0.
  %v9 = and i32 %j0, 4
  %v10 = icmp eq i32 %v9, 0
  br i1 %v10, label %bb2, label %bb1, !prof !15

bb1:
  ; Reached only when bit 4 of %j0 is set.
  %sum3 = add i32 %sum2, 44
  %v11 = and i32 %i0, 8
  %v12 = icmp eq i32 %v11, 0
  %v13 = add i32 %sum3, 44
  %sum4 = select i1 %v12, i32 %sum3, i32 %v13, !prof !15
  br label %bb2

bb2:
  %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
  br label %bb3

bb3:
  %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
  ret i32 %sum6
}
|
|
|
|
|
|
|
|
; Selects + Brs, the branch condition can't be hoisted to be merged with a
|
|
|
|
; select. No CHR happens.
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; foo();
|
|
|
|
; j0 = *j
|
|
|
|
; if ((j0 & 4) != 0) { // Likely true
|
|
|
|
; foo();
|
|
|
|
; sum = sum + 44
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
; ->
|
|
|
|
; (no change)
|
|
|
|
; test_chr_7: a select on %i0 followed by a call to @foo, a load of %j0 and a
; branch on %j0. The assertions below expect the function to keep its
; original shape: one select, then one conditional branch, with no merged
; guard and no ".nonchr" clone.
define i32 @test_chr_7(i32* %i, i32* %j, i32 %sum0) !prof !14 {
; CHECK-LABEL: @test_chr_7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
; CHECK-NEXT: br i1 [[V10]], label [[BB2:%.*]], label [[BB1:%.*]], !prof !16
; CHECK: bb1:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[SUM4:%.*]] = add i32 [[SUM2]], 44
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ [[SUM2]], [[ENTRY:%.*]] ], [ [[SUM4]], [[BB1]] ]
; CHECK-NEXT: ret i32 [[SUM5]]
;
entry:
  %i0 = load i32, i32* %i
  %v3 = and i32 %i0, 2
  %v4 = icmp eq i32 %v3, 0
  %v8 = add i32 %sum0, 43
  %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
  call void @foo()
  ; %j0 is loaded only after the call to @foo, i.e. after the select above.
  %j0 = load i32, i32* %j
  %v9 = and i32 %j0, 4
  %v10 = icmp eq i32 %v9, 0
  ; %v10 depends on %j0, so it can't be hoisted above the select to be merged
  ; with the select's condition.
  br i1 %v10, label %bb2, label %bb1, !prof !15

bb1:
  call void @foo()
  %sum4 = add i32 %sum2, 44
  br label %bb2

bb2:
  %sum5 = phi i32 [ %sum2, %entry ], [ %sum4, %bb1 ]
  ret i32 %sum5
}
|
|
|
|
|
|
|
|
; Selects + Brs, the branch condition can't be hoisted to be merged with the
|
|
|
|
; selects. Dropping the select.
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; foo();
|
|
|
|
; j0 = *j
|
|
|
|
; if ((j0 & 4) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((j0 & 8) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; return sum
|
|
|
|
; ->
|
|
|
|
; i0 = *i
|
|
|
|
; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; foo();
|
|
|
|
; j0 = *j
|
|
|
|
; if ((j0 & 12) == 12) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((j0 & 4) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((j0 & 8) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; return sum
|
|
|
|
define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_7_1(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 12
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[V10_NOT:%.*]] = icmp eq i32 [[V9]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V10_NOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[J0]], 8
|
|
|
|
; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
2020-05-22 15:32:21 +08:00
|
|
|
; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
|
|
|
|
; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
|
|
|
|
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
|
|
|
|
; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: ret i32 [[SUM2]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%i0 = load i32, i32* %i
|
|
|
|
%v3 = and i32 %i0, 2
|
|
|
|
%v4 = icmp eq i32 %v3, 0
|
|
|
|
%v8 = add i32 %sum0, 43
|
|
|
|
%sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
|
|
|
|
call void @foo()
|
|
|
|
%j0 = load i32, i32* %j
|
|
|
|
%v9 = and i32 %j0, 4
|
|
|
|
%v10 = icmp eq i32 %v9, 0
|
|
|
|
br i1 %v10, label %bb1, label %bb0, !prof !15 ; %v10 can't be hoisted above the above select
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%v11 = and i32 %j0, 8
|
|
|
|
%v12 = icmp eq i32 %v11, 0
|
|
|
|
br i1 %v12, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i32 %sum2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Branches aren't biased enough. No CHR happens.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Not biased
|
|
|
|
; foo()
|
|
|
|
; if ((t0 & 2) != 0) // Not biased
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; (no change)
|
|
|
|
define void @test_chr_8(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_8(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !17
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !17
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !16
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !16
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; With an existing phi at the exit.
|
|
|
|
; Roughly,
|
|
|
|
; t = *i
|
|
|
|
; if ((t & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; if ((t & 2) != 0) { // Likely true
|
|
|
|
; t = *j
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; // There's a phi for t here.
|
|
|
|
; return t
|
|
|
|
; ->
|
|
|
|
; t = *i
|
|
|
|
; if ((t & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; t = *j
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((t & 2) != 0) {
|
|
|
|
; t = *j
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; // There's a phi for t here.
|
|
|
|
; return t
|
|
|
|
define i32 @test_chr_9(i32* %i, i32* %j) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_9(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[J]], align 4
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP7]], [[BB2_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: ret i32 [[TMP8]]
|
2019-04-17 12:52:47 +08:00
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%5 = load i32, i32* %j
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%6 = phi i32 [ %0, %bb1 ], [ %5, %bb2 ]
|
|
|
|
ret i32 %6
|
|
|
|
}
|
|
|
|
|
|
|
|
; With no phi at the exit, but the exit needs a phi inserted after CHR.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; t1 = *j
|
|
|
|
; if ((t0 & 2) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; return (t1 * 42) + (t1 - 99)
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; t1 = *j
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; t1 = *j
|
|
|
|
; if ((t0 & 2) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; // A new phi for t1 is inserted here.
|
|
|
|
; return (t1 * 42) + (t1 - 99)
|
|
|
|
define i32 @test_chr_10(i32* %i, i32* %j) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_10(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[J]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP5]], [[BB2_NONCHR]] ], [ [[TMP5]], [[BB1_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 42
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], -99
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], [[TMP10]]
|
|
|
|
; CHECK-NEXT: ret i32 [[TMP11]]
|
2019-04-17 12:52:47 +08:00
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = load i32, i32* %j
|
|
|
|
%4 = and i32 %0, 2
|
|
|
|
%5 = icmp eq i32 %4, 0
|
|
|
|
br i1 %5, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%6 = mul i32 %3, 42
|
|
|
|
%7 = sub i32 %3, 99
|
|
|
|
%8 = add i32 %6, %7
|
|
|
|
ret i32 %8
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test a case where there are two use-def chain paths to the same value (t0)
|
|
|
|
; from the branch condition. This is a regression test for an old bug that
|
|
|
|
; caused a bad hoisting that moves (hoists) a value (%conv) twice to the end of
|
|
|
|
; the %entry block (once for %div and once for %mul16) and put a use ahead of
|
|
|
|
; its definition like:
|
|
|
|
; %entry:
|
|
|
|
; ...
|
|
|
|
; %div = fdiv double 1.000000e+00, %conv
|
|
|
|
; %conv = sitofp i32 %0 to double
|
|
|
|
; %mul16 = fmul double %div, %conv
|
|
|
|
;
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; // there are two use-def paths from the branch condition to t0.
|
|
|
|
; if ((1.0 / t0) * t0 > 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0 & (1.0 / t0) * t0 > 0) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 1) != 0)
|
|
|
|
; foo()
|
|
|
|
; if ((1.0 / t0) * t0 > 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
define void @test_chr_11(i32* %i, i32 %x) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_11(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
|
|
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
|
|
; CHECK-NEXT: [[DIV:%.*]] = fdiv double 1.000000e+00, [[CONV]]
|
|
|
|
; CHECK-NEXT: [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]]
|
|
|
|
; CHECK-NEXT: [[CONV717:%.*]] = fptosi double [[MUL16]] to i32
|
|
|
|
; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]]
|
|
|
|
; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !18
|
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[CONV_NONCHR:%.*]] = sitofp i32 [[TMP0]] to double
|
|
|
|
; CHECK-NEXT: [[DIV_NONCHR:%.*]] = fdiv double 1.000000e+00, [[CONV_NONCHR]]
|
|
|
|
; CHECK-NEXT: [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]]
|
|
|
|
; CHECK-NEXT: [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32
|
|
|
|
; CHECK-NEXT: [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1
|
|
|
|
; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%conv = sitofp i32 %0 to double
|
|
|
|
%div = fdiv double 1.000000e+00, %conv
|
|
|
|
%mul16 = fmul double %div, %conv
|
|
|
|
%conv717 = fptosi double %mul16 to i32
|
|
|
|
%cmp18 = icmp slt i32 %conv717, 1
|
|
|
|
br i1 %cmp18, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Selects + unrelated br only
|
|
|
|
define i32 @test_chr_12(i32* %i, i32 %sum0) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_12(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 255
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB3:%.*]], label [[BB0:%.*]], !prof !16
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SUM0:%.*]], 42
|
|
|
|
; CHECK-NEXT: [[SUM1:%.*]] = select i1 [[TMP4]], i32 [[SUM0]], i32 [[TMP5]], !prof !16
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1]], 43
|
|
|
|
; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[TMP7]], i32 [[SUM1]], i32 [[TMP8]], !prof !16
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP0]], 8
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
|
|
|
|
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP10]], [[TMP12]]
|
|
|
|
; CHECK-NEXT: br i1 [[TMP13]], label [[BB1:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM2]], 88
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb0.split.nonchr:
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[BB1_NONCHR:%.*]], label [[BB3]], !prof !18
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8
|
|
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
|
|
|
|
; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
|
|
|
|
; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2]], [[SUM4_NONCHR_V]]
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[SUM0]], [[ENTRY:%.*]] ], [ [[TMP14]], [[BB1]] ], [ [[SUM2]], [[BB0_SPLIT_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: ret i32 [[SUM6]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 255
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb3, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
%3 = and i32 %0, 1
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
%5 = add i32 %sum0, 42
|
|
|
|
%sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
|
|
|
|
%6 = and i32 %0, 2
|
|
|
|
%7 = icmp eq i32 %6, 0
|
|
|
|
%8 = add i32 %sum1, 43
|
|
|
|
%sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
|
|
|
|
%9 = load i32, i32* %i
|
|
|
|
%10 = icmp eq i32 %9, 0
|
|
|
|
br i1 %10, label %bb2, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%sum3 = add i32 %sum2, 44
|
|
|
|
%11 = and i32 %0, 8
|
|
|
|
%12 = icmp eq i32 %11, 0
|
|
|
|
%13 = add i32 %sum3, 44
|
|
|
|
%sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
|
|
|
|
br label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
|
|
|
|
ret i32 %sum6
|
|
|
|
}
|
|
|
|
|
|
|
|
; In the second CHR, a condition value depends on a trivial phi that's inserted
|
|
|
|
; by the first CHR.
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; v2 = (z != 1) ? pred : true // Likely false
|
|
|
|
; if (z == 0 & pred) // Likely false
|
|
|
|
; foo()
|
|
|
|
; j0 = *j
|
|
|
|
; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; sum3 = (i0 == j0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; foo()
|
|
|
|
; if ((i0 & 4) != 0) // Unbiased
|
|
|
|
; foo()
|
|
|
|
; return i0 + sum3
|
|
|
|
; ->
|
|
|
|
; i0 = *i
|
|
|
|
; if (z != 1 & (z == 0 & pred)) // First CHR
|
|
|
|
; foo()
|
|
|
|
; // A trivial phi for i0 is inserted here by the first CHR (which gets removed
|
|
|
|
; // later) and the subsequent branch condition (for the second CHR) uses it.
|
|
|
|
; j0 = *j
|
|
|
|
; if ((i0 & 2) != j0 & i0 != j0) { // Second CHR
|
|
|
|
; sum3 = sum0 + 43
|
|
|
|
; foo()
|
|
|
|
; if ((i0 & 4) != 0)
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; sum3 = (i0 == j0) ? sum0 : (sum0 + 43)
|
|
|
|
; foo()
|
|
|
|
; if ((i0 & 4) != 0)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; return i0 + sum3
|
|
|
|
define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_14(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
|
|
|
|
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
|
|
|
|
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
|
|
|
|
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
|
|
|
|
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
|
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
|
|
|
|
; CHECK-NEXT: [[V4:%.*]] = icmp ne i32 [[V6]], [[J0]]
|
|
|
|
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
|
|
|
|
; CHECK-NEXT: [[V5:%.*]] = icmp ne i32 [[I0]], [[J0]]
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[V4]], [[V5]]
|
|
|
|
; CHECK-NEXT: br i1 [[TMP0]], label [[BB1_SPLIT:%.*]], label [[BB1_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb1.split:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
|
|
|
|
; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb1.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[I0]], [[J0]]
|
|
|
|
; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM0]], i32 [[V8]], !prof !16
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[I0]], 4
|
|
|
|
; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]]
|
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[V8]], [[BB2]] ], [ [[V8]], [[BB1_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB2_NONCHR]] ], [ [[SUM3_NONCHR]], [[BB1_SPLIT_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[TMP1]]
|
|
|
|
; CHECK-NEXT: ret i32 [[V11]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%i0 = load i32, i32* %i
|
|
|
|
%v0 = icmp eq i32 %z, 0
|
|
|
|
%v1 = icmp ne i32 %z, 1
|
|
|
|
%v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
|
|
|
|
%v3 = and i1 %v0, %pred
|
|
|
|
br i1 %v3, label %bb0, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%j0 = load i32, i32* %j
|
|
|
|
%v6 = and i32 %i0, 2
|
|
|
|
%v4 = icmp eq i32 %v6, %j0
|
|
|
|
%v8 = add i32 %sum0, 43
|
|
|
|
%sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
|
|
|
|
%v5 = icmp eq i32 %i0, %j0
|
|
|
|
%sum3 = select i1 %v5, i32 %sum0, i32 %v8, !prof !15
|
|
|
|
call void @foo()
|
|
|
|
%v9 = and i32 %i0, 4
|
|
|
|
%v10 = icmp eq i32 %v9, 0
|
|
|
|
br i1 %v10, label %bb3, label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%v11 = add i32 %i0, %sum3
|
|
|
|
ret i32 %v11
|
|
|
|
}
|
|
|
|
|
|
|
|
; Branch or selects depends on another select. No CHR happens.
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; if (z == 0 & ((z != 1) ? pred : true)) // Likely false
|
|
|
|
; foo()
|
|
|
|
; j0 = *j
|
|
|
|
; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; sum3 = (i0 == sum2) ? sum2 : (sum0 + 43) // Likely false. This depends on the
|
|
|
|
; // previous select.
|
|
|
|
; foo()
|
|
|
|
; if ((i0 & 4) != 0) // Unbiased
|
|
|
|
; foo()
|
|
|
|
; return i0 + sum3
|
|
|
|
; ->
|
|
|
|
; (no change)
|
|
|
|
define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_15(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z:%.*]], 0
|
|
|
|
; CHECK-NEXT: [[V3:%.*]] = and i1 [[V0]], [[PRED:%.*]]
|
|
|
|
; CHECK-NEXT: br i1 [[V3]], label [[BB0:%.*]], label [[BB1:%.*]], !prof !16
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
|
|
|
|
; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
|
|
|
|
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
|
|
|
|
; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
|
|
|
|
; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
2020-05-22 15:32:21 +08:00
|
|
|
; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
|
|
|
|
; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
|
|
|
|
; CHECK-NEXT: ret i32 [[V11]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%i0 = load i32, i32* %i
|
|
|
|
%v0 = icmp eq i32 %z, 0
|
|
|
|
%v1 = icmp ne i32 %z, 1
|
|
|
|
%v2 = select i1 %v1, i1 %pred, i1 true, !prof !15
|
|
|
|
%v3 = and i1 %v0, %v2
|
|
|
|
br i1 %v3, label %bb0, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%j0 = load i32, i32* %j
|
|
|
|
%v6 = and i32 %i0, 2
|
|
|
|
%v4 = icmp eq i32 %v6, %j0
|
|
|
|
%v8 = add i32 %sum0, 43
|
|
|
|
%sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
|
|
|
|
%v5 = icmp eq i32 %i0, %sum2
|
|
|
|
%sum3 = select i1 %v5, i32 %sum2, i32 %v8, !prof !15
|
|
|
|
call void @foo()
|
|
|
|
%v9 = and i32 %i0, 4
|
|
|
|
%v10 = icmp eq i32 %v9, 0
|
|
|
|
br i1 %v10, label %bb3, label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%v11 = add i32 %i0, %sum3
|
|
|
|
ret i32 %v11
|
|
|
|
}
|
|
|
|
|
|
|
|
; With an existing phi at the exit, where a value (%v40) is both alive and is an
|
|
|
|
; operand to a phi at the exit block.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; v40 = t0 + 44
|
|
|
|
; if ((t0 & 2) != 0) { // Likely true
|
|
|
|
; v41 = t0 + 99
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; v42 = phi v40, v41
|
|
|
|
; return v42 + v40
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 3) == 3) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; v40 = t0 + 44
|
|
|
|
; v41 = t0 + 99
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 1) != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; v40_nc = t0 + 44
|
|
|
|
; if ((t0 & 2) != 0) { // Likely true
|
|
|
|
; v41_nc = t0 + 99
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; t7 = phi v40, v40_nc
|
|
|
|
; v42 = phi v41, v41_nc
|
|
|
|
; v43 = v42 + t7
|
|
|
|
; return v43
|
|
|
|
define i32 @test_chr_16(i32* %i) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_16(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[V40:%.*]] = add i32 [[TMP0]], 44
|
|
|
|
; CHECK-NEXT: [[V41:%.*]] = add i32 [[TMP0]], 99
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1_NONCHR]]
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[V40_NONCHR:%.*]] = add i32 [[TMP0]], 44
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: [[V41_NONCHR:%.*]] = add i32 [[TMP0]], 99
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[V40]], [[BB0]] ], [ [[V40_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: [[V42:%.*]] = phi i32 [ [[V41]], [[BB0]] ], [ [[V41_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ]
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[V43:%.*]] = add i32 [[V42]], [[TMP6]]
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: ret i32 [[V43]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%v40 = add i32 %0, 44
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%v41 = add i32 %0, 99
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%v42 = phi i32 [ %v41, %bb2 ], [ %v40, %bb1 ]
|
|
|
|
%v43 = add i32 %v42, %v40
|
|
|
|
ret i32 %v43
|
|
|
|
}
|
|
|
|
|
|
|
|
; Two consecutive regions have an entry in the middle of them. No CHR happens.
|
|
|
|
; Roughly,
|
|
|
|
; if ((i & 4) != 0) {
|
|
|
|
; if (!j)
|
|
|
|
; goto bb1
|
|
|
|
; } else {
|
|
|
|
; t0 = (i & 1)
|
|
|
|
; if (t0 != 0) // Likely true
|
|
|
|
; foo()
|
|
|
|
; s = (i & 1) + i
|
|
|
|
; }
|
|
|
|
; bb1:
|
|
|
|
; p = phi i, t0, s
|
|
|
|
; if ((i & 2) != 0) { // Likely true
|
|
|
|
; foo()
|
|
|
|
; q = p + 2
|
|
|
|
; }
|
|
|
|
; r = phi p, q, i
|
|
|
|
; return r
|
|
|
|
; ->
|
|
|
|
; (no change)
|
|
|
|
define i32 @test_chr_17(i32 %i, i1 %j) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_17(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[V0:%.*]] = and i32 [[I:%.*]], 4
|
|
|
|
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[V1]], label [[BBE:%.*]], label [[BBQ:%.*]]
|
|
|
|
; CHECK: bbq:
|
|
|
|
; CHECK-NEXT: br i1 [[J:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
|
|
|
|
; CHECK: bbe:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I]], 1
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[BB1]], label [[BB0:%.*]], !prof !16
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[S:%.*]] = add i32 [[TMP0]], [[I]]
|
|
|
|
; CHECK-NEXT: br label [[BB1]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[I]], [[BBQ]] ], [ [[TMP0]], [[BBE]] ], [ [[S]], [[BB0]] ]
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[I]], 2
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP3]], label [[BB3]], label [[BB2:%.*]], !prof !16
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: [[Q:%.*]] = add i32 [[P]], [[TMP2]]
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[P]], [[BB1]] ], [ [[Q]], [[BB2]] ], [ [[I]], [[BBQ]] ]
|
|
|
|
; CHECK-NEXT: ret i32 [[R]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%v0 = and i32 %i, 4
|
|
|
|
%v1 = icmp eq i32 %v0, 0
|
|
|
|
br i1 %v1, label %bbe, label %bbq
|
|
|
|
|
|
|
|
bbq:
|
|
|
|
br i1 %j, label %bb3, label %bb1
|
|
|
|
|
|
|
|
bbe:
|
|
|
|
%0 = and i32 %i, 1
|
|
|
|
%1 = icmp eq i32 %0, 0
|
|
|
|
br i1 %1, label %bb1, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
%s = add i32 %0, %i
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%p = phi i32 [ %i, %bbq ], [ %0, %bbe ], [ %s, %bb0 ]
|
|
|
|
%2 = and i32 %i, 2
|
|
|
|
%3 = icmp eq i32 %2, 0
|
|
|
|
br i1 %3, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
%q = add i32 %p, %2
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%r = phi i32 [ %p, %bb1 ], [ %q, %bb2 ], [ %i, %bbq ]
|
|
|
|
ret i32 %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Select + br, there's a loop and we need to update the user of an inserted phi
|
|
|
|
; at the entry block. This is a regression test for a bug that's fixed.
|
|
|
|
; Roughly,
|
|
|
|
; do {
|
|
|
|
; inc1 = phi inc2, 0
|
|
|
|
; li = *i
|
|
|
|
; sum1 = sum0 + 42
|
|
|
|
; sum2 = ((li & 1) == 0) ? sum0 : sum1 // Likely false
|
|
|
|
; inc2 = inc1 + 1
|
|
|
|
; if ((li & 4) != 0) // Likely true
|
|
|
|
; sum3 = sum2 + 44
|
|
|
|
; sum4 = phi sum1, sum3
|
|
|
|
; } while (inc2 != 100) // Likely true (loop back)
|
|
|
|
; return sum4
|
|
|
|
; ->
|
|
|
|
; do {
|
|
|
|
; inc1 = phi tmp2, 0 // The first operand needed to be updated
|
|
|
|
; li = *i
|
|
|
|
; sum1 = sum0 + 42
|
|
|
|
; if ((li & 5) == 5) { // Likely true
|
|
|
|
; inc2 = inc1 + 1
|
|
|
|
; sum3 = sum0 + 86
|
|
|
|
; } else {
|
|
|
|
; inc2_nc = inc1 + 1
|
|
|
|
; if ((li & 4) != 0)
|
|
|
|
; sum2_nc = ((li & 1) == 0) ? sum0 : sum1
|
|
|
|
; sum3_nc = sum2_nc + 44
|
|
|
|
; }
|
|
|
|
; tmp2 = phi inc2, inc2_nc
|
|
|
|
; sum4 = phi sum3, sum3_nc, sum1
|
|
|
|
; } while (tmp2 != 100)
|
|
|
|
; return sum4
|
|
|
|
define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_18(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: br label [[BB0:%.*]]
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB2:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
|
|
|
; CHECK-NEXT: [[LI:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[SUM0:%.*]], 42
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[LI]], 5
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 5
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0.split:
|
|
|
|
; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1
|
|
|
|
; CHECK-NEXT: [[SUM3:%.*]] = add i32 [[SUM0]], 86
|
|
|
|
; CHECK-NEXT: br label [[BB2]]
|
|
|
|
; CHECK: bb0.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[A4_NONCHR:%.*]] = and i32 [[LI]], 4
|
|
|
|
; CHECK-NEXT: [[CMP4_NONCHR:%.*]] = icmp eq i32 [[A4_NONCHR]], 0
|
|
|
|
; CHECK-NEXT: [[INC2_NONCHR:%.*]] = add i32 [[INC1]], 1
|
|
|
|
; CHECK-NEXT: br i1 [[CMP4_NONCHR]], label [[BB2]], label [[BB1_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[A1:%.*]] = and i32 [[LI]], 1
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[A1]], 0
|
|
|
|
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[CMP1_NOT]], i32 [[SUM0]], i32 [[SUM1]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
|
|
|
|
; CHECK-NEXT: br label [[BB2]]
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: [[TMP2]] = phi i32 [ [[INC2]], [[BB0_SPLIT]] ], [ [[INC2_NONCHR]], [[BB1_NONCHR]] ], [ [[INC2_NONCHR]], [[BB0_SPLIT_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: [[SUM4:%.*]] = phi i32 [ [[SUM3]], [[BB0_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM1]], [[BB0_SPLIT_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 100
|
|
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB0]], !prof !16
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: ret i32 [[SUM4]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
br label %bb0
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
%inc1 = phi i32 [ %inc2, %bb2 ], [ 0, %entry ]
|
|
|
|
%li = load i32, i32* %i
|
|
|
|
%a1 = and i32 %li, 1
|
|
|
|
%cmp1 = icmp eq i32 %a1, 0
|
|
|
|
%sum1 = add i32 %sum0, 42
|
|
|
|
%sum2 = select i1 %cmp1, i32 %sum0, i32 %sum1, !prof !15
|
|
|
|
%a4 = and i32 %li, 4
|
|
|
|
%cmp4 = icmp eq i32 %a4, 0
|
|
|
|
%inc2 = add i32 %inc1, 1
|
|
|
|
br i1 %cmp4, label %bb2, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%sum3 = add i32 %sum2, 44
|
|
|
|
br label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%sum4 = phi i32 [ %sum1, %bb0 ], [ %sum3, %bb1 ]
|
|
|
|
%cmp = icmp eq i32 %inc2, 100
|
|
|
|
br i1 %cmp, label %bb3, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i32 %sum4
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
; Selects + Brs. Those share the condition value, which causes the
|
|
|
|
; targets/operands of the branch/select to be flipped.
|
|
|
|
; Roughly,
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 255) != 0) { // Likely true
|
|
|
|
; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
|
|
|
|
; sum2 = ((t0 & 1) == 0) ? sum1 : (sum1 + 43) // Likely false
|
|
|
|
; if ((t0 & 1) != 0) { // Likely true
|
|
|
|
; sum3 = sum2 + 44
|
|
|
|
; sum4 = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
|
|
|
|
; }
|
|
|
|
; sum5 = phi sum2, sum4
|
|
|
|
; }
|
|
|
|
; sum6 = phi sum0, sum5
|
|
|
|
; return sum6
|
|
|
|
; ->
|
|
|
|
; t0 = *i
|
|
|
|
; if ((t0 & 9) == 9) { // Likely true
|
|
|
|
; tmp3 = sum0 + 85 // Dead
|
|
|
|
; tmp4 = sum0 + 173
|
|
|
|
; } else {
|
|
|
|
; if ((t0 & 255) != 0) {
|
|
|
|
; sum2_nc = ((t0 & 1) == 0) ? sum0 : (sum0 + 85)
|
|
|
|
; sum4_nc_v = ((t0 & 8) == 0) ? 44 : 88
|
|
|
|
; sum4_nc = add sum2_nc + sum4_nc_v
|
|
|
|
; }
|
|
|
|
; }
|
|
|
|
; sum6 = phi tmp4, sum0, sum2_nc, sum4_nc
|
|
|
|
; return sum6
|
|
|
|
define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_19(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 9
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
|
2020-06-16 17:17:21 +08:00
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: br label [[BB3:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
2020-06-16 17:17:21 +08:00
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: bb0.nonchr:
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM0]], 85
|
|
|
|
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM0]], i32 [[TMP8]], !prof !16
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP0]], 8
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
|
|
|
|
; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP10]], i32 44, i32 88
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
2020-06-16 17:17:21 +08:00
|
|
|
; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ]
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: ret i32 [[SUM6]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 255
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb3, label %bb0, !prof !15
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
%3 = and i32 %0, 1
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
%5 = add i32 %sum0, 42
|
|
|
|
%sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
|
|
|
|
%6 = add i32 %sum1, 43
|
|
|
|
%sum2 = select i1 %4, i32 %sum1, i32 %6, !prof !15
|
|
|
|
br i1 %4, label %bb2, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%sum3 = add i32 %sum2, 44
|
|
|
|
%7 = and i32 %0, 8
|
|
|
|
%8 = icmp eq i32 %7, 0
|
|
|
|
%9 = add i32 %sum3, 44
|
|
|
|
%sum4 = select i1 %8, i32 %sum3, i32 %9, !prof !15
|
|
|
|
br label %bb2
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
%sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
|
|
|
|
ret i32 %sum6
|
|
|
|
}
|
|
|
|
|
|
|
|
; Selects. The exit block, which belongs to the top-level region, has a select
|
|
|
|
; and causes the top-level region to be the outermost CHR scope with the
|
|
|
|
; subscope that includes the entry block with two selects. The outermost CHR
|
|
|
|
; scope doesn't see the selects in the entry block as the entry block is in the
|
|
|
|
; subscope and incorrectly sets the CHR hoist point to the branch rather than
|
|
|
|
; the first select in the entry block and causes the CHR'ed selects ("select i1
|
|
|
|
; false...") to incorrectly position above the CHR branch. This is testing
|
|
|
|
; against a quirk of how the region analysis handles the entry block.
|
|
|
|
; Roughly,
|
|
|
|
; i0 = *i
|
|
|
|
; sum2 = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
|
|
|
|
; sum3 = ((i0 & 4) == 0) ? sum2 : (sum2 + 44) // Likely false
|
|
|
|
; if (j)
|
|
|
|
; foo()
|
|
|
|
; i5 = *i
|
|
|
|
; v13 = (i5 == 44) ? i5 : sum3
|
|
|
|
; return v13
|
|
|
|
; ->
|
|
|
|
; i0 = *i
|
|
|
|
; if ((i0 & 6) == 6) { // Likely true
|
|
|
|
; v9 = sum0 + 87
|
|
|
|
; if (j)
|
|
|
|
; foo()
|
|
|
|
; } else {
|
|
|
|
; sum2.nc = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
|
|
|
|
; sum3.nc = ((i0 & 4) == 0) ? sum2.nc : (sum2.nc + 44)
|
|
|
|
; if (j)
|
|
|
|
; foo()
|
|
|
|
; }
|
|
|
|
; t2 = phi v9, sum3.nc
|
|
|
|
; i5 = *i
|
|
|
|
; v13 = (i5 == 44) ? 44 : t2
|
|
|
|
; return v13
|
|
|
|
define i32 @test_chr_20(i32* %i, i32 %sum0, i1 %j) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_20(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 6
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 6
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: entry.split:
|
|
|
|
; CHECK-NEXT: [[V9:%.*]] = add i32 [[SUM0:%.*]], 87
|
|
|
|
; CHECK-NEXT: br i1 [[J:%.*]], label [[BB1:%.*]], label [[BB4:%.*]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB4]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0]], 43
|
|
|
|
; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[V4_NOT:%.*]] = icmp eq i32 [[V3]], 0
|
|
|
|
; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NOT]], i32 [[SUM0]], i32 [[V8]], !prof !16
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK-NEXT: [[V6_NONCHR:%.*]] = and i32 [[I0]], 4
|
|
|
|
; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[V6_NONCHR]], 0
|
|
|
|
; CHECK-NEXT: [[V9_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44
|
|
|
|
; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[V9_NONCHR]], !prof !16
|
|
|
|
; CHECK-NEXT: br i1 [[J]], label [[BB1_NONCHR:%.*]], label [[BB4]]
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB4]]
|
|
|
|
; CHECK: bb4:
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[V9]], [[BB1]] ], [ [[V9]], [[ENTRY_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM3_NONCHR]], [[ENTRY_SPLIT_NONCHR]] ]
|
|
|
|
; CHECK-NEXT: [[I5:%.*]] = load i32, i32* [[I]], align 4
|
|
|
|
; CHECK-NEXT: [[V12:%.*]] = icmp eq i32 [[I5]], 44
|
|
|
|
; CHECK-NEXT: [[V13:%.*]] = select i1 [[V12]], i32 44, i32 [[TMP2]], !prof !16
|
|
|
|
; CHECK-NEXT: ret i32 [[V13]]
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%i0 = load i32, i32* %i
|
|
|
|
%v3 = and i32 %i0, 2
|
|
|
|
%v4 = icmp eq i32 %v3, 0
|
|
|
|
%v8 = add i32 %sum0, 43
|
|
|
|
%sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15
|
|
|
|
%v6 = and i32 %i0, 4
|
|
|
|
%v5 = icmp eq i32 %v6, 0
|
|
|
|
%v9 = add i32 %sum2, 44
|
|
|
|
%sum3 = select i1 %v5, i32 %sum2, i32 %v9, !prof !15
|
|
|
|
br i1 %j, label %bb1, label %bb4
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb4
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
%i5 = load i32, i32* %i
|
|
|
|
%v12 = icmp eq i32 %i5, 44
|
|
|
|
%v13 = select i1 %v12, i32 %i5, i32 %sum3, !prof !15
|
|
|
|
ret i32 %v13
|
|
|
|
}
|
|
|
|
|
2019-05-02 06:49:52 +08:00
|
|
|
; Test the case where two scopes share a common instruction to hoist (%cmp.i).
|
|
|
|
; Two scopes would hoist it to their hoist points, but since the outer scope
|
|
|
|
; (entry/bb6-9) hoists it first to its hoist point, it'd be wrong (causing bad
|
|
|
|
; IR) for the inner scope (bb1-4) to hoist the same instruction to its hoist
|
|
|
|
; point.
|
|
|
|
; Roughly,
|
|
|
|
; if (j != k) {
|
|
|
|
; if (i != 2)
|
|
|
|
; foo();
|
|
|
|
; cmp.i = i == 86
|
|
|
|
; if (!cmp.i)
|
|
|
|
; foo();
|
|
|
|
; if (j != i)
|
|
|
|
; foo();
|
|
|
|
; if (!cmp.i)
|
|
|
|
; foo();
|
|
|
|
; }
|
|
|
|
; return 45;
|
|
|
|
define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
|
|
|
|
; CHECK-LABEL: @test_chr_21(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i64 [[J:%.*]], [[K:%.*]]
|
|
|
|
; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[J]], [[I:%.*]]
|
|
|
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[I]], 86
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP0]], [[CMP3]]
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP_I]]
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[I]], 2
|
|
|
|
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
|
|
|
|
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
|
|
|
|
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
|
|
|
|
; CHECK-NEXT: ], !prof !20
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB7:%.*]]
|
|
|
|
; CHECK: bb2.nonchr1:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3_NONCHR2]]
|
|
|
|
; CHECK: bb3.nonchr2:
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_I]], label [[BB4_NONCHR3:%.*]], label [[BB7]], !prof !18
|
|
|
|
; CHECK: bb4.nonchr3:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB7]]
|
|
|
|
; CHECK: bb7:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB10:%.*]]
|
|
|
|
; CHECK: entry.split.nonchr:
|
|
|
|
; CHECK-NEXT: br i1 [[CMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof !18
|
|
|
|
; CHECK: bb1.nonchr:
|
|
|
|
; CHECK-NEXT: [[CMP2_NONCHR:%.*]] = icmp eq i64 [[I]], 2
|
|
|
|
; CHECK-NEXT: br i1 [[CMP2_NONCHR]], label [[BB3_NONCHR:%.*]], label [[BB2_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb3.nonchr:
|
|
|
|
; CHECK-NEXT: [[CMP_I_NONCHR:%.*]] = icmp eq i64 [[I]], 86
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB6_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb6.nonchr:
|
|
|
|
; CHECK-NEXT: [[CMP3_NONCHR:%.*]] = icmp eq i64 [[J]], [[I]]
|
|
|
|
; CHECK-NEXT: br i1 [[CMP3_NONCHR]], label [[BB8_NONCHR:%.*]], label [[BB7_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb8.nonchr:
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB10]], label [[BB9_NONCHR:%.*]], !prof !16
|
|
|
|
; CHECK: bb9.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB10]]
|
|
|
|
; CHECK: bb7.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB8_NONCHR]]
|
|
|
|
; CHECK: bb4.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB6_NONCHR]]
|
|
|
|
; CHECK: bb2.nonchr:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3_NONCHR]]
|
|
|
|
; CHECK: bb10:
|
|
|
|
; CHECK-NEXT: ret i32 45
|
|
|
|
;
|
|
|
|
entry:
|
|
|
|
%cmp0 = icmp eq i64 %j, %k
|
|
|
|
br i1 %cmp0, label %bb10, label %bb1, !prof !15
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%cmp2 = icmp eq i64 %i, 2
|
|
|
|
br i1 %cmp2, label %bb3, label %bb2, !prof !15
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
%cmp.i = icmp eq i64 %i, 86
|
|
|
|
br i1 %cmp.i, label %bb5, label %bb4, !prof !15
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb5
|
|
|
|
|
|
|
|
bb5:
|
|
|
|
br label %bb6
|
|
|
|
|
|
|
|
bb6:
|
|
|
|
%cmp3 = icmp eq i64 %j, %i
|
|
|
|
br i1 %cmp3, label %bb8, label %bb7, !prof !15
|
|
|
|
|
|
|
|
bb7:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb8
|
|
|
|
|
|
|
|
bb8:
|
|
|
|
br i1 %cmp.i, label %bb10, label %bb9, !prof !15
|
|
|
|
|
|
|
|
bb9:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb10
|
|
|
|
|
|
|
|
bb10:
|
|
|
|
ret i32 45
|
|
|
|
}
|
|
|
|
|
2019-05-23 02:37:34 +08:00
|
|
|
; Test a case with really long use-def chains. This test checks that it's not
|
|
|
|
; really slow and doesn't appear to be hanging.
|
|
|
|
define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 {
|
2020-05-20 13:02:55 +08:00
|
|
|
; CHECK-LABEL: @test_chr_22(
|
|
|
|
; CHECK-NEXT: bb0:
|
2020-05-22 23:37:58 +08:00
|
|
|
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i64 [[V0:%.*]], 1
|
|
|
|
; CHECK-NEXT: [[V2:%.*]] = add i64 [[REASS_ADD]], 3
|
[SimplifyCFG][LoopRotate] SimplifyCFG: disable common instruction hoisting by default, enable late in pipeline
I've been looking at missed vectorizations in one codebase.
One particular thing that stands out is that some of the loops
reach vectorizer in a rather mangled form, with weird PHI's,
and some of the loops aren't even in a rotated form.
After taking a more detailed look, that happened because
the loop's headers were too big by then. It is evident that
SimplifyCFG's common code hoisting transform is at fault there,
because the pattern it handles is precisely the unrotated
loop basic block structure.
Surprisingly, `SimplifyCFGOpt::HoistThenElseCodeToIf()` is enabled
by default, and is always run, unlike its friend, common code sinking
transform, `SinkCommonCodeFromPredecessors()`, which is not enabled
by default and is only run once very late in the pipeline.
I'm proposing to harmonize this, and disable common code hoisting
until //late// in pipeline. Definition of //late// may vary,
here currently i've picked the same one as for code sinking,
but i suppose we could enable it as soon as right after
loop rotation happens.
Experimentation shows that this does indeed unsurprisingly help,
more loops got rotated, although other issues remain elsewhere.
Now, this undoubtedly seriously shakes phase ordering.
This will undoubtedly be a mixed bag in terms of both compile- and
run- time performance, codesize. Since we no longer aggressively
hoist+deduplicate common code, we don't pay the price of said hoisting
(which wasn't big). That may allow more loops to be rotated,
so we pay that price. That, in turn, may enable all the transforms
that require canonical (rotated) loop form, including but not limited to
vectorization, so we pay that too. And in general, no deduplication means
more [duplicate] instructions going through the optimizations. But there's still
late hoisting, some of them will be caught late.
As per benchmarks i've run {F12360204}, this is mostly within the noise,
there are some small improvements, some small regressions.
One big regression i saw i fixed in rG8d487668d09fb0e4e54f36207f07c1480ffabbfd, but i'm sure
this will expose many more pre-existing missed optimizations, as usual :S
llvm-compile-time-tracker.com thoughts on this:
http://llvm-compile-time-tracker.com/compare.php?from=e40315d2b4ed1e38962a8f33ff151693ed4ada63&to=c8289c0ecbf235da9fb0e3bc052e3c0d6bff5cf9&stat=instructions
* this does regress compile-time by +0.5% geomean (unsurprisingly)
* size impact varies; for ThinLTO it's actually an improvement
The largest fallout appears to be in GVN's load partial redundancy
elimination, it spends *much* more time in
`MemoryDependenceResults::getNonLocalPointerDependency()`.
Non-local `MemoryDependenceResults` is widely-known to be, uh, costly.
There does not appear to be a proper solution to this issue,
other than silencing the compile-time performance regression
by tuning cut-off thresholds in `MemoryDependenceResults`,
at the cost of potentially regressing run-time performance.
D84609 attempts to move in that direction, but the path is unclear
and is going to take some time.
If we look at stats before/after diffs, some excerpts:
* RawSpeed (the target) {F12360200}
* -14 (-73.68%) loops not rotated due to the header size (yay)
* -272 (-0.67%) `"Number of live out of a loop variables"` - good for vectorizer
* -3937 (-64.19%) common instructions hoisted
* +561 (+0.06%) x86 asm instructions
* -2 basic blocks
* +2418 (+0.11%) IR instructions
* vanilla test-suite + RawSpeed + darktable {F12360201}
* -36396 (-65.29%) common instructions hoisted
* +1676 (+0.02%) x86 asm instructions
* +662 (+0.06%) basic blocks
* +4395 (+0.04%) IR instructions
It is likely to be sub-optimal when optimizing for code size,
so one might want to tune the pipeline by enabling sinking/hoisting
when optimizing for size.
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D84108
2020-07-30 00:54:33 +08:00
|
|
|
; CHECK-NEXT: [[C1:%.*]] = icmp slt i64 [[V2]], 100
|
|
|
|
; CHECK-NEXT: br i1 [[C1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
|
|
|
|
; CHECK: bb0.split:
|
2020-05-20 13:02:55 +08:00
|
|
|
; CHECK-NEXT: [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991
|
|
|
|
; CHECK-NEXT: store i64 [[V299]], i64* [[J:%.*]], align 4
|
|
|
|
; CHECK-NEXT: ret i64 99
|
[SimplifyCFG][LoopRotate] SimplifyCFG: disable common instruction hoisting by default, enable late in pipeline
I've been looking at missed vectorizations in one codebase.
One particular thing that stands out is that some of the loops
reach vectorizer in a rather mangled form, with weird PHI's,
and some of the loops aren't even in a rotated form.
After taking a more detailed look, that happened because
the loop's headers were too big by then. It is evident that
SimplifyCFG's common code hoisting transform is at fault there,
because the pattern it handles is precisely the unrotated
loop basic block structure.
Surprisingly, `SimplifyCFGOpt::HoistThenElseCodeToIf()` is enabled
by default, and is always run, unlike its friend, common code sinking
transform, `SinkCommonCodeFromPredecessors()`, which is not enabled
by default and is only run once very late in the pipeline.
I'm proposing to harmonize this, and disable common code hoisting
until //late// in pipeline. Definition of //late// may vary,
here currently i've picked the same one as for code sinking,
but i suppose we could enable it as soon as right after
loop rotation happens.
Experimentation shows that this does indeed unsurprisingly help,
more loops got rotated, although other issues remain elsewhere.
Now, this undoubtedly seriously shakes phase ordering.
This will undoubtedly be a mixed bag in terms of both compile- and
run- time performance, codesize. Since we no longer aggressively
hoist+deduplicate common code, we don't pay the price of said hoisting
(which wasn't big). That may allow more loops to be rotated,
so we pay that price. That, in turn, may enable all the transforms
that require canonical (rotated) loop form, including but not limited to
vectorization, so we pay that too. And in general, no deduplication means
more [duplicate] instructions going through the optimizations. But there's still
late hoisting, some of them will be caught late.
As per benchmarks i've run {F12360204}, this is mostly within the noise,
there are some small improvements, some small regressions.
One big regression i saw i fixed in rG8d487668d09fb0e4e54f36207f07c1480ffabbfd, but i'm sure
this will expose many more pre-existing missed optimizations, as usual :S
llvm-compile-time-tracker.com thoughts on this:
http://llvm-compile-time-tracker.com/compare.php?from=e40315d2b4ed1e38962a8f33ff151693ed4ada63&to=c8289c0ecbf235da9fb0e3bc052e3c0d6bff5cf9&stat=instructions
* this does regress compile-time by +0.5% geomean (unsurprisingly)
* size impact varies; for ThinLTO it's actually an improvement
The largest fallout appears to be in GVN's load partial redundancy
elimination, it spends *much* more time in
`MemoryDependenceResults::getNonLocalPointerDependency()`.
Non-local `MemoryDependenceResults` is widely-known to be, uh, costly.
There does not appear to be a proper solution to this issue,
other than silencing the compile-time performance regression
by tuning cut-off thresholds in `MemoryDependenceResults`,
at the cost of potentially regressing run-time performance.
D84609 attempts to move in that direction, but the path is unclear
and is going to take some time.
If we look at stats before/after diffs, some excerpts:
* RawSpeed (the target) {F12360200}
* -14 (-73.68%) loops not rotated due to the header size (yay)
* -272 (-0.67%) `"Number of live out of a loop variables"` - good for vectorizer
* -3937 (-64.19%) common instructions hoisted
* +561 (+0.06%) x86 asm instructions
* -2 basic blocks
* +2418 (+0.11%) IR instructions
* vanilla test-suite + RawSpeed + darktable {F12360201}
* -36396 (-65.29%) common instructions hoisted
* +1676 (+0.02%) x86 asm instructions
* +662 (+0.06%) basic blocks
* +4395 (+0.04%) IR instructions
It is likely to be sub-optimal when optimizing for code size,
so one might want to tune the pipeline by enabling sinking/hoisting
when optimizing for size.
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D84108
2020-07-30 00:54:33 +08:00
|
|
|
; CHECK: bb0.split.nonchr:
|
|
|
|
; CHECK-NEXT: [[V299_NONCHR:%.*]] = mul i64 [[V2]], 7860086430977039991
|
|
|
|
; CHECK-NEXT: store i64 [[V299_NONCHR]], i64* [[J]], align 4
|
|
|
|
; CHECK-NEXT: ret i64 99
|
2020-05-20 13:02:55 +08:00
|
|
|
;
|
2019-05-23 02:37:34 +08:00
|
|
|
bb0:
|
|
|
|
%v1 = add i64 %v0, 3
|
|
|
|
%v2 = add i64 %v1, %v0
|
|
|
|
%c1 = icmp sgt i64 %v2, 99
|
|
|
|
%v3 = select i1 %c1, i64 %v1, i64 %v2, !prof !15
|
|
|
|
%v4 = add i64 %v2, %v2
|
|
|
|
%v5 = add i64 %v4, %v2
|
|
|
|
%v6 = add i64 %v5, %v4
|
|
|
|
%v7 = add i64 %v6, %v5
|
|
|
|
%v8 = add i64 %v7, %v6
|
|
|
|
%v9 = add i64 %v8, %v7
|
|
|
|
%v10 = add i64 %v9, %v8
|
|
|
|
%v11 = add i64 %v10, %v9
|
|
|
|
%v12 = add i64 %v11, %v10
|
|
|
|
%v13 = add i64 %v12, %v11
|
|
|
|
%v14 = add i64 %v13, %v12
|
|
|
|
%v15 = add i64 %v14, %v13
|
|
|
|
%v16 = add i64 %v15, %v14
|
|
|
|
%v17 = add i64 %v16, %v15
|
|
|
|
%v18 = add i64 %v17, %v16
|
|
|
|
%v19 = add i64 %v18, %v17
|
|
|
|
%v20 = add i64 %v19, %v18
|
|
|
|
%v21 = add i64 %v20, %v19
|
|
|
|
%v22 = add i64 %v21, %v20
|
|
|
|
%v23 = add i64 %v22, %v21
|
|
|
|
%v24 = add i64 %v23, %v22
|
|
|
|
%v25 = add i64 %v24, %v23
|
|
|
|
%v26 = add i64 %v25, %v24
|
|
|
|
%v27 = add i64 %v26, %v25
|
|
|
|
%v28 = add i64 %v27, %v26
|
|
|
|
%v29 = add i64 %v28, %v27
|
|
|
|
%v30 = add i64 %v29, %v28
|
|
|
|
%v31 = add i64 %v30, %v29
|
|
|
|
%v32 = add i64 %v31, %v30
|
|
|
|
%v33 = add i64 %v32, %v31
|
|
|
|
%v34 = add i64 %v33, %v32
|
|
|
|
%v35 = add i64 %v34, %v33
|
|
|
|
%v36 = add i64 %v35, %v34
|
|
|
|
%v37 = add i64 %v36, %v35
|
|
|
|
%v38 = add i64 %v37, %v36
|
|
|
|
%v39 = add i64 %v38, %v37
|
|
|
|
%v40 = add i64 %v39, %v38
|
|
|
|
%v41 = add i64 %v40, %v39
|
|
|
|
%v42 = add i64 %v41, %v40
|
|
|
|
%v43 = add i64 %v42, %v41
|
|
|
|
%v44 = add i64 %v43, %v42
|
|
|
|
%v45 = add i64 %v44, %v43
|
|
|
|
%v46 = add i64 %v45, %v44
|
|
|
|
%v47 = add i64 %v46, %v45
|
|
|
|
%v48 = add i64 %v47, %v46
|
|
|
|
%v49 = add i64 %v48, %v47
|
|
|
|
%v50 = add i64 %v49, %v48
|
|
|
|
%v51 = add i64 %v50, %v49
|
|
|
|
%v52 = add i64 %v51, %v50
|
|
|
|
%v53 = add i64 %v52, %v51
|
|
|
|
%v54 = add i64 %v53, %v52
|
|
|
|
%v55 = add i64 %v54, %v53
|
|
|
|
%v56 = add i64 %v55, %v54
|
|
|
|
%v57 = add i64 %v56, %v55
|
|
|
|
%v58 = add i64 %v57, %v56
|
|
|
|
%v59 = add i64 %v58, %v57
|
|
|
|
%v60 = add i64 %v59, %v58
|
|
|
|
%v61 = add i64 %v60, %v59
|
|
|
|
%v62 = add i64 %v61, %v60
|
|
|
|
%v63 = add i64 %v62, %v61
|
|
|
|
%v64 = add i64 %v63, %v62
|
|
|
|
%v65 = add i64 %v64, %v63
|
|
|
|
%v66 = add i64 %v65, %v64
|
|
|
|
%v67 = add i64 %v66, %v65
|
|
|
|
%v68 = add i64 %v67, %v66
|
|
|
|
%v69 = add i64 %v68, %v67
|
|
|
|
%v70 = add i64 %v69, %v68
|
|
|
|
%v71 = add i64 %v70, %v69
|
|
|
|
%v72 = add i64 %v71, %v70
|
|
|
|
%v73 = add i64 %v72, %v71
|
|
|
|
%v74 = add i64 %v73, %v72
|
|
|
|
%v75 = add i64 %v74, %v73
|
|
|
|
%v76 = add i64 %v75, %v74
|
|
|
|
%v77 = add i64 %v76, %v75
|
|
|
|
%v78 = add i64 %v77, %v76
|
|
|
|
%v79 = add i64 %v78, %v77
|
|
|
|
%v80 = add i64 %v79, %v78
|
|
|
|
%v81 = add i64 %v80, %v79
|
|
|
|
%v82 = add i64 %v81, %v80
|
|
|
|
%v83 = add i64 %v82, %v81
|
|
|
|
%v84 = add i64 %v83, %v82
|
|
|
|
%v85 = add i64 %v84, %v83
|
|
|
|
%v86 = add i64 %v85, %v84
|
|
|
|
%v87 = add i64 %v86, %v85
|
|
|
|
%v88 = add i64 %v87, %v86
|
|
|
|
%v89 = add i64 %v88, %v87
|
|
|
|
%v90 = add i64 %v89, %v88
|
|
|
|
%v91 = add i64 %v90, %v89
|
|
|
|
%v92 = add i64 %v91, %v90
|
|
|
|
%v93 = add i64 %v92, %v91
|
|
|
|
%v94 = add i64 %v93, %v92
|
|
|
|
%v95 = add i64 %v94, %v93
|
|
|
|
%v96 = add i64 %v95, %v94
|
|
|
|
%v97 = add i64 %v96, %v95
|
|
|
|
%v98 = add i64 %v97, %v96
|
|
|
|
%v99 = add i64 %v98, %v97
|
|
|
|
%v100 = add i64 %v99, %v98
|
|
|
|
%v101 = add i64 %v100, %v99
|
|
|
|
%v102 = add i64 %v101, %v100
|
|
|
|
%v103 = add i64 %v102, %v101
|
|
|
|
%v104 = add i64 %v103, %v102
|
|
|
|
%v105 = add i64 %v104, %v103
|
|
|
|
%v106 = add i64 %v105, %v104
|
|
|
|
%v107 = add i64 %v106, %v105
|
|
|
|
%v108 = add i64 %v107, %v106
|
|
|
|
%v109 = add i64 %v108, %v107
|
|
|
|
%v110 = add i64 %v109, %v108
|
|
|
|
%v111 = add i64 %v110, %v109
|
|
|
|
%v112 = add i64 %v111, %v110
|
|
|
|
%v113 = add i64 %v112, %v111
|
|
|
|
%v114 = add i64 %v113, %v112
|
|
|
|
%v115 = add i64 %v114, %v113
|
|
|
|
%v116 = add i64 %v115, %v114
|
|
|
|
%v117 = add i64 %v116, %v115
|
|
|
|
%v118 = add i64 %v117, %v116
|
|
|
|
%v119 = add i64 %v118, %v117
|
|
|
|
%v120 = add i64 %v119, %v118
|
|
|
|
%v121 = add i64 %v120, %v119
|
|
|
|
%v122 = add i64 %v121, %v120
|
|
|
|
%v123 = add i64 %v122, %v121
|
|
|
|
%v124 = add i64 %v123, %v122
|
|
|
|
%v125 = add i64 %v124, %v123
|
|
|
|
%v126 = add i64 %v125, %v124
|
|
|
|
%v127 = add i64 %v126, %v125
|
|
|
|
%v128 = add i64 %v127, %v126
|
|
|
|
%v129 = add i64 %v128, %v127
|
|
|
|
%v130 = add i64 %v129, %v128
|
|
|
|
%v131 = add i64 %v130, %v129
|
|
|
|
%v132 = add i64 %v131, %v130
|
|
|
|
%v133 = add i64 %v132, %v131
|
|
|
|
%v134 = add i64 %v133, %v132
|
|
|
|
%v135 = add i64 %v134, %v133
|
|
|
|
%v136 = add i64 %v135, %v134
|
|
|
|
%v137 = add i64 %v136, %v135
|
|
|
|
%v138 = add i64 %v137, %v136
|
|
|
|
%v139 = add i64 %v138, %v137
|
|
|
|
%v140 = add i64 %v139, %v138
|
|
|
|
%v141 = add i64 %v140, %v139
|
|
|
|
%v142 = add i64 %v141, %v140
|
|
|
|
%v143 = add i64 %v142, %v141
|
|
|
|
%v144 = add i64 %v143, %v142
|
|
|
|
%v145 = add i64 %v144, %v143
|
|
|
|
%v146 = add i64 %v145, %v144
|
|
|
|
%v147 = add i64 %v146, %v145
|
|
|
|
%v148 = add i64 %v147, %v146
|
|
|
|
%v149 = add i64 %v148, %v147
|
|
|
|
%v150 = add i64 %v149, %v148
|
|
|
|
%v151 = add i64 %v150, %v149
|
|
|
|
%v152 = add i64 %v151, %v150
|
|
|
|
%v153 = add i64 %v152, %v151
|
|
|
|
%v154 = add i64 %v153, %v152
|
|
|
|
%v155 = add i64 %v154, %v153
|
|
|
|
%v156 = add i64 %v155, %v154
|
|
|
|
%v157 = add i64 %v156, %v155
|
|
|
|
%v158 = add i64 %v157, %v156
|
|
|
|
%v159 = add i64 %v158, %v157
|
|
|
|
%v160 = add i64 %v159, %v158
|
|
|
|
%v161 = add i64 %v160, %v159
|
|
|
|
%v162 = add i64 %v161, %v160
|
|
|
|
%v163 = add i64 %v162, %v161
|
|
|
|
%v164 = add i64 %v163, %v162
|
|
|
|
%v165 = add i64 %v164, %v163
|
|
|
|
%v166 = add i64 %v165, %v164
|
|
|
|
%v167 = add i64 %v166, %v165
|
|
|
|
%v168 = add i64 %v167, %v166
|
|
|
|
%v169 = add i64 %v168, %v167
|
|
|
|
%v170 = add i64 %v169, %v168
|
|
|
|
%v171 = add i64 %v170, %v169
|
|
|
|
%v172 = add i64 %v171, %v170
|
|
|
|
%v173 = add i64 %v172, %v171
|
|
|
|
%v174 = add i64 %v173, %v172
|
|
|
|
%v175 = add i64 %v174, %v173
|
|
|
|
%v176 = add i64 %v175, %v174
|
|
|
|
%v177 = add i64 %v176, %v175
|
|
|
|
%v178 = add i64 %v177, %v176
|
|
|
|
%v179 = add i64 %v178, %v177
|
|
|
|
%v180 = add i64 %v179, %v178
|
|
|
|
%v181 = add i64 %v180, %v179
|
|
|
|
%v182 = add i64 %v181, %v180
|
|
|
|
%v183 = add i64 %v182, %v181
|
|
|
|
%v184 = add i64 %v183, %v182
|
|
|
|
%v185 = add i64 %v184, %v183
|
|
|
|
%v186 = add i64 %v185, %v184
|
|
|
|
%v187 = add i64 %v186, %v185
|
|
|
|
%v188 = add i64 %v187, %v186
|
|
|
|
%v189 = add i64 %v188, %v187
|
|
|
|
%v190 = add i64 %v189, %v188
|
|
|
|
%v191 = add i64 %v190, %v189
|
|
|
|
%v192 = add i64 %v191, %v190
|
|
|
|
%v193 = add i64 %v192, %v191
|
|
|
|
%v194 = add i64 %v193, %v192
|
|
|
|
%v195 = add i64 %v194, %v193
|
|
|
|
%v196 = add i64 %v195, %v194
|
|
|
|
%v197 = add i64 %v196, %v195
|
|
|
|
%v198 = add i64 %v197, %v196
|
|
|
|
%v199 = add i64 %v198, %v197
|
|
|
|
%v200 = add i64 %v199, %v198
|
|
|
|
%v201 = add i64 %v200, %v199
|
|
|
|
%v202 = add i64 %v201, %v200
|
|
|
|
%v203 = add i64 %v202, %v201
|
|
|
|
%v204 = add i64 %v203, %v202
|
|
|
|
%v205 = add i64 %v204, %v203
|
|
|
|
%v206 = add i64 %v205, %v204
|
|
|
|
%v207 = add i64 %v206, %v205
|
|
|
|
%v208 = add i64 %v207, %v206
|
|
|
|
%v209 = add i64 %v208, %v207
|
|
|
|
%v210 = add i64 %v209, %v208
|
|
|
|
%v211 = add i64 %v210, %v209
|
|
|
|
%v212 = add i64 %v211, %v210
|
|
|
|
%v213 = add i64 %v212, %v211
|
|
|
|
%v214 = add i64 %v213, %v212
|
|
|
|
%v215 = add i64 %v214, %v213
|
|
|
|
%v216 = add i64 %v215, %v214
|
|
|
|
%v217 = add i64 %v216, %v215
|
|
|
|
%v218 = add i64 %v217, %v216
|
|
|
|
%v219 = add i64 %v218, %v217
|
|
|
|
%v220 = add i64 %v219, %v218
|
|
|
|
%v221 = add i64 %v220, %v219
|
|
|
|
%v222 = add i64 %v221, %v220
|
|
|
|
%v223 = add i64 %v222, %v221
|
|
|
|
%v224 = add i64 %v223, %v222
|
|
|
|
%v225 = add i64 %v224, %v223
|
|
|
|
%v226 = add i64 %v225, %v224
|
|
|
|
%v227 = add i64 %v226, %v225
|
|
|
|
%v228 = add i64 %v227, %v226
|
|
|
|
%v229 = add i64 %v228, %v227
|
|
|
|
%v230 = add i64 %v229, %v228
|
|
|
|
%v231 = add i64 %v230, %v229
|
|
|
|
%v232 = add i64 %v231, %v230
|
|
|
|
%v233 = add i64 %v232, %v231
|
|
|
|
%v234 = add i64 %v233, %v232
|
|
|
|
%v235 = add i64 %v234, %v233
|
|
|
|
%v236 = add i64 %v235, %v234
|
|
|
|
%v237 = add i64 %v236, %v235
|
|
|
|
%v238 = add i64 %v237, %v236
|
|
|
|
%v239 = add i64 %v238, %v237
|
|
|
|
%v240 = add i64 %v239, %v238
|
|
|
|
%v241 = add i64 %v240, %v239
|
|
|
|
%v242 = add i64 %v241, %v240
|
|
|
|
%v243 = add i64 %v242, %v241
|
|
|
|
%v244 = add i64 %v243, %v242
|
|
|
|
%v245 = add i64 %v244, %v243
|
|
|
|
%v246 = add i64 %v245, %v244
|
|
|
|
%v247 = add i64 %v246, %v245
|
|
|
|
%v248 = add i64 %v247, %v246
|
|
|
|
%v249 = add i64 %v248, %v247
|
|
|
|
%v250 = add i64 %v249, %v248
|
|
|
|
%v251 = add i64 %v250, %v249
|
|
|
|
%v252 = add i64 %v251, %v250
|
|
|
|
%v253 = add i64 %v252, %v251
|
|
|
|
%v254 = add i64 %v253, %v252
|
|
|
|
%v255 = add i64 %v254, %v253
|
|
|
|
%v256 = add i64 %v255, %v254
|
|
|
|
%v257 = add i64 %v256, %v255
|
|
|
|
%v258 = add i64 %v257, %v256
|
|
|
|
%v259 = add i64 %v258, %v257
|
|
|
|
%v260 = add i64 %v259, %v258
|
|
|
|
%v261 = add i64 %v260, %v259
|
|
|
|
%v262 = add i64 %v261, %v260
|
|
|
|
%v263 = add i64 %v262, %v261
|
|
|
|
%v264 = add i64 %v263, %v262
|
|
|
|
%v265 = add i64 %v264, %v263
|
|
|
|
%v266 = add i64 %v265, %v264
|
|
|
|
%v267 = add i64 %v266, %v265
|
|
|
|
%v268 = add i64 %v267, %v266
|
|
|
|
%v269 = add i64 %v268, %v267
|
|
|
|
%v270 = add i64 %v269, %v268
|
|
|
|
%v271 = add i64 %v270, %v269
|
|
|
|
%v272 = add i64 %v271, %v270
|
|
|
|
%v273 = add i64 %v272, %v271
|
|
|
|
%v274 = add i64 %v273, %v272
|
|
|
|
%v275 = add i64 %v274, %v273
|
|
|
|
%v276 = add i64 %v275, %v274
|
|
|
|
%v277 = add i64 %v276, %v275
|
|
|
|
%v278 = add i64 %v277, %v276
|
|
|
|
%v279 = add i64 %v278, %v277
|
|
|
|
%v280 = add i64 %v279, %v278
|
|
|
|
%v281 = add i64 %v280, %v279
|
|
|
|
%v282 = add i64 %v281, %v280
|
|
|
|
%v283 = add i64 %v282, %v281
|
|
|
|
%v284 = add i64 %v283, %v282
|
|
|
|
%v285 = add i64 %v284, %v283
|
|
|
|
%v286 = add i64 %v285, %v284
|
|
|
|
%v287 = add i64 %v286, %v285
|
|
|
|
%v288 = add i64 %v287, %v286
|
|
|
|
%v289 = add i64 %v288, %v287
|
|
|
|
%v290 = add i64 %v289, %v288
|
|
|
|
%v291 = add i64 %v290, %v289
|
|
|
|
%v292 = add i64 %v291, %v290
|
|
|
|
%v293 = add i64 %v292, %v291
|
|
|
|
%v294 = add i64 %v293, %v292
|
|
|
|
%v295 = add i64 %v294, %v293
|
|
|
|
%v296 = add i64 %v295, %v294
|
|
|
|
%v297 = add i64 %v296, %v295
|
|
|
|
%v298 = add i64 %v297, %v296
|
|
|
|
%v299 = add i64 %v298, %v297
|
|
|
|
%v300 = add i64 %v299, %v298
|
|
|
|
%v301 = icmp eq i64 %v300, 100
|
|
|
|
%v302 = select i1 %v301, i64 %v298, i64 %v299, !prof !15
|
|
|
|
store i64 %v302, i64* %j
|
|
|
|
ret i64 99
|
|
|
|
}
|
|
|
|
|
2019-09-06 00:56:55 +08:00
|
|
|
; Test a case with really long use-def chains. This test checks that it's not
|
|
|
|
; really slow and doesn't appear to be hanging. This is different from
|
|
|
|
; test_chr_22 in that it has nested control structures (multiple scopes) and
|
|
|
|
; covers additional code.
|
|
|
|
define i64 @test_chr_23(i64 %v0) !prof !14 {
|
2020-05-20 13:02:55 +08:00
|
|
|
; CHECK-LABEL: @test_chr_23(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[V0:%.*]], 50
|
[InstCombine] Always try to invert non-canonical predicate of an icmp
Summary:
The actual transform i was going after was:
https://rise4fun.com/Alive/Tp9H
```
Name: zz
Pre: isPowerOf2(C0) && isPowerOf2(C1) && C1 == C0
%t0 = and i8 %x, C0
%r = icmp eq i8 %t0, C1
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
Name: zz
Pre: isPowerOf2(C0)
%t0 = and i8 %x, C0
%r = icmp ne i8 %t0, 0
=>
%t = icmp eq i8 %t0, 0
%r = xor i1 %t, -1
```
but as it can be seen from the current tests, we already canonicalize most of it,
and we are only missing handling multi-use non-canonical icmp predicates.
If we have both `!=0` and `==0`, even though we can CSE them,
we end up being stuck with them. We should canonicalize to the `==0`.
I believe this is one of the cleanup steps i'll need after `-scalarizer`
if i end up proceeding with my WIP alloca promotion helper pass.
Reviewers: spatel, jdoerfert, nikic
Reviewed By: nikic
Subscribers: zzheng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83139
2020-07-04 22:39:48 +08:00
|
|
|
; CHECK-NEXT: [[V10_NOT:%.*]] = icmp eq i64 [[TMP0]], -50
|
2020-05-20 13:02:55 +08:00
|
|
|
; CHECK-NEXT: ret i64 99
|
|
|
|
;
|
2019-09-06 00:56:55 +08:00
|
|
|
entry:
|
|
|
|
%v1 = add i64 %v0, 3
|
|
|
|
%v2 = add i64 %v1, %v1
|
|
|
|
%v3 = add i64 %v2, %v1
|
|
|
|
%v4 = add i64 %v2, %v3
|
|
|
|
%v5 = add i64 %v4, %v2
|
|
|
|
%v6 = add i64 %v5, %v4
|
|
|
|
%v7 = add i64 %v6, %v5
|
|
|
|
%v8 = add i64 %v7, %v6
|
|
|
|
%v9 = add i64 %v8, %v7
|
|
|
|
%v10 = icmp eq i64 %v9, 100
|
|
|
|
br i1 %v10, label %body, label %end, !prof !15
|
|
|
|
|
|
|
|
body:
|
|
|
|
%v1_0 = add i64 %v9, 3
|
|
|
|
%v2_0 = add i64 %v1_0, %v1_0
|
|
|
|
%v3_0 = add i64 %v2_0, %v1_0
|
|
|
|
%v4_0 = add i64 %v2_0, %v3_0
|
|
|
|
%v5_0 = add i64 %v4_0, %v2_0
|
|
|
|
%v6_0 = add i64 %v5_0, %v4_0
|
|
|
|
%v7_0 = add i64 %v6_0, %v5_0
|
|
|
|
%v8_0 = add i64 %v7_0, %v6_0
|
|
|
|
%v9_0 = add i64 %v8_0, %v7_0
|
|
|
|
%v10_0 = icmp eq i64 %v9_0, 100
|
|
|
|
br i1 %v10_0, label %body.1, label %end, !prof !15
|
|
|
|
|
|
|
|
body.1:
|
|
|
|
%v1_1 = add i64 %v9_0, 3
|
|
|
|
%v2_1 = add i64 %v1_1, %v1_1
|
|
|
|
%v3_1 = add i64 %v2_1, %v1_1
|
|
|
|
%v4_1 = add i64 %v2_1, %v3_1
|
|
|
|
%v5_1 = add i64 %v4_1, %v2_1
|
|
|
|
%v6_1 = add i64 %v5_1, %v4_1
|
|
|
|
%v7_1 = add i64 %v6_1, %v5_1
|
|
|
|
%v8_1 = add i64 %v7_1, %v6_1
|
|
|
|
%v9_1 = add i64 %v8_1, %v7_1
|
|
|
|
%v10_1 = icmp eq i64 %v9_1, 100
|
|
|
|
br i1 %v10_1, label %body.2, label %end, !prof !15
|
|
|
|
|
|
|
|
body.2:
|
|
|
|
%v1_2 = add i64 %v9_1, 3
|
|
|
|
%v2_2 = add i64 %v1_2, %v1_2
|
|
|
|
%v3_2 = add i64 %v2_2, %v1_2
|
|
|
|
%v4_2 = add i64 %v2_2, %v3_2
|
|
|
|
%v5_2 = add i64 %v4_2, %v2_2
|
|
|
|
%v6_2 = add i64 %v5_2, %v4_2
|
|
|
|
%v7_2 = add i64 %v6_2, %v5_2
|
|
|
|
%v8_2 = add i64 %v7_2, %v6_2
|
|
|
|
%v9_2 = add i64 %v8_2, %v7_2
|
|
|
|
%v10_2 = icmp eq i64 %v9_2, 100
|
|
|
|
br i1 %v10_2, label %body.3, label %end, !prof !15
|
|
|
|
|
|
|
|
body.3:
|
|
|
|
%v1_3 = add i64 %v9_2, 3
|
|
|
|
%v2_3 = add i64 %v1_3, %v1_3
|
|
|
|
%v3_3 = add i64 %v2_3, %v1_3
|
|
|
|
%v4_3 = add i64 %v2_3, %v3_3
|
|
|
|
%v5_3 = add i64 %v4_3, %v2_3
|
|
|
|
%v6_3 = add i64 %v5_3, %v4_3
|
|
|
|
%v7_3 = add i64 %v6_3, %v5_3
|
|
|
|
%v8_3 = add i64 %v7_3, %v6_3
|
|
|
|
%v9_3 = add i64 %v8_3, %v7_3
|
|
|
|
%v10_3 = icmp eq i64 %v9_3, 100
|
|
|
|
br i1 %v10_3, label %body.4, label %end, !prof !15
|
|
|
|
|
|
|
|
body.4:
|
|
|
|
%v1_4 = add i64 %v9_3, 3
|
|
|
|
%v2_4 = add i64 %v1_4, %v1_4
|
|
|
|
%v3_4 = add i64 %v2_4, %v1_4
|
|
|
|
%v4_4 = add i64 %v2_4, %v3_4
|
|
|
|
%v5_4 = add i64 %v4_4, %v2_4
|
|
|
|
%v6_4 = add i64 %v5_4, %v4_4
|
|
|
|
%v7_4 = add i64 %v6_4, %v5_4
|
|
|
|
%v8_4 = add i64 %v7_4, %v6_4
|
|
|
|
%v9_4 = add i64 %v8_4, %v7_4
|
|
|
|
%v10_4 = icmp eq i64 %v9_4, 100
|
|
|
|
br i1 %v10_4, label %body.5, label %end, !prof !15
|
|
|
|
|
|
|
|
body.5:
|
|
|
|
%v1_5 = add i64 %v9_4, 3
|
|
|
|
%v2_5 = add i64 %v1_5, %v1_5
|
|
|
|
%v3_5 = add i64 %v2_5, %v1_5
|
|
|
|
%v4_5 = add i64 %v2_5, %v3_5
|
|
|
|
%v5_5 = add i64 %v4_5, %v2_5
|
|
|
|
%v6_5 = add i64 %v5_5, %v4_5
|
|
|
|
%v7_5 = add i64 %v6_5, %v5_5
|
|
|
|
%v8_5 = add i64 %v7_5, %v6_5
|
|
|
|
%v9_5 = add i64 %v8_5, %v7_5
|
|
|
|
%v10_5 = icmp eq i64 %v9_5, 100
|
|
|
|
br i1 %v10_5, label %body.6, label %end, !prof !15
|
|
|
|
|
|
|
|
body.6:
|
|
|
|
%v1_6 = add i64 %v9_5, 3
|
|
|
|
%v2_6 = add i64 %v1_6, %v1_6
|
|
|
|
%v3_6 = add i64 %v2_6, %v1_6
|
|
|
|
%v4_6 = add i64 %v2_6, %v3_6
|
|
|
|
%v5_6 = add i64 %v4_6, %v2_6
|
|
|
|
%v6_6 = add i64 %v5_6, %v4_6
|
|
|
|
%v7_6 = add i64 %v6_6, %v5_6
|
|
|
|
%v8_6 = add i64 %v7_6, %v6_6
|
|
|
|
%v9_6 = add i64 %v8_6, %v7_6
|
|
|
|
%v10_6 = icmp eq i64 %v9_6, 100
|
|
|
|
br i1 %v10_6, label %body.7, label %end, !prof !15
|
|
|
|
|
|
|
|
body.7:
|
|
|
|
%v1_7 = add i64 %v9_6, 3
|
|
|
|
%v2_7 = add i64 %v1_7, %v1_7
|
|
|
|
%v3_7 = add i64 %v2_7, %v1_7
|
|
|
|
%v4_7 = add i64 %v2_7, %v3_7
|
|
|
|
%v5_7 = add i64 %v4_7, %v2_7
|
|
|
|
%v6_7 = add i64 %v5_7, %v4_7
|
|
|
|
%v7_7 = add i64 %v6_7, %v5_7
|
|
|
|
%v8_7 = add i64 %v7_7, %v6_7
|
|
|
|
%v9_7 = add i64 %v8_7, %v7_7
|
|
|
|
%v10_7 = icmp eq i64 %v9_7, 100
|
|
|
|
br i1 %v10_7, label %body.8, label %end, !prof !15
|
|
|
|
|
|
|
|
body.8:
|
|
|
|
%v1_8 = add i64 %v9_7, 3
|
|
|
|
%v2_8 = add i64 %v1_8, %v1_8
|
|
|
|
%v3_8 = add i64 %v2_8, %v1_8
|
|
|
|
%v4_8 = add i64 %v2_8, %v3_8
|
|
|
|
%v5_8 = add i64 %v4_8, %v2_8
|
|
|
|
%v6_8 = add i64 %v5_8, %v4_8
|
|
|
|
%v7_8 = add i64 %v6_8, %v5_8
|
|
|
|
%v8_8 = add i64 %v7_8, %v6_8
|
|
|
|
%v9_8 = add i64 %v8_8, %v7_8
|
|
|
|
%v10_8 = icmp eq i64 %v9_8, 100
|
|
|
|
br i1 %v10_8, label %body.9, label %end, !prof !15
|
|
|
|
|
|
|
|
body.9:
|
|
|
|
%v1_9 = add i64 %v9_8, 3
|
|
|
|
%v2_9 = add i64 %v1_9, %v1_9
|
|
|
|
%v3_9 = add i64 %v2_9, %v1_9
|
|
|
|
%v4_9 = add i64 %v2_9, %v3_9
|
|
|
|
%v5_9 = add i64 %v4_9, %v2_9
|
|
|
|
%v6_9 = add i64 %v5_9, %v4_9
|
|
|
|
%v7_9 = add i64 %v6_9, %v5_9
|
|
|
|
%v8_9 = add i64 %v7_9, %v6_9
|
|
|
|
%v9_9 = add i64 %v8_9, %v7_9
|
|
|
|
br label %end
|
|
|
|
|
|
|
|
end:
|
|
|
|
ret i64 99
|
|
|
|
}
|
|
|
|
|
2020-01-14 06:19:45 +08:00
|
|
|
; Test that we do not crash on 0:0 branch_weights metadata.
|
|
|
|
define void @test_chr_24(i32* %i) !prof !14 {
|
2020-05-20 13:02:55 +08:00
|
|
|
; CHECK-LABEL: @test_chr_24(
|
|
|
|
; CHECK-NEXT: entry:
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
|
|
|
|
; CHECK: bb0:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB1]]
|
|
|
|
; CHECK: bb1:
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
|
|
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
|
|
|
|
; CHECK: bb2:
|
|
|
|
; CHECK-NEXT: call void @foo()
|
|
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
|
|
; CHECK: bb3:
|
|
|
|
; CHECK-NEXT: ret void
|
|
|
|
;
|
2020-01-14 06:19:45 +08:00
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* %i
|
|
|
|
%1 = and i32 %0, 1
|
|
|
|
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %bb1, label %bb0, !prof !17
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb1
|
|
|
|
|
|
|
|
bb1:
|
|
|
|
%3 = and i32 %0, 2
|
|
|
|
%4 = icmp eq i32 %3, 0
|
|
|
|
br i1 %4, label %bb3, label %bb2, !prof !17
|
|
|
|
|
|
|
|
bb2:
|
|
|
|
call void @foo()
|
|
|
|
br label %bb3
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2019-04-17 12:52:47 +08:00
|
|
|
!llvm.module.flags = !{!0}
|
|
|
|
!0 = !{i32 1, !"ProfileSummary", !1}
|
|
|
|
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
|
|
|
!2 = !{!"ProfileFormat", !"InstrProf"}
|
|
|
|
!3 = !{!"TotalCount", i64 10000}
|
|
|
|
!4 = !{!"MaxCount", i64 10}
|
|
|
|
!5 = !{!"MaxInternalCount", i64 1}
|
|
|
|
!6 = !{!"MaxFunctionCount", i64 1000}
|
|
|
|
!7 = !{!"NumCounts", i64 3}
|
|
|
|
!8 = !{!"NumFunctions", i64 3}
|
|
|
|
!9 = !{!"DetailedSummary", !10}
|
|
|
|
!10 = !{!11, !12, !13}
|
|
|
|
!11 = !{i32 10000, i64 100, i32 1}
|
|
|
|
!12 = !{i32 999000, i64 100, i32 1}
|
|
|
|
!13 = !{i32 999999, i64 1, i32 2}
|
|
|
|
|
|
|
|
!14 = !{!"function_entry_count", i64 100}
|
|
|
|
!15 = !{!"branch_weights", i32 0, i32 1}
|
|
|
|
!16 = !{!"branch_weights", i32 1, i32 1}
|
2020-01-14 06:19:45 +08:00
|
|
|
!17 = !{!"branch_weights", i32 0, i32 0}
|
2019-04-17 12:52:47 +08:00
|
|
|
; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
|
|
|
|
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
|
|
|
|
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
|
|
|
|
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
|
|
|
|
; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}
|