; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s

; These xor-of-icmps could be replaced with and-of-icmps, but %cond0 has extra
; uses, so we don't consider it, even though some cases are freely invertible.

; %cond0 is extra-used in select, which is freely invertible.
; Both select arms are constants, so the select stays a select-of-constants
; after %cond0 is inverted, and the xor folds to a single range check.
define i1 @v0_select_of_consts(i32 %X, i32* %selected) {
; CHECK-LABEL: @v0_select_of_consts(
; CHECK-NEXT:    [[COND0_INV:%.*]] = icmp sgt i32 [[X:%.*]], 32767
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0_INV]], i32 32767, i32 -32768
; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 -32768
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; %cond0 is extra-used in a select with a variable true-arm and constant
; false-arm; inverting swaps the arms, so the fold is still free.
define i1 @v1_select_of_var_and_const(i32 %X, i32 %Y, i32* %selected) {
; CHECK-LABEL: @v1_select_of_var_and_const(
; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 -32768, i32 [[Y:%.*]]
; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 %Y, i32 -32768
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; Mirror of @v1: constant true-arm, variable false-arm. Still freely
; invertible, so the xor-of-icmps folds to the single unsigned range check.
define i1 @v2_select_of_const_and_var(i32 %X, i32 %Y, i32* %selected) {
; CHECK-LABEL: @v2_select_of_const_and_var(
; CHECK-NEXT:    [[COND0_INV:%.*]] = icmp sgt i32 [[X:%.*]], 32767
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0_INV]], i32 32767, i32 [[Y:%.*]]
; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 %Y
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; Branch is also freely invertible
; (inverting %cond0 just swaps the branch successors bb0/bb1).
define i1 @v3_branch(i32 %X, i32* %dst0, i32* %dst1) {
; CHECK-LABEL: @v3_branch(
; CHECK-NEXT:  begin:
; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
; CHECK-NEXT:    br i1 [[COND0]], label [[BB1:%.*]], label [[BB0:%.*]]
; CHECK:       bb0:
; CHECK-NEXT:    store i32 0, i32* [[DST0:%.*]], align 4
; CHECK-NEXT:    br label [[END:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 0, i32* [[DST1:%.*]], align 4
; CHECK-NEXT:    br label [[END]]
; CHECK:       end:
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP0]]
;
begin:
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  br i1 %cond0, label %bb0, label %bb1

bb0:
  store i32 0, i32* %dst0
  br label %end

bb1:
  store i32 0, i32* %dst1
  br label %end

end:
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; Can invert 'not'.
; The extra use of %cond0 is a 'xor %cond0, -1' (a 'not'), which folds away
; entirely once %cond0 itself is inverted to 'slt 32768'.
define i1 @v4_not_store(i32 %X, i1* %not_cond) {
; CHECK-LABEL: @v4_not_store(
; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
; CHECK-NEXT:    store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %not_cond0 = xor i1 %cond0, -1
  store i1 %not_cond0, i1* %not_cond
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 -32768
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; All extra uses are invertible.
; %cond0 has two extra uses (a select and a 'not'); both are freely
; invertible, so the fold still fires.
define i1 @v5_select_and_not(i32 %X, i32 %Y, i32* %selected, i1* %not_cond) {
; CHECK-LABEL: @v5_select_and_not(
; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 32767
; CHECK-NEXT:    store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 %Y
  %not_cond0 = xor i1 %cond0, -1
  store i1 %not_cond0, i1* %not_cond
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; Not all extra uses are invertible.
; Negative test: the raw (non-inverted) %cond0 is stored directly, so
; inverting it would require an extra instruction — no fold happens.
define i1 @n6_select_and_not(i32 %X, i32 %Y, i32* %selected, i1* %not_cond) {
; CHECK-LABEL: @n6_select_and_not(
; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 [[Y:%.*]]
; CHECK-NEXT:    store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
; CHECK-NEXT:    ret i1 [[RES]]
;
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 %Y
  store i1 %cond0, i1* %not_cond
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}

; Not freely invertible, would require extra 'not' instruction.
; Negative test: %cond0's extra use is a plain store of the condition, so
; inverting it would add an instruction — the xor is left as-is.
define i1 @n7_store(i32 %X, i1* %cond) {
; CHECK-LABEL: @n7_store(
; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
; CHECK-NEXT:    store i1 [[COND0]], i1* [[COND:%.*]], align 1
; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
; CHECK-NEXT:    ret i1 [[RES]]
;
  %cond0 = icmp sgt i32 %X, 32767
  store i1 %cond0, i1* %cond
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 -32768
  %res = xor i1 %cond0, %cond1
  ret i1 %res
}