; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s
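
; These tests exercise the DAGCombiner fold from D48466 (llvm-svn 335433) that
; eliminates setcc bool math when the setcc input is the low bit of some value:
;   sub C1, (zext (icmp eq (and x, 1), 0)) --> add (zext (and x, 1)), C1-1
;   add (zext (icmp eq (and x, 1), 0)), C1 --> sub C1+1, (and x, 1)
; plus the select-of-consecutive-constants variants (C2 == C1 + 1). Alive
; proofs: https://rise4fun.com/Alive/Gsyp and https://rise4fun.com/Alive/Bx1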
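
; The 'sub C1, zext' tests below should fold to a mask of the low bit plus an
; OR of the remaining constant bits, with no compare or select.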
define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) {
; CHECK-LABEL: sub_zext_cmp_mask_same_size_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 65508
; CHECK-NEXT:    oris 3, 3, 65535
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %z = zext i1 %c to i32
  %r = sub i32 -27, %z
  ret i32 %r
}

define i32 @sub_zext_cmp_mask_wider_result(i8 %x) {
; CHECK-LABEL: sub_zext_cmp_mask_wider_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 26
; CHECK-NEXT:    blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %z = zext i1 %c to i32
  %r = sub i32 27, %z
  ret i32 %r
}

define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 46
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %z = zext i1 %c to i8
  %r = sub i8 47, %z
  ret i8 %r
}
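
; The 'add zext' variants should fold to a mask of the low bit plus a
; subtract-from-immediate (subfic), again with no compare or select.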
define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 3, 3, 31
; CHECK-NEXT:    subfic 3, 3, 27
; CHECK-NEXT:    blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %z = zext i1 %c to i8
  %r = add i8 %z, 26
  ret i8 %r
}

define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
; CHECK-LABEL: add_zext_cmp_mask_wider_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 3, 3, 31
; CHECK-NEXT:    subfic 3, 3, 27
; CHECK-NEXT:    blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %z = zext i1 %c to i32
  %r = add i32 %z, 26
  ret i32 %r
}

define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 3, 3, 31
; CHECK-NEXT:    subfic 3, 3, 43
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %z = zext i1 %c to i8
  %r = add i8 %z, 42
  ret i8 %r
}
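
; Select of two consecutive constants keyed on the low bit should lower the
; same way: the compare and select disappear in favor of mask + or/subfic.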
define i32 @low_bit_select_constants_bigger_false_same_size_result(i32 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_false_same_size_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 42
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %r = select i1 %c, i32 42, i32 43
  ret i32 %r
}

define i64 @low_bit_select_constants_bigger_false_wider_result(i32 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_false_wider_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 26
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %r = select i1 %c, i64 26, i64 27
  ret i64 %r
}

define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_false_narrower_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    ori 3, 3, 36
; CHECK-NEXT:    blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %r = select i1 %c, i16 36, i16 37
  ret i16 %r
}

define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    subfic 3, 3, -29
; CHECK-NEXT:    blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %r = select i1 %c, i8 227, i8 226
  ret i8 %r
}

define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    subfic 3, 3, 227
; CHECK-NEXT:    blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %r = select i1 %c, i32 227, i32 226
  ret i32 %r
}

define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrldi 3, 3, 63
; CHECK-NEXT:    subfic 3, 3, 41
; CHECK-NEXT:    blr
  %a = and i16 %x, 1
  %c = icmp eq i16 %a, 0
  %r = select i1 %c, i8 41, i8 40
  ret i8 %r
}