2018-01-03 04:16:45 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
|
|
|
|
|
|
|
|
; There are 4 commuted variants (abbc/abcb/bcab/bcba) *
|
2018-01-03 05:04:08 +08:00
|
|
|
; 4 predicate variants ([su][lg][te], signedness matching the min/max flavor) *
|
2018-01-11 07:31:42 +08:00
|
|
|
; 4 min/max flavors (smin/smax/umin/umax) *
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; 2 notted variants
|
2018-01-11 07:31:42 +08:00
|
|
|
; = 128 tests
|
2018-01-03 04:16:45 +08:00
|
|
|
|
|
|
|
; Signed min-of-min: select on (a slt c) between smin(a, b) and smin(b, c).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp slt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min: select on (a slt c) between smin(a, b) and smin(c, b)
; (second inner min has commuted operands).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp slt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min: select on (c slt a) between smin(b, c) and smin(a, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp slt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min: select on (c slt a) between smin(b, c) and smin(b, a)
; (second inner min has commuted operands).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp slt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped outer predicate: select on (c sgt a)
; between smin(a, b) and smin(b, c).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sgt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped outer predicate: select on (c sgt a)
; between smin(a, b) and smin(c, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sgt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped outer predicate: select on (a sgt c)
; between smin(b, c) and smin(a, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sgt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped outer predicate: select on (a sgt c)
; between smin(b, c) and smin(b, a).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sgt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a non-strict outer predicate: select on (a sle c)
; between smin(a, b) and smin(b, c).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sle <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a non-strict outer predicate: select on (a sle c)
; between smin(a, b) and smin(c, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sle <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a non-strict outer predicate: select on (c sle a)
; between smin(b, c) and smin(a, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sle <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a non-strict outer predicate: select on (c sle a)
; between smin(b, c) and smin(b, a).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sle <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped, non-strict outer predicate: select on
; (c sge a) between smin(a, b) and smin(b, c).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped, non-strict outer predicate: select on
; (c sge a) between smin(a, b) and smin(c, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped, non-strict outer predicate: select on
; (a sge c) between smin(b, c) and smin(a, b).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min with a swapped, non-strict outer predicate: select on
; (a sge c) between smin(b, c) and smin(b, a).
; The assertions expect exactly three vector smin instructions before ret.
define <4 x i32> @smin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max: select on (a sgt c) between smax(a, b) and smax(b, c).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sgt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max: select on (a sgt c) between smax(a, b) and smax(c, b)
; (second inner max has commuted operands).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp sgt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sgt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max: select on (c sgt a) between smax(b, c) and smax(a, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sgt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max: select on (c sgt a) between smax(b, c) and smax(b, a)
; (second inner max has commuted operands).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp sgt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sgt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped outer predicate: select on (c slt a)
; between smax(a, b) and smax(b, c).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp slt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped outer predicate: select on (c slt a)
; between smax(a, b) and smax(c, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp sgt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp slt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped outer predicate: select on (a slt c)
; between smax(b, c) and smax(a, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp slt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped outer predicate: select on (a slt c)
; between smax(b, c) and smax(b, a).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp sgt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp slt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a non-strict outer predicate: select on (a sge c)
; between smax(a, b) and smax(b, c).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a non-strict outer predicate: select on (a sge c)
; between smax(a, b) and smax(c, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp sgt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a non-strict outer predicate: select on (c sge a)
; between smax(b, c) and smax(a, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a non-strict outer predicate: select on (c sge a)
; between smax(b, c) and smax(b, a).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp sgt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped, non-strict outer predicate: select on
; (c sle a) between smax(a, b) and smax(b, c).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sle <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped, non-strict outer predicate: select on
; (c sle a) between smax(a, b) and smax(c, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp sgt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sle <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped, non-strict outer predicate: select on
; (a sle c) between smax(b, c) and smax(a, b).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp sgt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sle <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed max-of-max with a swapped, non-strict outer predicate: select on
; (a sle c) between smax(b, c) and smax(b, a).
; The assertions expect exactly three vector smax instructions before ret.
define <4 x i32> @smax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp sgt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp sgt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sle <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min: select on (a ult c) between umin(a, b) and umin(b, c).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp ult <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min: select on (a ult c) between umin(a, b) and umin(c, b)
; (second inner min has commuted operands).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ult <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp ult <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min: select on (c ult a) between umin(b, c) and umin(a, b).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp ult <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min: select on (c ult a) between umin(b, c) and umin(b, a)
; (second inner min has commuted operands).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ult <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp ult <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min with a swapped outer predicate: select on (c ugt a)
; between umin(a, b) and umin(b, c).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp ugt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min with a swapped outer predicate: select on (c ugt a)
; between umin(a, b) and umin(c, b).
; The assertions expect exactly three vector umin instructions before ret.
define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ult <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp ugt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ab, swapped outer predicate:
;   r = (a >u c) ? umin(b, c) : umin(a, b)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare is 'ugt' with the operands swapped relative to the base test.
  %cmp_ca = icmp ugt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ba, swapped outer predicate:
;   r = (a >u c) ? umin(b, c) : umin(b, a)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ult <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare is 'ugt' with the operands swapped relative to the base test.
  %cmp_ca = icmp ugt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order ab/bc, non-strict outer predicate:
;   r = (a <=u c) ? umin(a, b) : umin(b, c)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare uses the or-equal form 'ule'.
  %cmp_ac = icmp ule <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order ab/cb, non-strict outer predicate:
;   r = (a <=u c) ? umin(a, b) : umin(c, b)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ult <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare uses the or-equal form 'ule'.
  %cmp_ac = icmp ule <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ab, non-strict outer predicate:
;   r = (c <=u a) ? umin(b, c) : umin(a, b)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare uses the or-equal form 'ule'.
  %cmp_ca = icmp ule <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ba, non-strict outer predicate:
;   r = (c <=u a) ? umin(b, c) : umin(b, a)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ult <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare uses the or-equal form 'ule'.
  %cmp_ca = icmp ule <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order ab/bc, swapped non-strict outer predicate:
;   r = (c >=u a) ? umin(a, b) : umin(b, c)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare is 'uge' with the operands swapped.
  %cmp_ac = icmp uge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order ab/cb, swapped non-strict outer predicate:
;   r = (c >=u a) ? umin(a, b) : umin(c, b)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ult <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare is 'uge' with the operands swapped.
  %cmp_ac = icmp uge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ab, swapped non-strict outer predicate:
;   r = (a >=u c) ? umin(b, c) : umin(a, b)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ult <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare is 'uge' with the operands swapped.
  %cmp_ca = icmp uge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned min-of-min, operand order bc/ba, swapped non-strict outer predicate:
;   r = (a >=u c) ? umin(b, c) : umin(b, a)
; The CHECK lines expect exactly three vector umin instructions.
define <4 x i32> @umin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ult <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ult <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare is 'uge' with the operands swapped.
  %cmp_ca = icmp uge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/bc, strict predicate:
;   r = (a >u c) ? umax(a, b) : umax(b, c)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer select between the two inner maxima.
  %cmp_ac = icmp ugt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/cb, strict predicate:
;   r = (a >u c) ? umax(a, b) : umax(c, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ugt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer select between the two inner maxima.
  %cmp_ac = icmp ugt <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ab, strict predicate:
;   r = (c >u a) ? umax(b, c) : umax(a, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer select between the two inner maxima.
  %cmp_ca = icmp ugt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ba, strict predicate:
;   r = (c >u a) ? umax(b, c) : umax(b, a)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ugt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer select between the two inner maxima.
  %cmp_ca = icmp ugt <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/bc, swapped outer predicate:
;   r = (c <u a) ? umax(a, b) : umax(b, c)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare is 'ult' with the operands swapped relative to the base test.
  %cmp_ac = icmp ult <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/cb, swapped outer predicate:
;   r = (c <u a) ? umax(a, b) : umax(c, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ugt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare is 'ult' with the operands swapped relative to the base test.
  %cmp_ac = icmp ult <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ab, swapped outer predicate:
;   r = (a <u c) ? umax(b, c) : umax(a, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare is 'ult' with the operands swapped relative to the base test.
  %cmp_ca = icmp ult <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ba, swapped outer predicate:
;   r = (a <u c) ? umax(b, c) : umax(b, a)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ugt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare is 'ult' with the operands swapped relative to the base test.
  %cmp_ca = icmp ult <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/bc, non-strict outer predicate:
;   r = (a >=u c) ? umax(a, b) : umax(b, c)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare uses the or-equal form 'uge'.
  %cmp_ac = icmp uge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/cb, non-strict outer predicate:
;   r = (a >=u c) ? umax(a, b) : umax(c, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ugt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare uses the or-equal form 'uge'.
  %cmp_ac = icmp uge <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ab, non-strict outer predicate:
;   r = (c >=u a) ? umax(b, c) : umax(a, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare uses the or-equal form 'uge'.
  %cmp_ca = icmp uge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ba, non-strict outer predicate:
;   r = (c >=u a) ? umax(b, c) : umax(b, a)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ugt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare uses the or-equal form 'uge'.
  %cmp_ca = icmp uge <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/bc, swapped non-strict outer predicate:
;   r = (c <=u a) ? umax(a, b) : umax(b, c)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare is 'ule' with the operands swapped.
  %cmp_ac = icmp ule <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order ab/cb, swapped non-strict outer predicate:
;   r = (c <=u a) ? umax(a, b) : umax(c, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp ugt <4 x i32> %c, %b
  %max_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare is 'ule' with the operands swapped.
  %cmp_ac = icmp ule <4 x i32> %c, %a
  %r = select <4 x i1> %cmp_ac, <4 x i32> %max_ab, <4 x i32> %max_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ab, swapped non-strict outer predicate:
;   r = (a <=u c) ? umax(b, c) : umax(a, b)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp ugt <4 x i32> %a, %b
  %max_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  ; Outer compare is 'ule' with the operands swapped.
  %cmp_ca = icmp ule <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max, operand order bc/ba, swapped non-strict outer predicate:
;   r = (a <=u c) ? umax(b, c) : umax(b, a)
; The CHECK lines expect exactly three vector umax instructions.
define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  %cmp_bc = icmp ugt <4 x i32> %b, %c
  %max_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp ugt <4 x i32> %b, %a
  %max_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  ; Outer compare is 'ule' with the operands swapped.
  %cmp_ca = icmp ule <4 x i32> %a, %c
  %r = select <4 x i1> %cmp_ca, <4 x i32> %max_bc, <4 x i32> %max_ba
  ret <4 x i32> %r
}
|
|
|
|
|
2018-01-11 07:31:42 +08:00
|
|
|
; Signed min-of-min on bitwise-not'ed inputs, operand order ab/bc.
; a/b/c are ~x/~y/~z; the outer compare is done on the un-notted values
; (z <s x), and the result selects smin(a, b) or smin(b, c).
; The CHECK lines expect three mvn followed by three vector smin instructions.
define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not each input (xor with all-ones).
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  ; Outer compare reads the original %z and %x rather than %a and %c.
  %cmp_ac = icmp slt <4 x i32> %z, %x
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Signed min-of-min on bitwise-not'ed inputs, operand order ab/cb.
; a/b/c are ~x/~y/~z; the outer compare is done on the un-notted values
; (z <s x), and the result selects smin(a, b) or smin(c, b).
; The CHECK lines expect three mvn followed by three vector smin instructions.
define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not each input (xor with all-ones).
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  ; Outer compare reads the original %z and %x rather than %a and %c.
  %cmp_ac = icmp slt <4 x i32> %z, %x
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order bc/ab, 'slt' select predicate.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The final select is keyed on
; the un-notted values: signed %x < %z holds exactly when %c < %a, so it picks
; min(b,c) when c < a and min(a,b) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp slt <4 x i32> %x, %z
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order bc/ba, 'slt' select predicate.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The second inner min is
; written with commuted operands (b, a). The final select is keyed on the
; un-notted values: signed %x < %z holds exactly when %c < %a, so it picks
; min(b,c) when c < a and min(b,a) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp slt <4 x i32> %x, %z
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order ab/bc, select predicate written as 'sgt'.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The final select is keyed on
; the un-notted values: signed %x > %z holds exactly when %a < %c, so it picks
; min(a,b) when a < c and min(b,c) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc_swap_pred:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sgt <4 x i32> %x, %z
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order ab/cb, select predicate written as 'sgt'.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The second inner min is
; written with commuted operands (c, b). The final select is keyed on the
; un-notted values: signed %x > %z holds exactly when %a < %c, so it picks
; min(a,b) when a < c and min(c,b) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb_swap_pred:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_cb = icmp slt <4 x i32> %c, %b
  %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
  %cmp_ac = icmp sgt <4 x i32> %x, %z
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order bc/ab, select predicate written as 'sgt'.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The final select is keyed on
; the un-notted values: signed %z > %x holds exactly when %c < %a, so it picks
; min(b,c) when c < a and min(a,b) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab_swap_pred:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_ca = icmp sgt <4 x i32> %z, %x
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order bc/ba, select predicate written as 'sgt'.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The second inner min is
; written with commuted operands (b, a). The final select is keyed on the
; un-notted values: signed %z > %x holds exactly when %c < %a, so it picks
; min(b,c) when c < a and min(b,a) otherwise, i.e. a 3-way signed min.
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba_swap_pred:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ba = icmp slt <4 x i32> %b, %a
  %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
  %cmp_ca = icmp sgt <4 x i32> %z, %x
  %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; Notted smin-of-smin, operand order ab/bc, non-strict ('sle') select predicate.
; %a, %b, %c are the bitwise-nots of %x, %y, %z. The final select is keyed on
; the un-notted values: signed %z <= %x holds exactly when %a <= %c, so it
; picks min(a,b) when a <= c and min(b,c) otherwise, i.e. a 3-way signed min
; (when a == c both arms produce the same value).
; Expected codegen is three 'mvn' plus three 'smin' with no compare/select
; (min/max-of-min/max with notted ops, PR35875).
define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc_eq_pred:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    mvn v2.16b, v2.16b
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp_ab = icmp slt <4 x i32> %a, %b
  %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
  %cmp_bc = icmp slt <4 x i32> %b, %c
  %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
  %cmp_ac = icmp sle <4 x i32> %z, %x
  %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_ab_cb_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp slt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp slt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp sle <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_bc_ab_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp slt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp slt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp sle <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_bc_ba_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp slt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp slt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp sle <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp slt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp slt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp sge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp slt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp slt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp sge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp slt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp slt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp sge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp slt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp slt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp sge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~x, ~y).
  %xy_gt = icmp sgt <4 x i32> %not_x, %not_y
  %max_xy = select <4 x i1> %xy_gt, <4 x i32> %not_x, <4 x i32> %not_y
  ; Inner signed max of (~y, ~z).
  %yz_gt = icmp sgt <4 x i32> %not_y, %not_z
  %max_yz = select <4 x i1> %yz_gt, <4 x i32> %not_y, <4 x i32> %not_z
  ; Outer select compares the original operands: since 'not' reverses signed
  ; order, (z > x) holds exactly when (~x > ~z).
  %outer = icmp sgt <4 x i32> %z, %x
  %res = select <4 x i1> %outer, <4 x i32> %max_xy, <4 x i32> %max_yz
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~x, ~y).
  %xy_gt = icmp sgt <4 x i32> %not_x, %not_y
  %max_xy = select <4 x i1> %xy_gt, <4 x i32> %not_x, <4 x i32> %not_y
  ; Inner signed max of (~z, ~y) -- commuted operand order for this variant.
  %zy_gt = icmp sgt <4 x i32> %not_z, %not_y
  %max_zy = select <4 x i1> %zy_gt, <4 x i32> %not_z, <4 x i32> %not_y
  ; Outer select compares the original operands: since 'not' reverses signed
  ; order, (z > x) holds exactly when (~x > ~z).
  %outer = icmp sgt <4 x i32> %z, %x
  %res = select <4 x i1> %outer, <4 x i32> %max_xy, <4 x i32> %max_zy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~y, ~z).
  %yz_gt = icmp sgt <4 x i32> %not_y, %not_z
  %max_yz = select <4 x i1> %yz_gt, <4 x i32> %not_y, <4 x i32> %not_z
  ; Inner signed max of (~x, ~y).
  %xy_gt = icmp sgt <4 x i32> %not_x, %not_y
  %max_xy = select <4 x i1> %xy_gt, <4 x i32> %not_x, <4 x i32> %not_y
  ; Outer select compares the original operands: since 'not' reverses signed
  ; order, (x > z) holds exactly when (~z > ~x).
  %outer = icmp sgt <4 x i32> %x, %z
  %res = select <4 x i1> %outer, <4 x i32> %max_yz, <4 x i32> %max_xy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~y, ~z).
  %yz_gt = icmp sgt <4 x i32> %not_y, %not_z
  %max_yz = select <4 x i1> %yz_gt, <4 x i32> %not_y, <4 x i32> %not_z
  ; Inner signed max of (~y, ~x) -- commuted operand order for this variant.
  %yx_gt = icmp sgt <4 x i32> %not_y, %not_x
  %max_yx = select <4 x i1> %yx_gt, <4 x i32> %not_y, <4 x i32> %not_x
  ; Outer select compares the original operands: since 'not' reverses signed
  ; order, (x > z) holds exactly when (~z > ~x).
  %outer = icmp sgt <4 x i32> %x, %z
  %res = select <4 x i1> %outer, <4 x i32> %max_yz, <4 x i32> %max_yx
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~x, ~y).
  %xy_gt = icmp sgt <4 x i32> %not_x, %not_y
  %max_xy = select <4 x i1> %xy_gt, <4 x i32> %not_x, <4 x i32> %not_y
  ; Inner signed max of (~y, ~z).
  %yz_gt = icmp sgt <4 x i32> %not_y, %not_z
  %max_yz = select <4 x i1> %yz_gt, <4 x i32> %not_y, <4 x i32> %not_z
  ; Swapped-predicate form of the outer compare: (x < z) is the same
  ; condition as (z > x), which in turn holds exactly when (~x > ~z).
  %outer = icmp slt <4 x i32> %x, %z
  %res = select <4 x i1> %outer, <4 x i32> %max_xy, <4 x i32> %max_yz
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not every input; the inner max patterns operate on these values.
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %not_z = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Inner signed max of (~x, ~y).
  %xy_gt = icmp sgt <4 x i32> %not_x, %not_y
  %max_xy = select <4 x i1> %xy_gt, <4 x i32> %not_x, <4 x i32> %not_y
  ; Inner signed max of (~z, ~y) -- commuted operand order for this variant.
  %zy_gt = icmp sgt <4 x i32> %not_z, %not_y
  %max_zy = select <4 x i1> %zy_gt, <4 x i32> %not_z, <4 x i32> %not_y
  ; Swapped-predicate form of the outer compare: (x < z) is the same
  ; condition as (z > x), which in turn holds exactly when (~x > ~z).
  %outer = icmp slt <4 x i32> %x, %z
  %res = select <4 x i1> %outer, <4 x i32> %max_xy, <4 x i32> %max_zy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ab_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp slt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ba_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp sgt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp slt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_ab_bc_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp sge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_ab_cb_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp sgt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp sge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ab_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp sge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ba_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp sgt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp sge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp sle <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp sgt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp sle <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp sgt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp sle <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp sgt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp sgt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp sle <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_bc:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp ult <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_cb:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ult <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp ult <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ab:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp ult <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ba:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ult <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp ult <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_bc_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp ugt <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_cb_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ult <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp ugt <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ab_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp ugt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ba_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ult <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp ugt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_bc_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp ule <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_cb_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ult <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp ule <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ab_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp ule <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned min-of-mins on bitwise-inverted inputs, "bc/ba" operand order, with
; a non-strict (ule) outer predicate. The inner selects compute umin(~y, ~z)
; and umin(~y, ~x); the outer select chooses between them.
; The assertions below expect exactly three MVNs followed by three UMINs and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ba_eq_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ult <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; x <= z is equivalent to ~z <= ~x (i.e. %c <= %a).
%cmp_ca = icmp ule <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned min-of-mins on bitwise-inverted inputs, "ab/bc" operand order, with
; the non-strict outer predicate written in its operand-swapped form (uge).
; The inner selects compute umin(~x, ~y) and umin(~y, ~z); the outer select
; chooses between them.
; The assertions below expect exactly three MVNs followed by three UMINs and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; x >= z is equivalent to ~x <= ~z (i.e. %a <= %c).
%cmp_ac = icmp uge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned min-of-mins on bitwise-inverted inputs, "ab/cb" operand order, with
; the non-strict outer predicate written in its operand-swapped form (uge).
; The inner selects compute umin(~x, ~y) and umin(~z, ~y); the outer select
; chooses between them.
; The assertions below expect exactly three MVNs followed by three UMINs and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ult <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; x >= z is equivalent to ~x <= ~z (i.e. %a <= %c).
%cmp_ac = icmp uge <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned min-of-mins on bitwise-inverted inputs, "bc/ab" operand order, with
; the non-strict outer predicate written in its operand-swapped form (uge).
; The inner selects compute umin(~y, ~z) and umin(~x, ~y); the outer select
; chooses between them.
; The assertions below expect exactly three MVNs followed by three UMINs and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ult <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; z >= x is equivalent to ~z <= ~x (i.e. %c <= %a).
%cmp_ca = icmp uge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned min-of-mins on bitwise-inverted inputs, "bc/ba" operand order, with
; the non-strict outer predicate written in its operand-swapped form (uge).
; The inner selects compute umin(~y, ~z) and umin(~y, ~x); the outer select
; chooses between them.
; The assertions below expect exactly three MVNs followed by three UMINs and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ult <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ult <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; z >= x is equivalent to ~z <= ~x (i.e. %c <= %a).
%cmp_ca = icmp uge <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned max-of-maxes on bitwise-inverted inputs, "ab/bc" operand order, with
; a strict (ugt) outer predicate. The inner selects compute umax(~x, ~y) and
; umax(~y, ~z); the outer select chooses between them.
; NOTE: the values named %min_ab / %min_bc hold unsigned maxima here (their
; compares use ugt); the names are presumably carried over from the umin
; variants of this test.
; The assertions below expect exactly three MVNs followed by three UMAXes and a
; return, i.e. no separate compare/select sequence in the output.
define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_ab_bc:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
; %a, %b, %c are the bitwise NOT (xor with all-ones) of %x, %y, %z.
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
; The outer compare is on the un-notted inputs: for unsigned values,
; z > x is equivalent to ~x > ~z (i.e. %a > %c).
%cmp_ac = icmp ugt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_ab_cb:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ugt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp ugt <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_bc_ab:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp ugt <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_bc_ba:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ugt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp ugt <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_ab_bc_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ac = icmp ult <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_ab_cb_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_cb = icmp ugt <4 x i32> %c, %b
|
|
|
|
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
|
|
|
|
%cmp_ac = icmp ult <4 x i32> %x, %z
|
|
|
|
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_bc_ab_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ab = icmp ugt <4 x i32> %a, %b
|
|
|
|
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
|
|
|
|
%cmp_ca = icmp ult <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_bc_ba_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ugt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp ult <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (a,b)/(b,c), outer
; predicate 'uge'. With a = ~x, b = ~y, c = ~z, the outer compare 'z uge x'
; is the un-notted form of 'a uge c'. The assertions expect three mvn, three
; umax and a ret, with no separate compare/select sequence.
define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; umax(~y, ~z)
  %gt_yz = icmp ugt <4 x i32> %ny, %nz
  %max_yz = select <4 x i1> %gt_yz, <4 x i32> %ny, <4 x i32> %nz
  ; Outer select is keyed on the original (un-notted) values.
  %sel_xz = icmp uge <4 x i32> %z, %x
  %res = select <4 x i1> %sel_xz, <4 x i32> %max_xy, <4 x i32> %max_yz
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (a,b)/(c,b), outer
; predicate 'uge'. With a = ~x, b = ~y, c = ~z, the outer compare 'z uge x'
; is the un-notted form of 'a uge c'. The assertions expect three mvn, three
; umax and a ret, with no separate compare/select sequence.
define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; umax(~z, ~y) -- commuted inner operands
  %gt_zy = icmp ugt <4 x i32> %nz, %ny
  %max_zy = select <4 x i1> %gt_zy, <4 x i32> %nz, <4 x i32> %ny
  ; Outer select is keyed on the original (un-notted) values.
  %sel_xz = icmp uge <4 x i32> %z, %x
  %res = select <4 x i1> %sel_xz, <4 x i32> %max_xy, <4 x i32> %max_zy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (b,c)/(a,b), outer
; predicate 'uge'. With a = ~x, b = ~y, c = ~z, the outer compare 'x uge z'
; is the un-notted form of 'c uge a'. The assertions expect three mvn, three
; umax and a ret, with no separate compare/select sequence.
define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~y, ~z)
  %gt_yz = icmp ugt <4 x i32> %ny, %nz
  %max_yz = select <4 x i1> %gt_yz, <4 x i32> %ny, <4 x i32> %nz
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; Outer select is keyed on the original (un-notted) values.
  %sel_zx = icmp uge <4 x i32> %x, %z
  %res = select <4 x i1> %sel_zx, <4 x i32> %max_yz, <4 x i32> %max_xy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (b,c)/(b,a), outer
; predicate 'uge'. With a = ~x, b = ~y, c = ~z, the outer compare 'x uge z'
; is the un-notted form of 'c uge a'. The assertions expect three mvn, three
; umax and a ret, with no separate compare/select sequence.
define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~y, ~z)
  %gt_yz = icmp ugt <4 x i32> %ny, %nz
  %max_yz = select <4 x i1> %gt_yz, <4 x i32> %ny, <4 x i32> %nz
  ; umax(~y, ~x) -- commuted inner operands
  %gt_yx = icmp ugt <4 x i32> %ny, %nx
  %max_yx = select <4 x i1> %gt_yx, <4 x i32> %ny, <4 x i32> %nx
  ; Outer select is keyed on the original (un-notted) values.
  %sel_zx = icmp uge <4 x i32> %x, %z
  %res = select <4 x i1> %sel_zx, <4 x i32> %max_yz, <4 x i32> %max_yx
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (a,b)/(b,c), outer
; predicate written in swapped form ('ule'). With a = ~x, b = ~y, c = ~z, the
; outer compare 'x ule z' is the un-notted form of 'a uge c'. The assertions
; expect three mvn, three umax and a ret, with no separate compare/select
; sequence.
define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; umax(~y, ~z)
  %gt_yz = icmp ugt <4 x i32> %ny, %nz
  %max_yz = select <4 x i1> %gt_yz, <4 x i32> %ny, <4 x i32> %nz
  ; Outer select is keyed on the original (un-notted) values, swapped predicate.
  %sel_xz = icmp ule <4 x i32> %x, %z
  %res = select <4 x i1> %sel_xz, <4 x i32> %max_xy, <4 x i32> %max_yz
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (a,b)/(c,b), outer
; predicate written in swapped form ('ule'). With a = ~x, b = ~y, c = ~z, the
; outer compare 'x ule z' is the un-notted form of 'a uge c'. The assertions
; expect three mvn, three umax and a ret, with no separate compare/select
; sequence.
define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; umax(~z, ~y) -- commuted inner operands
  %gt_zy = icmp ugt <4 x i32> %nz, %ny
  %max_zy = select <4 x i1> %gt_zy, <4 x i32> %nz, <4 x i32> %ny
  ; Outer select is keyed on the original (un-notted) values, swapped predicate.
  %sel_xz = icmp ule <4 x i32> %x, %z
  %res = select <4 x i1> %sel_xz, <4 x i32> %max_xy, <4 x i32> %max_zy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
; Unsigned max-of-max on inverted inputs: operand order (b,c)/(a,b), outer
; predicate written in swapped form ('ule'). With a = ~x, b = ~y, c = ~z, the
; outer compare 'z ule x' is the un-notted form of 'c uge a'. The assertions
; expect three mvn, three umax and a ret, with no separate compare/select
; sequence.
define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
  ; Bitwise-not of each input (xor with all-ones).
  %nx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ny = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %nz = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; umax(~y, ~z)
  %gt_yz = icmp ugt <4 x i32> %ny, %nz
  %max_yz = select <4 x i1> %gt_yz, <4 x i32> %ny, <4 x i32> %nz
  ; umax(~x, ~y)
  %gt_xy = icmp ugt <4 x i32> %nx, %ny
  %max_xy = select <4 x i1> %gt_xy, <4 x i32> %nx, <4 x i32> %ny
  ; Outer select is keyed on the original (un-notted) values, swapped predicate.
  %sel_zx = icmp ule <4 x i32> %z, %x
  %res = select <4 x i1> %sel_zx, <4 x i32> %max_yz, <4 x i32> %max_xy
  ret <4 x i32> %res
}
|
|
|
|
|
|
|
|
define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
|
|
|
|
; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred:
|
|
|
|
; CHECK: // %bb.0:
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v0.16b, v0.16b
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: mvn v1.16b, v1.16b
|
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
An Alive proof for one variant of the pattern to show that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
2018-01-11 23:13:47 +08:00
|
|
|
; CHECK-NEXT: mvn v2.16b, v2.16b
|
|
|
|
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
|
|
|
|
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
|
2018-01-11 07:31:42 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
|
|
|
|
%cmp_bc = icmp ugt <4 x i32> %b, %c
|
|
|
|
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
|
|
|
|
%cmp_ba = icmp ugt <4 x i32> %b, %a
|
|
|
|
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
|
|
|
|
%cmp_ca = icmp ule <4 x i32> %z, %x
|
|
|
|
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
|
|
|
|
ret <4 x i32> %r
|
|
|
|
}
|
|
|
|
|