llvm-project/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll

2442 lines
96 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; There are 4 commuted variants (abbc/abcb/bcab/bcba) *
; 4 predicate variants ([*][lg][te]) *
; 4 min/max flavors (smin/smax/umin/umax) *
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; 2 notted variants
; = 128 tests
define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp slt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp slt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp slt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp slt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sgt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sgt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sgt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sgt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sle <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sle <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sle <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sle <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sgt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sgt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sgt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sgt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp slt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp slt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp slt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp slt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sle <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sle <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sle <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @smax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: smax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sle <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ult <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ult <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ult <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ult <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ugt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ugt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ugt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ugt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ule <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ule <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ule <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ule <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp uge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp uge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp uge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp uge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ugt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ugt <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ugt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ugt <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ult <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ult <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ult <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ult <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp uge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp uge <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp uge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp uge <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ule <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ule <4 x i32> %c, %a
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ule <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: umax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ule <4 x i32> %a, %c
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp slt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp slt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp slt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp slt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sgt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sgt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sgt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sgt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sle <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sle <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sle <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sle <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp slt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp slt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp slt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp slt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sgt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sgt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ab:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sgt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ba:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sgt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp slt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp slt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ab_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp slt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ba_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp slt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ab_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ba_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp sle <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp sgt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp sle <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp sgt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp sle <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp sgt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp sgt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp sle <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_bc:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ult <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_cb:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ult <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ab:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ult <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ba:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ult <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_bc_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ugt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_cb_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ugt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ab_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ugt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ba_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ugt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_bc_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ule <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_cb_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ule <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ab_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ule <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ba_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ule <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp uge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ult <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp uge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ult <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp uge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ult <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ult <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp uge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ugt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ugt <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ugt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ba:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ugt <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ult <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ult <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ult <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ba_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ult <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp uge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp uge <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp uge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ba_eq_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp uge <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ac = icmp ule <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_cb = icmp ugt <4 x i32> %c, %b
%min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b
%cmp_ac = icmp ule <4 x i32> %x, %z
%r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ab = icmp ugt <4 x i32> %a, %b
%min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
%cmp_ca = icmp ule <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab
ret <4 x i32> %r
}
define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: mvn v1.16b, v1.16b
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875) This was originally planned as the fix for: https://bugs.llvm.org/show_bug.cgi?id=35834 ...but simpler transforms handled that case, so I implemented a lesser solution. It turns out we need to handle the case with 'not' ops too because the real code example that we are trying to solve: https://bugs.llvm.org/show_bug.cgi?id=35875 ...has extra uses of the intermediate values, so we can't rely on smaller canonicalizations to get us to the goal. As with rL321672, I've tried to show every possibility in the codegen tests because that's the simplest way to prove we're doing the right thing in the wide variety of permutations of this pattern. We can also show an InstCombine win because we added a fold for this case in: rL321998 / D41603 An Alive proof for one variant of the pattern to show that the InstCombine and codegen results are correct: https://rise4fun.com/Alive/vd1 Name: min3_nots %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %cmpxyz = icmp slt i8 %minxz, %ny %r = select i1 %cmpxyz, i8 %minxz, i8 %ny Name: min3_nots_alt %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 %cmpxz = icmp slt i8 %nx, %nz %minxz = select i1 %cmpxz, i8 %nx, i8 %nz %cmpyz = icmp slt i8 %ny, %nz %minyz = select i1 %cmpyz, i8 %ny, i8 %nz %cmpyx = icmp slt i8 %y, %x %r = select i1 %cmpyx, i8 %minxz, i8 %minyz => %xz = icmp sgt i8 %x, %z %maxxz = select i1 %xz, i8 %x, i8 %z %xyz = icmp sgt i8 %maxxz, %y %maxxyz = select i1 %xyz, i8 %maxxz, i8 %y %r = xor i8 %maxxyz, -1 llvm-svn: 322283
2018-01-11 23:13:47 +08:00
; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp_bc = icmp ugt <4 x i32> %b, %c
%min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c
%cmp_ba = icmp ugt <4 x i32> %b, %a
%min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a
%cmp_ca = icmp ule <4 x i32> %z, %x
%r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba
ret <4 x i32> %r
}