2017-01-21 04:14:11 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2016-10-30 00:02:57 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
|
|
|
|
|
|
|
; These are actually tests of ValueTracking, and so may have test coverage in InstCombine or other
|
|
|
|
; IR opt passes, but ValueTracking also affects the backend via SelectionDAGBuilder::visitSelect().
|
|
|
|
|
|
|
|
; smin_vec1: with NOT_X = xor X, -1, the select (X >s 0) ? NOT_X : -1 is
; SMIN(NOT_X, -1), because X >s 0 holds exactly when NOT_X <s -1.
; Expected codegen: build all-ones, xor to form NOT_X, then one vpminsd.
define <4 x i32> @smin_vec1(<4 x i32> %x) {
; CHECK-LABEL: smin_vec1:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp sgt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %not_x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; smin_vec2: with NOT_X = xor X, -1, the select (X <s 0) ? -1 : NOT_X is
; SMIN(NOT_X, -1), because X <s 0 holds exactly when NOT_X >s -1.
; Same value as smin_vec1, written with the inverted compare and swapped
; select operands.
define <4 x i32> @smin_vec2(<4 x i32> %x) {
; CHECK-LABEL: smin_vec2:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %not_x
  ret <4 x i32> %sel
}
|
|
|
|
|
2016-11-13 08:32:39 +08:00
|
|
|
; Z = X -nsw Y
; (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
; The nsw flag on the subtract is what lets the compare of X and Y be
; restated as a compare of Z against zero.
; Expected codegen: subtract, zero a register, then one vpminsd.
define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smin_vec3:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp sgt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; Z = X -nsw Y
; (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
; Same value as smin_vec3, written with the inverted compare and swapped
; select operands.
define <4 x i32> @smin_vec4(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smin_vec4:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp slt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}
|
|
|
|
|
2016-10-30 00:02:57 +08:00
|
|
|
; smax_vec1: with NOT_X = xor X, -1, the select (X <s 0) ? NOT_X : -1 is
; SMAX(NOT_X, -1), because X <s 0 holds exactly when NOT_X >s -1.
; Expected codegen: build all-ones, xor to form NOT_X, then one vpmaxsd.
define <4 x i32> @smax_vec1(<4 x i32> %x) {
; CHECK-LABEL: smax_vec1:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %not_x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; smax_vec2: with NOT_X = xor X, -1, the select (X >s 0) ? -1 : NOT_X is
; SMAX(NOT_X, -1), because X >s 0 holds exactly when NOT_X <s -1.
; Same value as smax_vec1, written with the inverted compare and swapped
; select operands.
define <4 x i32> @smax_vec2(<4 x i32> %x) {
; CHECK-LABEL: smax_vec2:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp sgt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %not_x
  ret <4 x i32> %sel
}
|
|
|
|
|
2016-11-13 08:32:39 +08:00
|
|
|
; Z = X -nsw Y
; (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
; The nsw flag on the subtract is what lets the compare of X and Y be
; restated as a compare of Z against zero.
; Expected codegen: subtract, zero a register, then one vpmaxsd.
define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smax_vec3:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp slt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; Z = X -nsw Y
; (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
; Same value as smax_vec3, written with the inverted compare and swapped
; select operands.
define <4 x i32> @smax_vec4(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smax_vec4:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp sgt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}
|
|
|
|
|
2016-10-30 00:02:57 +08:00
|
|
|
; umax_vec1: an unsigned max written with a signed compare.
; X <s 0 is the same predicate as X >u 2147483647, so
; (X <s 0) ? X : 2147483647 ==> UMAX(X, 2147483647).
; InstCombine canonicalizes the unsigned compare to this signed form, so
; value tracking must still recognize it (https://reviews.llvm.org/D26096,
; llvm-svn 286318).
; Expected codegen: one vpmaxud against a constant-pool operand.
define <4 x i32> @umax_vec1(<4 x i32> %x) {
; CHECK-LABEL: umax_vec1:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; umax_vec2: an unsigned max written with a signed compare.
; X >s -1 is the same predicate as X <u 2147483648, so
; (X >s -1) ? 2147483648 : X ==> UMAX(X, 2147483648).
; Companion to umax_vec1 with the opposite compare direction
; (https://reviews.llvm.org/D26096, llvm-svn 286318).
; Expected codegen: one vpmaxud against a constant-pool operand.
define <4 x i32> @umax_vec2(<4 x i32> %x) {
; CHECK-LABEL: umax_vec2:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>, <4 x i32> %x
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; umin_vec1: an unsigned min written with a signed compare.
; X <s 0 is the same predicate as X >u 2147483647, so
; (X <s 0) ? 2147483647 : X ==> UMIN(X, 2147483647).
; InstCombine canonicalizes the unsigned compare to this signed form, so
; value tracking must still recognize it (https://reviews.llvm.org/D26096,
; llvm-svn 286318).
; Expected codegen: one vpminud against a constant-pool operand.
define <4 x i32> @umin_vec1(<4 x i32> %x) {
; CHECK-LABEL: umin_vec1:
; CHECK: # BB#0:
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %x
  ret <4 x i32> %sel
}
|
|
|
|
|
|
|
|
; umin_vec2: an unsigned min written with a signed compare.
; X >s -1 is the same predicate as X <u 2147483648, so
; (X >s -1) ? X : 2147483648 ==> UMIN(X, 2147483648).
; Companion to umin_vec1 with the opposite compare direction
; (https://reviews.llvm.org/D26096, llvm-svn 286318).
; Expected codegen: one vpminud against a constant-pool operand.
define <4 x i32> @umin_vec2(<4 x i32> %x) {
; CHECK-LABEL: umin_vec2:
; CHECK: # BB#0:
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  ret <4 x i32> %sel
}
|
|
|
|
|
2017-01-21 04:14:11 +08:00
|
|
|
; The next 4 tests are value clamping with constants:
|
|
|
|
; https://llvm.org/bugs/show_bug.cgi?id=31693
|
|
|
|
|
|
|
|
; (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
; Here C1 = 15 and C2 = 255; the fold requires C1 <s C2.
; Note that the outer compare tests the original X, not the inner min.
; Added with the clamp recognition for PR31693 (llvm-svn 292660).
; Expected codegen: vpminsd followed by vpmaxsd, both with constant-pool
; operands.
define <4 x i32> @clamp_signed1(<4 x i32> %x) {
; CHECK-LABEL: clamp_signed1:
; CHECK: # BB#0:
; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp2 = icmp slt <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 255, i32 255, i32 255, i32 255>
  %cmp1 = icmp slt <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %r = select <4 x i1> %cmp1, <4 x i32><i32 15, i32 15, i32 15, i32 15>, <4 x i32> %min
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
; Here C1 = 255 and C2 = 15; the fold requires C1 >s C2.
; Note that the outer compare tests the original X, not the inner max.
; Added with the clamp recognition for PR31693 (llvm-svn 292660).
; Expected codegen: vpmaxsd followed by vpminsd, both with constant-pool
; operands.
define <4 x i32> @clamp_signed2(<4 x i32> %x) {
; CHECK-LABEL: clamp_signed2:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp2 = icmp sgt <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 15, i32 15, i32 15, i32 15>
  %cmp1 = icmp sgt <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %r = select <4 x i1> %cmp1, <4 x i32><i32 255, i32 255, i32 255, i32 255>, <4 x i32> %max
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
; Here C1 = 15 and C2 = 255; the fold requires C1 <u C2.
; Note that the outer compare tests the original X, not the inner min.
; Added with the clamp recognition for PR31693 (llvm-svn 292660).
; Expected codegen: vpminud followed by vpmaxud, both with constant-pool
; operands.
define <4 x i32> @clamp_unsigned1(<4 x i32> %x) {
; CHECK-LABEL: clamp_unsigned1:
; CHECK: # BB#0:
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp2 = icmp ult <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 255, i32 255, i32 255, i32 255>
  %cmp1 = icmp ult <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %r = select <4 x i1> %cmp1, <4 x i32><i32 15, i32 15, i32 15, i32 15>, <4 x i32> %min
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
; Here C1 = 255 and C2 = 15; the fold requires C1 >u C2.
; Note that the outer compare tests the original X, not the inner max.
; Added with the clamp recognition for PR31693 (llvm-svn 292660).
; Expected codegen: vpmaxud followed by vpminud, both with constant-pool
; operands.
define <4 x i32> @clamp_unsigned2(<4 x i32> %x) {
; CHECK-LABEL: clamp_unsigned2:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp2 = icmp ugt <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32><i32 15, i32 15, i32 15, i32 15>
  %cmp1 = icmp ugt <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %r = select <4 x i1> %cmp1, <4 x i32><i32 255, i32 255, i32 255, i32 255>, <4 x i32> %max
  ret <4 x i32> %r
}
|
|
|
|
|