; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; These are actually tests of ValueTracking, and so may have test coverage in InstCombine or other
; IR opt passes, but ValueTracking also affects the backend via SelectionDAGBuilder::visitSelect().
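
; (X >s 0) ? ~X : -1 ==> (~X <s -1) ? ~X : -1 ==> SMIN(~X, -1)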
define <4 x i32> @smin_vec1(<4 x i32> %x) {
; CHECK-LABEL: smin_vec1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp sgt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %not_x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}
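
; (X <s 0) ? -1 : ~X ==> (~X >s -1) ? -1 : ~X ==> SMIN(~X, -1)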
define <4 x i32> @smin_vec2(<4 x i32> %x) {
; CHECK-LABEL: smin_vec2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %not_x
  ret <4 x i32> %sel
}
; Z = X -nsw Y
; (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smin_vec3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp sgt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub
  ret <4 x i32> %sel
}
; Z = X -nsw Y
; (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
define <4 x i32> @smin_vec4(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smin_vec4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp slt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}
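
; (X <s 0) ? ~X : -1 ==> (~X >s -1) ? ~X : -1 ==> SMAX(~X, -1)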
define <4 x i32> @smax_vec1(<4 x i32> %x) {
; CHECK-LABEL: smax_vec1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %not_x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}
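
; (X >s 0) ? -1 : ~X ==> (~X <s -1) ? -1 : ~X ==> SMAX(~X, -1)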
define <4 x i32> @smax_vec2(<4 x i32> %x) {
; CHECK-LABEL: smax_vec2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %not_x = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cmp = icmp sgt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %not_x
  ret <4 x i32> %sel
}
; Z = X -nsw Y
; (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smax_vec3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp slt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub
  ret <4 x i32> %sel
}
; Z = X -nsw Y
; (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
define <4 x i32> @smax_vec4(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smax_vec4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %sub = sub nsw <4 x i32> %x, %y
  %cmp = icmp sgt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}
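
; The unsigned-compare forms of the following patterns are canonicalized to signed
; compares by InstCombine, so ValueTracking must recognize these signed variants to
; keep the single-instruction UMAX/UMIN codegen (see https://reviews.llvm.org/D26096).

; (X <s 0) ? X : 2147483647 ==> (X >u 2147483647) ? X : 2147483647 ==> UMAX(X, 2147483647)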
define <4 x i32> @umax_vec1(<4 x i32> %x) {
; CHECK-LABEL: umax_vec1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  ret <4 x i32> %sel
}
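
; (X >s -1) ? 2147483648 : X ==> (X <u 2147483648) ? 2147483648 : X ==> UMAX(X, 2147483648)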
define <4 x i32> @umax_vec2(<4 x i32> %x) {
; CHECK-LABEL: umax_vec2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>, <4 x i32> %x
  ret <4 x i32> %sel
}
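
; (X <s 0) ? 2147483647 : X ==> (X >u 2147483647) ? 2147483647 : X ==> UMIN(X, 2147483647)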
define <4 x i32> @umin_vec1(<4 x i32> %x) {
; CHECK-LABEL: umin_vec1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %cmp = icmp slt <4 x i32> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %x
  ret <4 x i32> %sel
}
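
; (X >s -1) ? X : 2147483648 ==> (X <u 2147483648) ? X : 2147483648 ==> UMIN(X, 2147483648)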
define <4 x i32> @umin_vec2(<4 x i32> %x) {
; CHECK-LABEL: umin_vec2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
;
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sel = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  ret <4 x i32> %sel
}