2016-10-26 04:25:47 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Prefix layout used below: CHECK is shared by every run, AVX by every run
; except the SSE4.1 one, and AVX2ORLATER by the avx2 and both avx512 runs.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2ORLATER,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX2ORLATER,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX2ORLATER,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,AVX,XOP
|
2016-10-26 04:25:47 +08:00
|
|
|
|
|
|
|
; fold (sdiv x, 1) -> x
; Scalar case: no division is expected, only a copy of the argument into the
; return register.
define i32 @combine_sdiv_by_one(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_one:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %1 = sdiv i32 %x, 1
  ret i32 %1
}
|
|
|
|
|
2016-10-26 04:25:47 +08:00
|
|
|
; Vector (splat of 1) form of the sdiv-by-one fold: the input is returned
; as-is, so nothing but the return is expected.
define <4 x i32> @combine_vec_sdiv_by_one(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_sdiv_by_one:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %1 = sdiv <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
|
|
|
|
|
|
|
|
; fold (sdiv x, -1) -> 0 - x
; Scalar case: a single negate followed by the copy to the return register.
define i32 @combine_sdiv_by_negone(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %1 = sdiv i32 %x, -1
  ret i32 %1
}
|
|
|
|
|
2016-10-26 04:25:47 +08:00
|
|
|
; Vector (splat of -1) form of the sdiv-by-minus-one fold: expected to become
; a subtraction from an all-zero register (0 - x).
define <4 x i32> @combine_vec_sdiv_by_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_negone:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: psubd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_by_negone:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = sdiv <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}
|
|
|
|
|
2018-06-27 06:03:00 +08:00
|
|
|
; TODO fold (sdiv x, INT_MIN) -> select((icmp eq x, INT_MIN), 1, 0)
; The assertions below record the current lowering (a multiply/shift
; sequence), not the compare+select form the TODO asks for.
define i32 @combine_sdiv_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_minsigned:
; CHECK: # %bb.0:
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: shlq $31, %rax
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $31, %ecx
; CHECK-NEXT: sarl $30, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
  %1 = sdiv i32 %x, -2147483648
  ret i32 %1
}
|
|
|
|
|
|
|
|
; Vector (splat of INT_MIN) form of the sdiv-by-INT_MIN case. The assertions
; record the current lowering: a pmuldq-based multiply by 2147483647 followed
; by subtract/shift/add fix-ups, not a compare+select.
define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_minsigned:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; SSE-NEXT: pmuldq %xmm1, %xmm2
; SSE-NEXT: pmuldq %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE-NEXT: psubd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: psrad $30, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_sdiv_by_minsigned:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $30, %xmm0, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2ORLATER-LABEL: combine_vec_sdiv_by_minsigned:
; AVX2ORLATER: # %bb.0:
; AVX2ORLATER-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2ORLATER-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX2ORLATER-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX2ORLATER-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
; AVX2ORLATER-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
; AVX2ORLATER-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2ORLATER-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX2ORLATER-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX2ORLATER-NEXT: vpsrad $30, %xmm0, %xmm0
; AVX2ORLATER-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2ORLATER-NEXT: retq
;
; XOP-LABEL: combine_vec_sdiv_by_minsigned:
; XOP: # %bb.0:
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; XOP-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
; XOP-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; XOP-NEXT: vpsrld $31, %xmm0, %xmm1
; XOP-NEXT: vpsrad $30, %xmm0, %xmm0
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
  %1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}
|
|
|
|
|
2018-01-05 02:20:46 +08:00
|
|
|
; TODO fold (sdiv x, x) -> 1
; The assertions below record the current lowering, which still performs a
; real idivl of the argument by itself.
define i32 @combine_sdiv_dupe(i32 %x) {
; CHECK-LABEL: combine_sdiv_dupe:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %edi
; CHECK-NEXT: retq
  %1 = sdiv i32 %x, %x
  ret i32 %1
}
|
|
|
|
|
|
|
|
; Vector form of the sdiv-of-a-value-by-itself case. The assertions record the
; current lowering: each of the four lanes is extracted, divided with a scalar
; idivl, and re-inserted.
define <4 x i32> @combine_vec_sdiv_dupe(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_dupe:
; SSE: # %bb.0:
; SSE-NEXT: pextrd $1, %xmm0, %ecx
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: movd %xmm0, %esi
; SSE-NEXT: movl %esi, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: pinsrd $1, %ecx, %xmm1
; SSE-NEXT: pextrd $2, %xmm0, %ecx
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $2, %eax, %xmm1
; SSE-NEXT: pextrd $3, %xmm0, %ecx
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $3, %eax, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_dupe:
; AVX: # %bb.0:
; AVX-NEXT: vpextrd $1, %xmm0, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: vmovd %xmm0, %esi
; AVX-NEXT: movl %esi, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT: vpextrd $2, %xmm0, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrd $3, %xmm0, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = sdiv <4 x i32> %x, %x
  ret <4 x i32> %1
}
|
|
|
|
|
2016-10-26 04:25:47 +08:00
|
|
|
; fold (sdiv x, y) -> (udiv x, y) iff x and y are positive
; Here x is masked with 255 and y is a splat of 4, so the division is expected
; to end up as a plain logical right shift by 2.
define <4 x i32> @combine_vec_sdiv_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pos0:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_by_pos0:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = sdiv <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}
|
|
|
|
|
|
|
|
; Same masked (and 255) dividend as the splat case, but with the non-uniform
; power-of-two divisor <1, 4, 8, 16>. Expected to become per-lane logical right
; shifts: a variable shift on the AVX2-or-later and XOP runs, immediate shifts
; merged with blends on the SSE4.1 and AVX1 runs.
define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pos1:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $3, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: psrld $4, %xmm0
; SSE-NEXT: psrld $2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_sdiv_by_pos1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
; AVX1-NEXT: vpsrld $2, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpsrld $3, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pos1:
; AVX2ORLATER: # %bb.0:
; AVX2ORLATER-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2ORLATER-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2ORLATER-NEXT: retq
;
; XOP-LABEL: combine_vec_sdiv_by_pos1:
; XOP: # %bb.0:
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = sdiv <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}
|
|
|
|
|
|
|
|
; fold (sdiv x, (1 << c)) -> (x + ((x >>s (bw - 1)) >>u (bw - c))) >>s c
; where bw is the element bit width. The dividend may be negative here, so a
; bare logical shift would be wrong: the sign-derived bias is added first so
; that the arithmetic shift rounds toward zero. With c = 2 and bw = 32 this is
; the shift-by-31 / shift-by-30 / add / shift-by-2 sequence asserted below.
define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pow2a:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $30, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_by_pow2a:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $30, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}
|
|
|
|
|
2018-04-06 05:57:20 +08:00
|
|
|
; Negative power-of-two splat divisor (-4): expected to be the same
; bias-and-arithmetic-shift sequence as the +4 case, followed by a negation
; implemented as a subtraction from zero.
define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pow2a_neg:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $30, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $2, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_by_pow2a_neg:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $30, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}
|
|
|
|
|
2018-01-24 23:02:16 +08:00
|
|
|
; Non-uniform power-of-two divisors on <16 x i8>, including a divisor of 1 in
; lanes 0 and 8 (see D46161 / r331092 for the divisor-of-1 fix). The assertions
; record a fully scalarized lowering: lanes 0 and 8 are only extracted and
; re-inserted, while every other lane gets the sign-bias / add / arithmetic
; shift sequence for its own shift amount.
define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pextrb $1, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $6, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $2, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pextrb $0, %xmm0, %ecx
; SSE-NEXT: movd %ecx, %xmm1
; SSE-NEXT: pinsrb $1, %eax, %xmm1
; SSE-NEXT: pextrb $2, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $2, %eax, %xmm1
; SSE-NEXT: pextrb $3, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $4, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $4, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $3, %eax, %xmm1
; SSE-NEXT: pextrb $4, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $5, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $3, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $4, %eax, %xmm1
; SSE-NEXT: pextrb $5, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $3, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $5, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $5, %eax, %xmm1
; SSE-NEXT: pextrb $6, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $2, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $6, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $6, %eax, %xmm1
; SSE-NEXT: pextrb $7, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $7, %eax, %xmm1
; SSE-NEXT: pextrb $8, %xmm0, %eax
; SSE-NEXT: pinsrb $8, %eax, %xmm1
; SSE-NEXT: pextrb $9, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $6, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $2, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $9, %eax, %xmm1
; SSE-NEXT: pextrb $10, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $10, %eax, %xmm1
; SSE-NEXT: pextrb $11, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $4, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $4, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $11, %eax, %xmm1
; SSE-NEXT: pextrb $12, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $5, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $3, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $12, %eax, %xmm1
; SSE-NEXT: pextrb $13, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $3, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $5, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $13, %eax, %xmm1
; SSE-NEXT: pextrb $14, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: sarb $7, %cl
; SSE-NEXT: shrb $2, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb $6, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $14, %eax, %xmm1
; SSE-NEXT: pextrb $15, %xmm0, %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: sarb %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: pinsrb $15, %eax, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $6, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $0, %xmm0, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $2, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb $7, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $3, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $4, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $4, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $4, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $5, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $3, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $5, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $3, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $5, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $6, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $6, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $7, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb $7, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $8, %xmm0, %eax
; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $9, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $6, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $10, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb $7, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $11, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $4, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $4, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $12, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $5, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $3, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $13, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $3, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $5, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $14, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: sarb $7, %cl
; AVX-NEXT: shrb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb $6, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX-NEXT: vpextrb $15, %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb $7, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: sarb %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2>
  ret <16 x i8> %1
}
|
|
|
|
|
|
|
|
define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; SSE: # %bb.0:
|
|
|
|
; SSE-NEXT: movdqa %xmm0, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; AVX1: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; AVX2: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; AVX512F: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512F-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; AVX512BW: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512BW-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
|
|
|
|
; XOP: # %bb.0:
|
|
|
|
; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
|
|
|
|
ret <8 x i16> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; SSE: # %bb.0:
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm0, %xmm3
|
|
|
|
; SSE-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
|
2018-01-24 23:02:16 +08:00
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; AVX2: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; AVX512F: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the elements whose divisor is 1.
(A divisor vector in which every element is 1 is fine; only vectors
that mix 1 with other values are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm3 = xmm1[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512F-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; AVX512BW: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the lanes whose divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm3 = xmm1[0],xmm2[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
|
|
|
|
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512BW-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
|
|
|
|
; XOP: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the lanes whose divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
|
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <16 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
|
|
|
|
ret <16 x i16> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; SSE: # %bb.0:
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm3, %xmm4
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the lanes whose divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm5
|
|
|
|
; SSE-NEXT: movdqa %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0,1],xmm1[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm5, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm2[0,1],xmm5[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm5
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3,4,5,6,7]
|
|
|
|
; SSE-NEXT: pextrw $1, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $14, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $2, %cx
|
|
|
|
; SSE-NEXT: pinsrw $1, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $2, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $2, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $3, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $12, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $4, %cx
|
|
|
|
; SSE-NEXT: pinsrw $3, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $4, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $13, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $3, %cx
|
|
|
|
; SSE-NEXT: pinsrw $4, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $5, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $11, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $5, %cx
|
|
|
|
; SSE-NEXT: pinsrw $5, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $6, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $15, %cx
|
|
|
|
; SSE-NEXT: movzwl %cx, %ecx
|
|
|
|
; SSE-NEXT: shrl $10, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw $6, %cx
|
|
|
|
; SSE-NEXT: pinsrw $6, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: pextrw $7, %xmm4, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $15, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarw %cx
|
|
|
|
; SSE-NEXT: pinsrw $7, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: movdqa %xmm5, %xmm2
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; AVX1: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for non-splat vector divisors
that are powers of 2. But when the divisor vector contains a 1, the
generated instruction sequence shifts a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, so the result is undef for the lanes whose divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX1-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $14, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $2, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $12, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $4, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $13, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $3, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $11, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $5, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $15, %cx
|
|
|
|
; AVX1-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX1-NEXT: shrl $10, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw $6, %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: shrl $15, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarw %cx
|
|
|
|
; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; AVX2: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
|
2018-01-24 23:02:16 +08:00
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm0
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $14, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $2, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $12, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $4, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $13, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $3, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $11, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $5, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $15, %cx
|
|
|
|
; AVX2-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX2-NEXT: shrl $10, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw $6, %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: shrl $15, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarw %cx
|
|
|
|
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; AVX512F: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm3
|
|
|
|
; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm4 = xmm3[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm0
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
|
|
; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm4 = xmm3[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX512F-NEXT: vmovss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
|
|
|
; AVX512F-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $2, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $4, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $3, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $5, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $15, %cx
|
|
|
|
; AVX512F-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw $6, %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; AVX512F-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512F-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512F-NEXT: sarw %cx
|
|
|
|
; AVX512F-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512F-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; AVX512BW: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm2
|
|
|
|
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm2, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm4 = xmm3[0],xmm1[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
|
|
|
|
; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm4 = xmm3[0],xmm1[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; AVX512BW-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
|
|
|
; AVX512BW-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $14, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $2, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $12, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $4, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $13, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $3, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $11, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $5, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $15, %cx
|
|
|
|
; AVX512BW-NEXT: movzwl %cx, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $10, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw $6, %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: shrl $15, %ecx
|
|
|
|
; AVX512BW-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512BW-NEXT: sarw %cx
|
|
|
|
; AVX512BW-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
|
|
|
|
; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
|
|
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512BW-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
|
|
|
|
; XOP: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
2018-01-25 22:07:33 +08:00
|
|
|
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm0
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm3, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm4, %xmm3
|
|
|
|
; XOP-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
|
|
|
; XOP-NEXT: vpextrw $1, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $14, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $2, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $2, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $3, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $12, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $4, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $4, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $13, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $3, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $5, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $11, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $5, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $6, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $15, %cx
|
|
|
|
; XOP-NEXT: movzwl %cx, %ecx
|
|
|
|
; XOP-NEXT: shrl $10, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw $6, %cx
|
|
|
|
; XOP-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrw $7, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: shrl $15, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarw %cx
|
|
|
|
; XOP-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <32 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
|
|
|
|
ret <32 x i16> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
; sdiv of <4 x i32> by the non-splat power-of-two vector <1, 4, 8, 16>.
; The checked code is scalarized: lane 0 (divisor 1) is never extracted or
; reinserted, while lanes 1-3 are each extracted, adjusted as
; (x + ((x >>s 31) >>u (32 - k))) >>s k for k = 2, 3, 4, and inserted back.
define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pextrd $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm0, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-04-08 19:35:20 +08:00
|
|
|
;
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX-LABEL: combine_vec_sdiv_by_pow2b_v4i32:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $30, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $2, %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $29, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $3, %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $28, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $4, %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX-NEXT: retq
|
2016-10-26 04:25:47 +08:00
|
|
|
%1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
|
|
|
|
ret <4 x i32> %1
|
|
|
|
}
|
2018-01-24 23:02:16 +08:00
|
|
|
|
|
|
|
; sdiv of <8 x i32> by the non-splat power-of-two vector <1, 4, 8, 16> repeated
; twice. Every checked variant is scalarized per 128-bit half: lanes 0 and 4
; (divisor 1) are never extracted or reinserted, and the other lanes use
; (x + ((x >>s 31) >>u (32 - k))) >>s k for k = 2, 3, 4. The SSE variant works
; on xmm0 and xmm1 in place; the AVX1, AVX2ORLATER and XOP variants split off
; the upper half with vextractf128/vextracti128 and rejoin it with
; vinsertf128/vinserti128.
define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
|
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pextrd $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm0, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm0
|
|
|
|
; SSE-NEXT: pextrd $1, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm1, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm1, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
|
2018-01-24 23:02:16 +08:00
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
|
|
|
|
; AVX2ORLATER: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2ORLATER-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX2ORLATER-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2ORLATER-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
|
|
|
|
; AVX2ORLATER-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2ORLATER-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
|
|
|
|
; XOP: # %bb.0:
|
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
|
|
|
|
; XOP-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
|
2018-01-25 22:07:33 +08:00
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <8 x i32> %x, <i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16>
|
|
|
|
ret <8 x i32> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
|
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pextrd $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm0, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm0
|
|
|
|
; SSE-NEXT: pextrd $1, %xmm1, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm1, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm1, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrd $1, %xmm2, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm2, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm2, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm2
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm2
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm2
|
|
|
|
; SSE-NEXT: pextrd $1, %xmm3, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm3, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm3, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm3
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm3
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
|
|
|
|
; AVX1: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $30, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $2, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $29, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $3, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; AVX1-NEXT: movl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $31, %ecx
|
|
|
|
; AVX1-NEXT: shrl $28, %ecx
|
|
|
|
; AVX1-NEXT: addl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sarl $4, %ecx
|
|
|
|
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
|
|
|
|
; AVX2: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm0
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $30, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $2, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $29, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $3, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX2-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; AVX2-NEXT: movl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $31, %ecx
|
|
|
|
; AVX2-NEXT: shrl $28, %ecx
|
|
|
|
; AVX2-NEXT: addl %eax, %ecx
|
|
|
|
; AVX2-NEXT: sarl $4, %ecx
|
|
|
|
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm1
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
|
|
|
|
; AVX512: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
|
|
|
; AVX512-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $30, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $2, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $29, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $3, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $28, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $4, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
|
|
|
|
; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $30, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $2, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $29, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $3, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $28, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $4, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $30, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $2, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $29, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $3, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $28, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $4, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $30, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $2, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $29, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $3, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; AVX512-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: movl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $31, %ecx
|
|
|
|
; AVX512-NEXT: shrl $28, %ecx
|
|
|
|
; AVX512-NEXT: addl %eax, %ecx
|
|
|
|
; AVX512-NEXT: sarl $4, %ecx
|
|
|
|
; AVX512-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
|
|
|
|
; XOP: # %bb.0:
|
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm0
|
2018-01-25 22:07:33 +08:00
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm2, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm2
|
|
|
|
; XOP-NEXT: vpextrd $1, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $30, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $2, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $2, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $29, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $3, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
|
|
|
|
; XOP-NEXT: vpextrd $3, %xmm1, %eax
|
|
|
|
; XOP-NEXT: movl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $31, %ecx
|
|
|
|
; XOP-NEXT: shrl $28, %ecx
|
|
|
|
; XOP-NEXT: addl %eax, %ecx
|
|
|
|
; XOP-NEXT: sarl $4, %ecx
|
|
|
|
; XOP-NEXT: vpinsrd $3, %ecx, %xmm3, %xmm1
|
2018-01-25 22:07:33 +08:00
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <16 x i32> %x, <i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16>
|
|
|
|
ret <16 x i32> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
; Signed division of a <2 x i64> vector by the non-splat power-of-two
; constant <1, 4>.
; In the expected output for both the SSE and AVX prefixes, lane 0 (divisor 1)
; is taken unchanged from the input register, while lane 1 (divisor 4) is
; extracted to a scalar, biased by its sign (sar 63, shr 62, add), shifted
; right arithmetically by 2, and then recombined with lane 0.
define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
|
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pextrq $1, %xmm0, %rax
|
|
|
|
; SSE-NEXT: movq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $63, %rcx
|
|
|
|
; SSE-NEXT: shrq $62, %rcx
|
|
|
|
; SSE-NEXT: addq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $2, %rcx
|
|
|
|
; SSE-NEXT: movq %rcx, %xmm1
|
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX-NEXT: movq %rax, %rcx
|
|
|
|
; AVX-NEXT: sarq $63, %rcx
|
|
|
|
; AVX-NEXT: shrq $62, %rcx
|
|
|
|
; AVX-NEXT: addq %rax, %rcx
|
|
|
|
; AVX-NEXT: sarq $2, %rcx
|
|
|
|
; AVX-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <2 x i64> %x, <i64 1, i64 4>
|
|
|
|
ret <2 x i64> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
|
|
|
|
; SSE: # %bb.0:
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm2
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: psrad $31, %xmm1
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm3
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: psrlq $60, %xmm3
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: psrlq $61, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
|
|
|
|
; SSE-NEXT: paddq %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm2
|
|
|
|
; SSE-NEXT: psrlq $4, %xmm2
|
|
|
|
; SSE-NEXT: psrlq $3, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
|
|
|
|
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1152921504606846976,576460752303423488]
|
|
|
|
; SSE-NEXT: pxor %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: psubq %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: pextrq $1, %xmm0, %rax
|
|
|
|
; SSE-NEXT: movq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $63, %rcx
|
|
|
|
; SSE-NEXT: shrq $62, %rcx
|
|
|
|
; SSE-NEXT: addq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $2, %rcx
|
|
|
|
; SSE-NEXT: movq %rcx, %xmm2
|
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $60, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $4, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $61, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $3, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $62, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $2, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2018-01-24 23:02:16 +08:00
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-04-08 19:35:20 +08:00
|
|
|
;
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled the SDIV combine for pow2 non-splat vector
divisors. But when there is a 1 in the vector, the instruction sequence
that is generated involves shifting a value by its full bit width,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
In particular, on architectures that do not support vector instructions,
each element of the vector is computed separately using scalar
operations, and the resulting value is then undef for the lanes whose
divisor is 1.
(A vector of all 1s is fine; only vectors that mix 1 with other values
are affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
|
|
|
|
; AVX2ORLATER: # %bb.0:
|
|
|
|
; AVX2ORLATER-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2ORLATER-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX2ORLATER-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: shrq $60, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $4, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX2ORLATER-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: shrq $61, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $3, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX2ORLATER-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; AVX2ORLATER-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX2ORLATER-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: shrq $62, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: sarq $2, %rcx
|
|
|
|
; AVX2ORLATER-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX2ORLATER-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
|
|
|
; AVX2ORLATER-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2ORLATER-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
|
|
|
|
; XOP: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $60, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $4, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; XOP-NEXT: vmovq %xmm1, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $61, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $3, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $62, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $2, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <4 x i64> %x, <i64 1, i64 4, i64 8, i64 16>
|
|
|
|
ret <4 x i64> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm3, %xmm4
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm3
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: psrad $31, %xmm1
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm5
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: psrlq $60, %xmm5
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: psrlq $61, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm5[4,5,6,7]
|
|
|
|
; SSE-NEXT: paddq %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm3
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: psrlq $4, %xmm3
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: psrlq $3, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
|
|
|
|
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1152921504606846976,576460752303423488]
|
|
|
|
; SSE-NEXT: pxor %xmm5, %xmm1
|
|
|
|
; SSE-NEXT: psubq %xmm5, %xmm1
|
2018-04-08 19:35:20 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm4, %xmm3
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: psrad $31, %xmm3
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE-NEXT: movdqa %xmm3, %xmm6
|
|
|
|
; SSE-NEXT: psrlq $60, %xmm6
|
|
|
|
; SSE-NEXT: psrlq $61, %xmm3
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm6[4,5,6,7]
|
|
|
|
; SSE-NEXT: paddq %xmm4, %xmm3
|
|
|
|
; SSE-NEXT: movdqa %xmm3, %xmm4
|
|
|
|
; SSE-NEXT: psrlq $4, %xmm4
|
|
|
|
; SSE-NEXT: psrlq $3, %xmm3
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
|
|
|
|
; SSE-NEXT: pxor %xmm5, %xmm3
|
|
|
|
; SSE-NEXT: psubq %xmm5, %xmm3
|
|
|
|
; SSE-NEXT: pextrq $1, %xmm0, %rax
|
|
|
|
; SSE-NEXT: movq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $63, %rcx
|
|
|
|
; SSE-NEXT: shrq $62, %rcx
|
|
|
|
; SSE-NEXT: addq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $2, %rcx
|
|
|
|
; SSE-NEXT: movq %rcx, %xmm4
|
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
|
|
|
|
; SSE-NEXT: pextrq $1, %xmm2, %rax
|
|
|
|
; SSE-NEXT: movq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $63, %rcx
|
|
|
|
; SSE-NEXT: shrq $62, %rcx
|
|
|
|
; SSE-NEXT: addq %rax, %rcx
|
|
|
|
; SSE-NEXT: sarq $2, %rcx
|
|
|
|
; SSE-NEXT: movq %rcx, %xmm4
|
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
|
|
|
; AVX1: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $60, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $4, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovq %xmm2, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $61, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $3, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $62, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $2, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $60, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $4, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovq %xmm2, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $61, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $3, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $63, %rcx
|
|
|
|
; AVX1-NEXT: shrq $62, %rcx
|
|
|
|
; AVX1-NEXT: addq %rax, %rcx
|
|
|
|
; AVX1-NEXT: sarq $2, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX1-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
|
|
|
; AVX2: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $60, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $4, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX2-NEXT: vmovq %xmm2, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $61, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $3, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $62, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $2, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
|
|
|
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $60, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $4, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX2-NEXT: vmovq %xmm2, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $61, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $3, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX2-NEXT: movq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $63, %rcx
|
|
|
|
; AVX2-NEXT: shrq $62, %rcx
|
|
|
|
; AVX2-NEXT: addq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sarq $2, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX2-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
;
|
|
|
|
; AVX512-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
|
|
|
; AVX512: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $60, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $4, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX512-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $61, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $3, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $62, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $2, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $60, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $4, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX512-NEXT: vmovq %xmm2, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $61, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $3, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: movq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $63, %rcx
|
|
|
|
; AVX512-NEXT: shrq $62, %rcx
|
|
|
|
; AVX512-NEXT: addq %rax, %rcx
|
|
|
|
; AVX512-NEXT: sarq $2, %rcx
|
|
|
|
; AVX512-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX512-NEXT: retq
|
2018-01-25 22:07:33 +08:00
|
|
|
;
|
|
|
|
; XOP-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
|
|
|
; XOP: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $60, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $4, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; XOP-NEXT: vmovq %xmm2, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $61, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $3, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $62, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $2, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm2, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $60, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $4, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; XOP-NEXT: vmovq %xmm2, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $61, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $3, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
|
|
|
; XOP-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; XOP-NEXT: movq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $63, %rcx
|
|
|
|
; XOP-NEXT: shrq $62, %rcx
|
|
|
|
; XOP-NEXT: addq %rax, %rcx
|
|
|
|
; XOP-NEXT: sarq $2, %rcx
|
|
|
|
; XOP-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; XOP-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
|
|
|
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
2018-05-30 00:36:27 +08:00
|
|
|
; XOP-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <8 x i64> %x, <i64 1, i64 4, i64 8, i64 16, i64 1, i64 4, i64 8, i64 16>
|
|
|
|
ret <8 x i64> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
|
|
|
|
; SSE-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
|
|
|
|
; SSE: # %bb.0:
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; SSE-NEXT: pextrd $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $30, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $2, %ecx
|
|
|
|
; SSE-NEXT: negl %ecx
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm0, %edx
|
|
|
|
; SSE-NEXT: pinsrd $1, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $31, %ecx
|
|
|
|
; SSE-NEXT: shrl $29, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl $3, %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm0
|
|
|
|
; SSE-NEXT: movl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $31, %eax
|
|
|
|
; SSE-NEXT: shrl $28, %eax
|
|
|
|
; SSE-NEXT: addl %edx, %eax
|
|
|
|
; SSE-NEXT: sarl $4, %eax
|
|
|
|
; SSE-NEXT: negl %eax
|
|
|
|
; SSE-NEXT: pinsrd $3, %eax, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; SSE-NEXT: retq
|
2018-04-08 19:35:20 +08:00
|
|
|
;
|
[DAGCombiner] Fix a case of 1 in non-splat vector pow2 divisor
Summary:
D42479 (rL329525) enabled SDIV combine for pow2 non-splat vector
dividers. But when there is a 1 in a vector, the instruction sequence to
be generated involves shifting a value by the number of its bit widths,
which is undefined
(https://github.com/llvm-mirror/llvm/blob/c64f4dbfe31e509f9c1092b951e524b056245af8/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L6000-L6006).
Especially, in architectures that do not support vector instructions,
each of element in a vector will be computed separately using scalar
operations, and then the resulting value will be undef for '1' values
in a vector.
(All 1's vector is fine; only vectors mixed with 1 and others will be
affected.)
Reviewers: RKSimon, jgravelle-google
Subscribers: jfb, dschuff, sbc100, jgravelle-google, llvm-commits
Differential Revision: https://reviews.llvm.org/D46161
llvm-svn: 331092
2018-04-28 06:23:11 +08:00
|
|
|
; AVX-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $30, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $2, %ecx
|
|
|
|
; AVX-NEXT: negl %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $29, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $3, %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $31, %ecx
|
|
|
|
; AVX-NEXT: shrl $28, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl $4, %ecx
|
|
|
|
; AVX-NEXT: negl %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm0
|
2018-05-30 00:36:27 +08:00
|
|
|
; AVX-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <4 x i32> %x, <i32 1, i32 -4, i32 8, i32 -16>
|
|
|
|
ret <4 x i32> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @combine_vec_sdiv_by_pow2b_undef1(<4 x i32> %x) {
|
|
|
|
; CHECK-LABEL: combine_vec_sdiv_by_pow2b_undef1:
|
|
|
|
; CHECK: # %bb.0:
|
2018-05-30 00:36:27 +08:00
|
|
|
; CHECK-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <4 x i32> %x, <i32 undef, i32 -4, i32 undef, i32 -16>
|
|
|
|
ret <4 x i32> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @combine_vec_sdiv_by_pow2b_undef2(<4 x i32> %x) {
|
|
|
|
; CHECK-LABEL: combine_vec_sdiv_by_pow2b_undef2:
|
|
|
|
; CHECK: # %bb.0:
|
2018-05-30 00:36:27 +08:00
|
|
|
; CHECK-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <4 x i32> %x, <i32 undef, i32 4, i32 undef, i32 16>
|
|
|
|
ret <4 x i32> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @combine_vec_sdiv_by_pow2b_undef3(<4 x i32> %x) {
|
|
|
|
; CHECK-LABEL: combine_vec_sdiv_by_pow2b_undef3:
|
|
|
|
; CHECK: # %bb.0:
|
2018-05-30 00:36:27 +08:00
|
|
|
; CHECK-NEXT: retq
|
2018-01-24 23:02:16 +08:00
|
|
|
%1 = sdiv <4 x i32> %x, <i32 undef, i32 -4, i32 undef, i32 16>
|
|
|
|
ret <4 x i32> %1
|
|
|
|
}
|
2018-06-23 02:31:57 +08:00
|
|
|
|
|
|
|
; PR37119
|
|
|
|
define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
|
|
|
|
; SSE-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; SSE: # %bb.0:
|
|
|
|
; SSE-NEXT: pextrb $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pextrb $0, %xmm0, %ecx
|
|
|
|
; SSE-NEXT: negb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %ecx
|
|
|
|
; SSE-NEXT: movd %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pinsrb $1, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pinsrb $2, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $3, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $3, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $4, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $4, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $5, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $5, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $6, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pinsrb $6, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $7, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $7, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $8, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $8, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $9, %xmm0, %eax
|
|
|
|
; SSE-NEXT: pinsrb $9, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: negb %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: negb %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX2-NEXT: negb %cl
|
|
|
|
; AVX2-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX2-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: negb %al
|
|
|
|
; AVX2-NEXT: movzbl %al, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
|
|
|
|
; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; AVX512F: # %bb.0:
|
|
|
|
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX512F-NEXT: negb %cl
|
|
|
|
; AVX512F-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX512F-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: negb %al
|
|
|
|
; AVX512F-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
|
|
|
|
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; AVX512BW: # %bb.0:
|
|
|
|
; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX512BW-NEXT: negb %cl
|
|
|
|
; AVX512BW-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX512BW-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: negb %al
|
|
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
|
|
|
; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
|
|
|
|
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
|
|
|
|
; AVX512BW-NEXT: retq
|
|
|
|
;
|
|
|
|
; XOP-LABEL: non_splat_minus_one_divisor_0:
|
|
|
|
; XOP: # %bb.0:
|
|
|
|
; XOP-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; XOP-NEXT: negb %cl
|
|
|
|
; XOP-NEXT: movzbl %cl, %ecx
|
|
|
|
; XOP-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; XOP-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; XOP-NEXT: negb %al
|
|
|
|
; XOP-NEXT: movzbl %al, %eax
|
|
|
|
; XOP-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7,8],xmm0[9,10,11,12,13,14,15]
|
|
|
|
; XOP-NEXT: retq
|
|
|
|
%div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
ret <16 x i8> %div
|
|
|
|
}
|
|
|
|
|
2018-06-27 01:06:05 +08:00
|
|
|
; Test sdiv of <16 x i8> by a non-splat constant vector whose lanes are either
; -1 (lanes 0,1,3,4,5,7,8) or 2 (lanes 2,6,9-15).
; The expected codegen recorded below is scalarized lane by lane (extract,
; scalar op, insert): the -1 lanes are negated with negb, and the 2 lanes use
; the shrb $7 / addb / sarb sequence.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
|
2018-06-23 02:31:57 +08:00
|
|
|
; SSE-LABEL: non_splat_minus_one_divisor_1:
|
|
|
|
; SSE: # %bb.0:
|
2018-06-27 01:46:51 +08:00
|
|
|
; SSE-NEXT: pextrb $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pextrb $0, %xmm0, %ecx
|
|
|
|
; SSE-NEXT: negb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %ecx
|
|
|
|
; SSE-NEXT: movd %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pinsrb $1, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $2, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $3, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $3, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $4, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $4, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $5, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $5, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $6, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $6, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $7, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $7, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $8, %xmm0, %eax
|
|
|
|
; SSE-NEXT: negb %al
|
|
|
|
; SSE-NEXT: movzbl %al, %eax
|
|
|
|
; SSE-NEXT: pinsrb $8, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $9, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $9, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $10, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $10, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $11, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $11, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $12, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $12, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $13, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $13, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $14, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $14, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrb $15, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrb $7, %cl
|
|
|
|
; SSE-NEXT: addb %al, %cl
|
|
|
|
; SSE-NEXT: sarb %cl
|
|
|
|
; SSE-NEXT: movzbl %cl, %eax
|
|
|
|
; SSE-NEXT: pinsrb $15, %eax, %xmm1
|
2018-06-27 01:06:05 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2018-06-27 01:46:51 +08:00
|
|
|
; AVX-LABEL: non_splat_minus_one_divisor_1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpextrb $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: negb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $4, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $5, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $6, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $7, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX-NEXT: negb %al
|
|
|
|
; AVX-NEXT: movzbl %al, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $9, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $10, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $11, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $12, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $13, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $14, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrb $15, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrb $7, %cl
|
|
|
|
; AVX-NEXT: addb %al, %cl
|
|
|
|
; AVX-NEXT: sarb %cl
|
|
|
|
; AVX-NEXT: movzbl %cl, %eax
|
|
|
|
; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-06-27 01:06:05 +08:00
|
|
|
%div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 2, i8 -1, i8 -1, i8 -1, i8 2, i8 -1, i8 -1, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
|
|
|
|
ret <16 x i8> %div
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test sdiv of <4 x i32> by the non-splat constant vector <-1, 1, 2, -2>.
; The expected codegen recorded below is scalarized lane by lane: lane 0 is
; negated (negl), lane 1 is extracted and re-inserted unchanged, lane 2 uses
; the shrl $31 / addl / sarl sequence, and lane 3 uses that same sequence
; followed by negl.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
|
|
|
|
; SSE-LABEL: non_splat_minus_one_divisor_2:
|
|
|
|
; SSE: # %bb.0:
|
2018-06-23 02:31:57 +08:00
|
|
|
; SSE-NEXT: pextrd $1, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movd %xmm0, %ecx
|
|
|
|
; SSE-NEXT: negl %ecx
|
|
|
|
; SSE-NEXT: movd %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pinsrd $1, %eax, %xmm1
|
|
|
|
; SSE-NEXT: pextrd $2, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $31, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl %ecx
|
|
|
|
; SSE-NEXT: pinsrd $2, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: pextrd $3, %xmm0, %eax
|
|
|
|
; SSE-NEXT: movl %eax, %ecx
|
|
|
|
; SSE-NEXT: shrl $31, %ecx
|
|
|
|
; SSE-NEXT: addl %eax, %ecx
|
|
|
|
; SSE-NEXT: sarl %ecx
|
|
|
|
; SSE-NEXT: negl %ecx
|
|
|
|
; SSE-NEXT: pinsrd $3, %ecx, %xmm1
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2018-06-27 01:06:05 +08:00
|
|
|
; AVX-LABEL: non_splat_minus_one_divisor_2:
|
2018-06-23 02:31:57 +08:00
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpextrd $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %ecx
|
|
|
|
; AVX-NEXT: negl %ecx
|
|
|
|
; AVX-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrl $31, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: movl %eax, %ecx
|
|
|
|
; AVX-NEXT: shrl $31, %ecx
|
|
|
|
; AVX-NEXT: addl %eax, %ecx
|
|
|
|
; AVX-NEXT: sarl %ecx
|
|
|
|
; AVX-NEXT: negl %ecx
|
|
|
|
; AVX-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%div = sdiv <4 x i32> %A, <i32 -1, i32 1, i32 2, i32 -2>
|
|
|
|
ret <4 x i32> %div
|
|
|
|
}
|