[X86] Add v2i64->v2i32/v2i16/v2i8 test cases to the trunc packus/ssat/usat tests. NFC

llvm-svn: 374704
Craig Topper 2019-10-13 05:47:42 +00:00
parent 4056e7f02a
commit 8fe8adb9f1
3 changed files with 2721 additions and 0 deletions
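
For reference, every function added by this patch tests the same unsigned-saturation idiom: clamp the wide value to the destination type's unsigned maximum, then truncate. A minimal sketch of the v2i64->v2i32 form of the pattern (the same IR that appears in the diff below, with an assumed function name):

define <2 x i32> @usat_sketch(<2 x i64> %a0) {
  ; clamp to UINT32_MAX, i.e. min(%a0, 4294967295) per element
  %c = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %m = select <2 x i1> %c, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
  ; then truncate the clamped value to the narrow type
  %t = trunc <2 x i64> %m to <2 x i32>
  ret <2 x i32> %t
}

On AVX512 targets this lowers to vpminuq plus a shuffle (or to a single vpmovusqd/vpmovusqw/vpmovusqb for the store variants); pre-AVX512 targets have no unsigned 64-bit vector compare and must synthesize the clamp, which is why the SSE and AVX check lines below are much longer.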

Two of the three changed files' diffs are suppressed because they are too large; the remaining file is shown below.

@@ -15,6 +15,224 @@
; Unsigned saturation truncation to vXi32
;
define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v2i64_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i32:
; SKX: # %bb.0:
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
%3 = trunc <2 x i64> %2 to <2 x i32>
ret <2 x i32> %3
}
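; Note on the SSE2/SSSE3 lowerings above: pre-SSE4.2 x86 has no unsigned
; 64-bit vector compare, so the input is biased with the sign-flip
; constant 0x8000000080000000 (9223372039002259456) via PXOR and compared
; against the pre-biased limit as signed 32-bit lanes; the final mask is
; hi_gt | (hi_eq & lo_gt), built from PCMPGTD, PCMPEQD, and shuffles.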
define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
; SSE2-LABEL: trunc_usat_v2i64_v2i32_store:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: movq %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i32_store:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: movq %xmm0, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i32_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovlpd %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqd %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqd %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i32_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqd %xmm0, (%rdi)
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
%3 = trunc <2 x i64> %2 to <2 x i32>
store <2 x i32> %3, <2 x i32>* %p1
ret void
}
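; With AVX512VL the store form collapses to a single VPMOVUSQD, which
; performs the unsigned-saturating qword->dword down-convert and writes
; the narrowed elements directly to memory.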
define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i32:
; SSE2: # %bb.0:
@@ -479,6 +697,278 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(<8 x i64>* %p0) {
; Unsigned saturation truncation to vXi16
;
define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v2i64_v2i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i16:
; SKX: # %bb.0:
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
%3 = trunc <2 x i64> %2 to <2 x i16>
ret <2 x i16> %3
}
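; Same idiom with a 65535 clamp. The SSE compare constant
; 9223372039002324991 is 0x800000008000FFFF: the 65535 limit with the
; 0x80000000 sign bias folded into each 32-bit half, so the biased
; PCMPGTD/PCMPEQD sequence from the i32 case can be reused.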
define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16>* %p1) {
; SSE2-LABEL: trunc_usat_v2i64_v2i16_store:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: movd %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i16_store:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: movd %xmm0, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i16_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: movd %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vmovd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqw %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqw %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i16_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqw %xmm0, (%rdi)
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
%3 = trunc <2 x i64> %2 to <2 x i16>
store <2 x i16> %3, <2 x i16>* %p1
ret void
}
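; As in the dword case, AVX512VL reduces the store form to one
; VPMOVUSQW: saturate to 65535, truncate qword->word, and store in a
; single instruction.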
define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i16:
; SSE2: # %bb.0:
@@ -1592,6 +2082,234 @@ define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32>* %p0) {
; Unsigned saturation truncation to vXi8
;
define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v2i64_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i8:
; SKX: # %bb.0:
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
%3 = trunc <2 x i64> %2 to <2 x i8>
ret <2 x i8> %3
}
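; For the 255 clamp, the SSE2 version finishes with a PAND plus a chain
; of PACKUSWB byte packs, while SSSE3 and later gather bytes 0 and 8
; with a single PSHUFB.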
define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) {
; SSE2-LABEL: trunc_usat_v2i64_v2i8_store:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: packuswb %xmm2, %xmm2
; SSE2-NEXT: packuswb %xmm0, %xmm2
; SSE2-NEXT: packuswb %xmm0, %xmm2
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v2i64_v2i8_store:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm2, %eax
; SSSE3-NEXT: movw %ax, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pextrw $0, %xmm2, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqb %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqb %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqb %xmm0, (%rdi)
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
%3 = trunc <2 x i64> %2 to <2 x i8>
store <2 x i8> %3, <2 x i8>* %p1
ret void
}
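; Without AVX512 the two-byte store is done by extracting the packed
; word (MOVD+MOVW on SSE2/SSSE3, PEXTRW on SSE4.1 and AVX), while
; AVX512VL again needs only one VPMOVUSQB.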
define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i8:
; SSE2: # %bb.0: