forked from OSchip/llvm-project
[X86] Add v2i64->v2i32/v2i16/v2i8 test cases to the trunc packus/ssat/usat tests. NFC
llvm-svn: 374704
This commit is contained in:
parent
4056e7f02a
commit
8fe8adb9f1
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@@ -15,6 +15,224 @@
|
|||
; Unsigned saturation truncation to vXi32
|
||||
;
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i32>, returned by value.
; Each lane keeps %a0 when it compares unsigned-less-than 4294967295 and is
; replaced by 4294967295 otherwise; the selected lanes are then truncated to i32.
; The commented assembly in the body is the expected output per target
; configuration; the IR under test is the four instructions at the end.
define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
|
||||
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i32:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i32>
|
||||
ret <2 x i32> %3
|
||||
}
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i32>, written to memory.
; Each lane keeps %a0 when it compares unsigned-less-than 4294967295 and is
; replaced by 4294967295 otherwise; the selected lanes are truncated to i32 and
; the resulting <2 x i32> is stored through %p1.
; Three of the configurations below expect the whole clamp/truncate/store
; sequence to become a single vpmovusqd store to (%rdi); the others expect the
; explicit clamp followed by a shuffle and a 64-bit store.
define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSSE3-NEXT: movq %xmm0, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE41-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
|
||||
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX-NEXT: vmovlpd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovusqd %xmm0, (%rdi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovusqd %xmm0, (%rdi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i32_store:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovusqd %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i32>
|
||||
store <2 x i32> %3, <2 x i32>* %p1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v4i64_v4i32:
|
||||
; SSE2: # %bb.0:
|
||||
|
@@ -479,6 +697,278 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(<8 x i64>* %p0) {
|
|||
; Unsigned saturation truncation to vXi16
|
||||
;
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i16>, returned by value.
; Each lane keeps %a0 when it compares unsigned-less-than 65535 and is replaced
; by 65535 otherwise; the selected lanes are then truncated to i16.
; The commented assembly in the body is the expected output per target
; configuration; the IR under test is the four instructions at the end.
define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i16:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i16>
|
||||
ret <2 x i16> %3
|
||||
}
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i16>, written to memory.
; Each lane keeps %a0 when it compares unsigned-less-than 65535 and is replaced
; by 65535 otherwise; the selected lanes are truncated to i16 and the resulting
; <2 x i16> is stored through %p1.
; Three of the configurations below expect the whole clamp/truncate/store
; sequence to become a single vpmovusqw store to (%rdi); the others expect the
; explicit clamp followed by shuffles and a 32-bit store.
define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16>* %p1) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE2-NEXT: movd %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSSE3-NEXT: movd %xmm0, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE41-NEXT: movd %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
|
||||
; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovusqw %xmm0, (%rdi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
|
||||
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovusqw %xmm0, (%rdi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i16_store:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovusqw %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i16>
|
||||
store <2 x i16> %3, <2 x i16>* %p1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v4i64_v4i16:
|
||||
; SSE2: # %bb.0:
|
||||
|
@@ -1592,6 +2082,234 @@ define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32>* %p0) {
|
|||
; Unsigned saturation truncation to vXi8
|
||||
;
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i8>, returned by value.
; Each lane keeps %a0 when it compares unsigned-less-than 255 and is replaced
; by 255 otherwise; the selected lanes are then truncated to i8.
; The commented assembly in the body is the expected output per target
; configuration; the IR under test is the four instructions at the end.
define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: por %xmm0, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
|
||||
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
|
||||
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i8:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i8>
|
||||
ret <2 x i8> %3
|
||||
}
|
||||
|
||||
; Unsigned-saturating truncation of <2 x i64> to <2 x i8>, written to memory.
; Each lane keeps %a0 when it compares unsigned-less-than 255 and is replaced
; by 255 otherwise; the selected lanes are truncated to i8 and the resulting
; <2 x i8> is stored through %p1.
; Three of the configurations below expect the whole clamp/truncate/store
; sequence to become a single vpmovusqb store to (%rdi); the others expect the
; explicit clamp followed by a byte pack/shuffle and a 16-bit store.
define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) {
|
||||
; SSE2-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: movd %xmm2, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
||||
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSSE3-NEXT: movd %xmm2, %eax
|
||||
; SSSE3-NEXT: movw %ax, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: pextrw $0, %xmm2, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
|
||||
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
|
||||
; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
|
||||
; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovusqb %xmm0, (%rdi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
|
||||
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovusqb %xmm0, (%rdi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_usat_v2i64_v2i8_store:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovusqb %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
|
||||
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
|
||||
%3 = trunc <2 x i64> %2 to <2 x i8>
|
||||
store <2 x i8> %3, <2 x i8>* %p1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v4i64_v4i8:
|
||||
; SSE2: # %bb.0:
|
||||
|
|
Loading…
Reference in New Issue