[X86][SSE] Add shuffle combining tests with PACKSS/PACKUS

llvm-svn: 314628
Simon Pilgrim 2017-10-01 17:30:44 +00:00
parent c7076a3ba9
commit d25c200cd6
2 changed files with 156 additions and 0 deletions
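The new tests exercise shuffle combining through the x86 PACK instructions: PACKSSDW/PACKSSWB narrow each input element with signed saturation, PACKUSDW/PACKUSWB with unsigned saturation, and each test feeds the packed result into a shuffle that the combiner may be able to fold. As a minimal sketch of the pack operation itself, using the SSE2 intrinsic _mm_packs_epi32 (an illustration, not part of the commit):

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
    /* Four 32-bit ints per input; values outside the i16 range are
       clamped by the signed-saturating pack. */
    __m128i a = _mm_setr_epi32(-1, 42, -100000, 100000);
    __m128i b = _mm_setr_epi32(0, 1, 2, 3);

    /* PACKSSDW: the low four i16 results come from a, the high four
       from b. */
    __m128i p = _mm_packs_epi32(a, b);

    short out[8];
    _mm_storeu_si128((__m128i *)out, p);
    for (int i = 0; i < 8; ++i)
        printf("%d ", out[i]); /* -1 42 -32768 32767 0 1 2 3 */
    printf("\n");
    return 0;
}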

@@ -804,6 +804,96 @@ define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) {
ret <32 x i8> %6
}
define <16 x i16> @shuffle_combine_packssdw_pshufb(<8 x i32> %a0) {
; X32-LABEL: shuffle_combine_packssdw_pshufb:
; X32: # BB#0:
; X32-NEXT: vpsrad $31, %ymm0, %ymm0
; X32-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packssdw_pshufb:
; X64: # BB#0:
; X64-NEXT: vpsrad $31, %ymm0, %ymm0
; X64-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
; X64-NEXT: retq
%1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1)
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8>
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
define <32 x i8> @shuffle_combine_packsswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
; X32-LABEL: shuffle_combine_packsswb_pshufb:
; X32: # BB#0:
; X32-NEXT: vpsraw $15, %ymm0, %ymm0
; X32-NEXT: vpsraw $15, %ymm1, %ymm1
; X32-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packsswb_pshufb:
; X64: # BB#0:
; X64-NEXT: vpsraw $15, %ymm0, %ymm0
; X64-NEXT: vpsraw $15, %ymm1, %ymm1
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
; X64-NEXT: retq
%1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%3 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
%4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <32 x i8> %4
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
; X32-LABEL: shuffle_combine_packusdw_pshufb:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packusdw_pshufb:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
; X64-NEXT: retq
%1 = and <8 x i32> %a0, <i32 255, i32 65535, i32 255, i32 65535, i32 255, i32 255, i32 255, i32 65535>
%2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8>
ret <16 x i16> %3
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
; X32-LABEL: shuffle_combine_packuswb_pshufb:
; X32: # BB#0:
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
; X32-NEXT: vpsrlw $8, %ymm1, %ymm1
; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packuswb_pshufb:
; X64: # BB#0:
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
; X64-NEXT: vpsrlw $8, %ymm1, %ymm1
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
; X64-NEXT: retq
%1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%3 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
%4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <32 x i8> %4
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
; X32-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
; X32: # BB#0:
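In the 256-bit tests above, the AVX2 pack instructions operate on each 128-bit lane independently, which is why the expected vpshufb masks treat bytes 0-15 and 16-31 as two separate groups rather than one straight-through permutation. A small sketch of the lane-wise interleaving with _mm256_packs_epi32 (an illustration, not part of the commit):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);

    /* VPACKSSDW packs each 128-bit lane separately:
       low lane:  a[0..3] then b[0..3]
       high lane: a[4..7] then b[4..7] */
    __m256i p = _mm256_packs_epi32(a, b);

    short out[16];
    _mm256_storeu_si256((__m256i *)out, p);
    for (int i = 0; i < 16; ++i)
        printf("%d ", out[i]); /* 0 1 2 3 10 11 12 13 4 5 6 7 14 15 16 17 */
    printf("\n");
    return 0;
}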

@@ -640,6 +640,72 @@ define <8 x i16> @shuffle_combine_unpack_insert(<8 x i16> %a0) {
ret <8 x i16> %8
}
define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) {
; SSE-LABEL: shuffle_combine_packssdw_pshufb:
; SSE: # BB#0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_combine_packssdw_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
; AVX-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
%2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1)
%3 = bitcast <8 x i16> %2 to <16 x i8>
%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>)
ret <16 x i8> %4
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
define <16 x i8> @shuffle_combine_packsswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: shuffle_combine_packsswb_pshufb:
; SSE: # BB#0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_combine_packsswb_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
; AVX-NEXT: retq
%1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%3 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %4
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
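Note that in the 128-bit packsswb test above the expected code contains no pack at all: after psraw $15 every word is 0 or -1, so either byte of a word equals its saturated i8 value, and the byte shuffle can read the sign words directly, dropping both the pack and the shift of %a1. A small C sketch checking that the folded form computes the same bytes (an illustration, not part of the commit):

#include <tmmintrin.h> /* SSSE3: _mm_shuffle_epi8 */
#include <stdio.h>
#include <string.h>

int main(void) {
    __m128i a = _mm_setr_epi16(-5, 7, 0, -1, 12345, -12345, 1, -2);
    __m128i b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);

    /* Original pattern: sign-fill the words, pack with saturation,
       then reverse each 8-byte half with PSHUFB. */
    __m128i s1 = _mm_srai_epi16(a, 15);
    __m128i s2 = _mm_srai_epi16(b, 15);
    __m128i packed = _mm_packs_epi16(s1, s2);
    __m128i rev = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0,
                                7, 6, 5, 4, 3, 2, 1, 0);
    __m128i full = _mm_shuffle_epi8(packed, rev);

    /* Folded form from the checks: skip the pack and let PSHUFB pick
       one byte of each 0/-1 word directly. */
    __m128i pick = _mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0,
                                 14, 12, 10, 8, 6, 4, 2, 0);
    __m128i folded = _mm_shuffle_epi8(s1, pick);

    char x[16], y[16];
    _mm_storeu_si128((__m128i *)x, full);
    _mm_storeu_si128((__m128i *)y, folded);
    printf("%s\n", memcmp(x, y, 16) == 0 ? "equal" : "differ"); /* equal */
    return 0;
}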
define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: shuffle_combine_packuswb_pshufb:
; SSE: # BB#0:
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_combine_packuswb_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
; AVX-NEXT: retq
%1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%3 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %4
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @constant_fold_pshufb() {
; SSE-LABEL: constant_fold_pshufb:
; SSE: # BB#0: