From 17eace47cb3ee7c819a9141c123e3dbdf9d7a6b4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 9 Jan 2019 12:34:10 +0000
Subject: [PATCH] [X86] Add extra test coverage for combining shuffles to
 PACKSS/PACKUS

llvm-svn: 350707
---
 .../X86/vector-shuffle-combining-avx2.ll      | 38 +++++++++++++
 .../X86/vector-shuffle-combining-avx512bw.ll  | 56 +++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 2ea0f1ab3e71..dda9e140d459 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -611,6 +611,44 @@ define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1
 }
 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
 
+define <32 x i8> @combine_pshufb_as_packsswb(<16 x i16> %a0, <16 x i16> %a1) nounwind {
+; CHECK-LABEL: combine_pshufb_as_packsswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsraw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsraw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30>)
+  %7 = or <32 x i8> %5, %6
+  ret <32 x i8> %7
+}
+
+define <32 x i8> @combine_pshufb_as_packuswb(<16 x i16> %a0, <16 x i16> %a1) nounwind {
+; CHECK-LABEL: combine_pshufb_as_packuswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsrlw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = lshr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = lshr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30>)
+  %7 = or <32 x i8> %5, %6
+  ret <32 x i8> %7
+}
+
 define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
 ; X86-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
 ; X86:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index b8d3824f1caf..5c40520ac1ca 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -824,6 +824,62 @@ define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) {
   ret <32 x i16> %1
 }
 
+define <64 x i8> @combine_pshufb_as_packsswb(<32 x i16> %a0, <32 x i16> %a1) nounwind {
+; X32-LABEL: combine_pshufb_as_packsswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packsswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = ashr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = ashr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 32, i8 34, i8 36, i8 38, i8 40, i8 42, i8 44, i8 46, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 48, i8 50, i8 52, i8 54, i8 56, i8 58, i8 60, i8 62, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1)
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 32, i8 34, i8 36, i8 38, i8 40, i8 42, i8 44, i8 46, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 48, i8 50, i8 52, i8 54, i8 56, i8 58, i8 60, i8 62>, <64 x i8> undef, i64 -1)
+  %7 = or <64 x i8> %5, %6
+  ret <64 x i8> %7
+}
+
+define <64 x i8> @combine_pshufb_as_packuswb(<32 x i16> %a0, <32 x i16> %a1) nounwind {
+; X32-LABEL: combine_pshufb_as_packuswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packuswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = lshr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = lshr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 32, i8 34, i8 36, i8 38, i8 40, i8 42, i8 44, i8 46, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 48, i8 50, i8 52, i8 54, i8 56, i8 58, i8 60, i8 62, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1)
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 32, i8 34, i8 36, i8 38, i8 40, i8 42, i8 44, i8 46, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 48, i8 50, i8 52, i8 54, i8 56, i8 58, i8 60, i8 62>, <64 x i8> undef, i64 -1)
+  %7 = or <64 x i8> %5, %6
+  ret <64 x i8> %7
+}
+
 define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
 ; X32-LABEL: combine_vpermi2var_32i16_as_pshufb:
 ; X32:       # %bb.0:
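For reference, and not part of the patch itself: the tests above deliberately check the current vpshufb+vpor codegen, while the coverage targets the saturating-pack form the shuffle combiner is expected to recognize. Below is a minimal illustrative sketch of that packed form using the AVX2 pack intrinsic directly; the function name packsswb_reference is invented for this note and does not appear in the patch.

; Illustrative sketch only. An arithmetic shift right by 11 leaves every i16
; lane in [-16,15], so a signed saturating pack to i8 preserves the values
; exactly and produces the same per-128-bit-lane byte layout that the
; pshufb+por pairs in the tests above select by hand.
define <32 x i8> @packsswb_reference(<16 x i16> %a0, <16 x i16> %a1) nounwind {
  %lo = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %hi = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %pack = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %lo, <16 x i16> %hi)
  ret <32 x i8> %pack
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone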