diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 977066c9290c..065cfff5fc8e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1215,6 +1215,28 @@ entry:
   ret <16 x i8> %shuffle
 }
 
+define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %3 = bitcast <8 x i16> %1 to <16 x i8>
+  %4 = bitcast <8 x i16> %2 to <16 x i8>
+  %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %5
+}
+
 define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
 ; Nothing interesting to test here. Just make sure we didn't crashe.
 ; ALL-LABEL: stress_test2:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 04296032362a..3c69f6160ddb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2262,6 +2262,35 @@ define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_
   ret <32 x i8> %shuffle
 }
 
+define <32 x i8> @shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<16 x i16> %a0, <16 x i16> %a1) {
+; AVX1-LABEL: shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2OR512VL-LABEL: shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX2OR512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2OR512VL-NEXT: retq
+  %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = shufflevector <32 x i8> %3, <32 x i8> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
+  ret <32 x i8> %5
+}
+
 define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX1-LABEL: PR28136:
 ; AVX1: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 56ef7636770e..196a531016f7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -538,3 +538,54 @@ define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_
   %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32>
   ret <64 x i8> %shuffle
 }
+
+define <64 x i8> @shuffe_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
+; AVX512F-LABEL: shuffe_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm1
+; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512F-NEXT: vpackuswb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: shuffe_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512BW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: shuffe_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm1
+; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX512DQ-NEXT: retq
+;
+; AVX512VBMI-LABEL: shuffe_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
+; AVX512VBMI: # BB#0:
+; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127]
+; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
+; AVX512VBMI-NEXT: retq
+  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
+  ret <64 x i8> %5
+}