From c7ba5699c5a9e908c05a2a94f56852300095a280 Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Wed, 24 Feb 2016 08:15:20 +0000 Subject: [PATCH] AVX512: Add vpmovzxbw/d/q, vpmovzxw/d/q, vpmovzxdq lowering patterns that support 256-bit inputs like the AVX patterns (which are disabled when HasVLX is set, see SS41I_pmovx_avx2_patterns). Differential Revision: http://reviews.llvm.org/D17504 llvm-svn: 261724 --- llvm/lib/Target/X86/X86InstrAVX512.td | 14 ++++++ llvm/test/CodeGen/X86/avx512-ext.ll | 69 +++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index b3b455a02dc1..12e8887b6293 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6351,6 +6351,14 @@ multiclass avx512_extend_common opc, string OpcodeStr, } } +// support full register inputs (like SSE patterns) +multiclass avx512_extend_lowering { + def : Pat<(To.VT (OpNode (From.VT From.RC:$src))), + (!cast(NAME#To.ZSuffix#"rr") + (EXTRACT_SUBREG From.RC:$src, SubRegIdx))>; +} + multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { @@ -6360,6 +6368,7 @@ multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasBWI] in { @@ -6378,6 +6387,7 @@ multiclass avx512_extend_BD opc, string OpcodeStr, SDNode OpNode, defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6396,6 +6406,7 @@ multiclass avx512_extend_BQ opc, string OpcodeStr, SDNode OpNode, defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6414,6 +6425,7 @@ multiclass avx512_extend_WD opc, string OpcodeStr, SDNode OpNode, defm Z256: 
avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6432,6 +6444,7 @@ multiclass avx512_extend_WQ opc, string OpcodeStr, SDNode OpNode, defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6451,6 +6464,7 @@ multiclass avx512_extend_DQ opc, string OpcodeStr, SDNode OpNode, defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 2f831981f0c8..3eb84ea6a2d5 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1810,3 +1810,72 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ret <64 x i16> %ret } +define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { +; ALL-LABEL: shuffle_zext_16x8_to_16x16: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <16 x i16> + ret <16 x i16> %2 +} + +define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_16x16: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <16 x i16> + ret <16 x i16> %2 +} + +define <8 x i32> 
@zext_32x8_to_8x32(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_8x32: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <8 x i32> + ret <8 x i32> %2 +} + +define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { +; ALL-LABEL: zext_16x16_to_8x32: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> + %2 = bitcast <16 x i16> %1 to <8 x i32> + ret <8 x i32> %2 +} + +define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { +; ALL-LABEL: zext_16x16_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> + %2 = bitcast <16 x i16> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { +; ALL-LABEL: zext_8x32_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; ALL-NEXT: retq + %1 = shufflevector <8 x i32> %a, <8 x i32> 
zeroinitializer, <8 x i32> + %2 = bitcast <8 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +}