From c941f6b329119f2c0877554dfbe642d12ed833f0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 18 Jul 2016 17:32:59 +0000 Subject: [PATCH] [X86][AVX] Add target shuffle decode support for VBROADCAST Currently we only decode broadcasts from a vector of the same size. llvm-svn: 275823 --- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 6 ++++++ llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 3 +++ llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++++++++ llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 4 ---- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 4fe9a9e949d6..18f71675437b 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -275,6 +275,12 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { } } +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = DstVT.getVectorNumElements(); + ShuffleMask.append(NumElts, 0); +} + /// Decodes a broadcast of a subvector to a larger vector type. void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, SmallVectorImpl<int> &ShuffleMask) { diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index 30b17052b722..dc21c19752c3 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -88,6 +88,9 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); /// datatypes and vector widths. void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl<int> &ShuffleMask); + /// Decodes a broadcast of a subvector to a larger vector type. 
void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, SmallVectorImpl<int> &ShuffleMask); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cac36b82e217..e54711195900 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3804,6 +3804,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKL: case X86ISD::UNPCKH: + case X86ISD::VBROADCAST: case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: @@ -4920,6 +4921,15 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeZeroMoveLowMask(VT, Mask); IsUnary = true; break; + case X86ISD::VBROADCAST: { + // We only decode broadcasts of same-sized vectors at the moment. + if (N->getOperand(0).getValueType() == VT) { + DecodeVectorBroadcast(VT, Mask); + IsUnary = true; + break; + } + return false; + } case X86ISD::VPERMILPV: { IsUnary = true; SDValue MaskNode = N->getOperand(1); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 940906edd692..a10ba6ccc41e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -207,7 +207,6 @@ define <16 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb128(<16 x i8> %a) { ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb128: ; CHECK: # BB#0: ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 -; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> zeroinitializer) @@ -218,8 +217,6 @@ define <32 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb256(<32 x i8> %a) { ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb256: ; CHECK: # BB#0: ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 -; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; 
CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq %1 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> zeroinitializer) @@ -230,7 +227,6 @@ define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> % ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastss128: ; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 -; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer %2 = bitcast <4 x float> %1 to <16 x i8>