diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0de7d1c173c2..2a0ea9cfeaeb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26264,7 +26264,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
 // instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
 static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
-                                    bool FloatDomain,
+                                    bool FloatDomain, SDValue &V1, SDLoc &DL,
+                                    SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget,
                                     unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
   unsigned NumMaskElts = Mask.size();
@@ -26280,8 +26281,9 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   }
 
   // Match against a VZEXT instruction.
-  // TODO: Add 256/512-bit vector support.
-  if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+  // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
+  if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+                       (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
     unsigned MaxScale = 64 / MaskEltSize;
     for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
       bool Match = true;
@@ -26291,7 +26293,10 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
         Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
       }
       if (Match) {
-        SrcVT = MaskVT;
+        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+        SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
+        if (SrcVT != MaskVT)
+          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
         DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
         DstVT = MVT::getVectorVT(DstVT, NumDstElts);
         Shuffle = X86ISD::VZEXT;
@@ -26908,8 +26913,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     }
   }
 
-  if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
-                              ShuffleSrcVT, ShuffleVT)) {
+  if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, DL, DAG,
+                              Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
     if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 2f6267edfceb..c9845bf74cfb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -480,14 +480,12 @@ define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
 define <4 x i64> @combine_pshufb_as_zext(<32 x i8> %a0) {
 ; X32-LABEL: combine_pshufb_as_zext:
 ; X32:       # BB#0:
-; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,1]
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[20,21],zero,zero,zero,zero,zero,zero,ymm0[22,23],zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pshufb_as_zext:
 ; X64:       # BB#0:
-; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,1]
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[20,21],zero,zero,zero,zero,zero,zero,ymm0[22,23],zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-NEXT:    retq
   %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32>
   %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> )
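For reference, the VZEXT match above asks, for each power-of-two Scale, whether the mask keeps every Scale-th source element in place and zeroes (or leaves undef) the elements in between; only then is the shuffle really a zero-extension. Below is a minimal standalone sketch of that mask test (plain C++; kUndef/kZero and matchesZExtAtScale are illustrative stand-ins for LLVM's SM_SentinelUndef/SM_SentinelZero sentinels and its isUndefOrEqual/isUndefOrZeroInRange helpers), not the patch itself:

    #include <cstdio>
    #include <vector>

    // Illustrative stand-ins for LLVM's shuffle mask sentinels
    // (SM_SentinelUndef / SM_SentinelZero).
    constexpr int kUndef = -1;
    constexpr int kZero = -2;

    // Mirrors the Scale loop in matchUnaryVectorShuffle: the mask is a
    // zero-extension by Scale if element i*Scale selects source element i
    // (or is undef) and the Scale-1 elements after it are zero or undef.
    static bool matchesZExtAtScale(const std::vector<int> &Mask, unsigned Scale) {
      unsigned NumDstElts = Mask.size() / Scale;
      for (unsigned i = 0; i != NumDstElts; ++i) {
        int M = Mask[i * Scale];
        if (M != kUndef && M != (int)i) // isUndefOrEqual(Mask[i * Scale], i)
          return false;
        for (unsigned j = 1; j != Scale; ++j) { // isUndefOrZeroInRange(...)
          int Z = Mask[i * Scale + j];
          if (Z != kUndef && Z != kZero)
            return false;
        }
      }
      return true;
    }

    int main() {
      // A v16i16-style mask for vpmovzxwq: words 0..3 widened to qwords.
      std::vector<int> Mask = {0, kZero, kZero, kZero, 1, kZero, kZero, kZero,
                               2, kZero, kZero, kZero, 3, kZero, kZero, kZero};
      std::printf("Scale 2: %d, Scale 4: %d\n",
                  matchesZExtAtScale(Mask, 2), matchesZExtAtScale(Mask, 4));
      // Prints "Scale 2: 0, Scale 4: 1" - only the x4 extension matches.
      return 0;
    }

With MaskVT = v16i16 this mask matches at Scale 4: NumDstElts = 4, so SrcSize = max(128, 4 * 16) = 128 and SrcVT becomes v8i16 rather than MaskVT. That is why the patch narrows V1 with extractSubVector, and why the updated CHECK lines use the vpmovzxwq ymm, xmm form: a 256-bit zero-extension result only ever consumes the low 128 bits of its source.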