From 77ad6d9bb247932ac2f04dee7df70f216399a715 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 6 Jul 2017 12:40:10 +0000 Subject: [PATCH] [X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks llvm-svn: 307255 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 32 ++++++++++--------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 02c8ec4d8f7d..a38b8d47fcbf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27148,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const X86Subtarget &Subtarget, @@ -27158,14 +27159,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - bool ContainsZeros = false; - APInt Zeroable(NumMaskElts, false); - for (unsigned i = 0; i != NumMaskElts; ++i) { - int M = Mask[i]; - if (isUndefOrZero(M)) - Zeroable.setBit(i); - ContainsZeros |= (M == SM_SentinelZero); - } + bool ContainsZeros = + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); // Handle VPERMI/VPERMILPD vXi64/vXi64 patterns. if (!ContainsZeros && MaskScalarSizeInBits == 64) { @@ -27330,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, SDLoc &DL, @@ -27415,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // Attempt to combine to INSERTPS. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && MaskVT.is128BitVector()) { - APInt Zeroable(4, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) - if (Mask[i] < 0) - Zeroable.setBit(i); - if (Zeroable.getBoolValue() && matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { Shuffle = X86ISD::INSERTPS; @@ -27608,6 +27599,12 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); + // Determine zeroable mask elements. + APInt Zeroable(NumMaskElts, 0); + for (unsigned i = 0; i != NumMaskElts; ++i) + if (isUndefOrZero(Mask[i])) + Zeroable.setBit(i); + if (UnaryShuffle) { // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load // directly if we don't shuffle the lower element and we shuffle the upper @@ -27640,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) @@ -27676,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { @@ -27701,11 +27698,6 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits); ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts); - APInt Zeroable(NumMaskElts, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) - if (isUndefOrZero(Mask[i])) - Zeroable.setBit(i); - uint64_t BitLen, BitIdx; if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) {