[X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks

llvm-svn: 307255
This commit is contained in:
Simon Pilgrim 2017-07-06 12:40:10 +00:00
parent cc0f785dca
commit 77ad6d9bb2
1 changed files with 12 additions and 20 deletions

View File

@ -27148,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
const X86Subtarget &Subtarget,
@ -27158,14 +27159,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros = false;
APInt Zeroable(NumMaskElts, false);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (isUndefOrZero(M))
Zeroable.setBit(i);
ContainsZeros |= (M == SM_SentinelZero);
}
bool ContainsZeros =
llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
// Handle VPERMI/VPERMILPD vXi64/vXi64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
@ -27330,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
@ -27415,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to combine to INSERTPS.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
APInt Zeroable(4, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] < 0)
Zeroable.setBit(i);
if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
@ -27608,6 +27599,12 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
APInt Zeroable(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (isUndefOrZero(Mask[i]))
Zeroable.setBit(i);
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
@ -27640,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
@ -27676,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
@ -27701,11 +27698,6 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
APInt Zeroable(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (isUndefOrZero(Mask[i]))
Zeroable.setBit(i);
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {