forked from OSchip/llvm-project
[X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks
llvm-svn: 307255
This commit is contained in:
parent
cc0f785dca
commit
77ad6d9bb2
|
@ -27148,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
// permute instructions.
|
||||
// TODO: Investigate sharing more of this with shuffle lowering.
|
||||
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
||||
const APInt &Zeroable,
|
||||
bool AllowFloatDomain,
|
||||
bool AllowIntDomain,
|
||||
const X86Subtarget &Subtarget,
|
||||
|
@ -27158,14 +27159,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
|
||||
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
|
||||
|
||||
bool ContainsZeros = false;
|
||||
APInt Zeroable(NumMaskElts, false);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i) {
|
||||
int M = Mask[i];
|
||||
if (isUndefOrZero(M))
|
||||
Zeroable.setBit(i);
|
||||
ContainsZeros |= (M == SM_SentinelZero);
|
||||
}
|
||||
bool ContainsZeros =
|
||||
llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
|
||||
|
||||
// Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
|
||||
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
|
||||
|
@ -27330,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
}
|
||||
|
||||
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
||||
const APInt &Zeroable,
|
||||
bool AllowFloatDomain,
|
||||
bool AllowIntDomain,
|
||||
SDValue &V1, SDValue &V2, SDLoc &DL,
|
||||
|
@ -27415,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
// Attempt to combine to INSERTPS.
|
||||
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
|
||||
MaskVT.is128BitVector()) {
|
||||
APInt Zeroable(4, 0);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i)
|
||||
if (Mask[i] < 0)
|
||||
Zeroable.setBit(i);
|
||||
|
||||
if (Zeroable.getBoolValue() &&
|
||||
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
|
||||
Shuffle = X86ISD::INSERTPS;
|
||||
|
@ -27608,6 +27599,12 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
|
||||
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
|
||||
|
||||
// Determine zeroable mask elements.
|
||||
APInt Zeroable(NumMaskElts, 0);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i)
|
||||
if (isUndefOrZero(Mask[i]))
|
||||
Zeroable.setBit(i);
|
||||
|
||||
if (UnaryShuffle) {
|
||||
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
|
||||
// directly if we don't shuffle the lower element and we shuffle the upper
|
||||
|
@ -27640,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
|
||||
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
|
||||
AllowIntDomain, Subtarget, Shuffle,
|
||||
ShuffleVT, PermuteImm)) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
|
@ -27676,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
|
||||
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
|
||||
AllowIntDomain, V1, V2, DL, DAG,
|
||||
Subtarget, Shuffle, ShuffleVT,
|
||||
PermuteImm)) {
|
||||
|
@ -27701,11 +27698,6 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
|
||||
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
|
||||
|
||||
APInt Zeroable(NumMaskElts, 0);
|
||||
for (unsigned i = 0; i != NumMaskElts; ++i)
|
||||
if (isUndefOrZero(Mask[i]))
|
||||
Zeroable.setBit(i);
|
||||
|
||||
uint64_t BitLen, BitIdx;
|
||||
if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
|
||||
Zeroable)) {
|
||||
|
|
Loading…
Reference in New Issue