[NFC][X86] combineX86ShuffleChain(): hoist Mask variable higher up
Having `NewMask` outside of an `if` and rebinding the `BaseMask`
`ArrayRef` to it is confusing. Instead, just move the `Mask` vector
higher up, and change the code that previously had no access to it,
but now does, to use `Mask` instead of `BaseMask`.
This has no other intentional changes.
This is a recommit of 35c0848b57, which was reverted to simplify the
reversion of an earlier change.
commit c0586ff05d
parent 16605aea84
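The gist of the change: rather than keeping a late `SmallVector` around solely so the `BaseMask` `ArrayRef` can be rebound to it, the mutable copy is made once, up front, and everything after that point reads `Mask`. Below is a minimal standalone sketch of the before/after shape, not the LLVM sources themselves; it assumes only LLVM's ADT headers, and `Widen` plus the `push_back` are hypothetical stand-ins for the real mask-rewriting logic.

// Standalone sketch of the before/after shape (not the LLVM sources).
// Only LLVM's ADT headers are assumed; Widen and the push_back below are
// hypothetical stand-ins for the real mask-rewriting logic.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <cstddef>
using namespace llvm;

// Before: NewMask exists only so the ArrayRef can be rebound to it; after
// the 'if', BaseMask may alias either the caller's mask or NewMask.
static size_t beforeShape(ArrayRef<int> BaseMask, bool Widen) {
  SmallVector<int, 64> NewMask;
  if (Widen) {
    NewMask.assign(BaseMask.begin(), BaseMask.end());
    NewMask.push_back(0); // stand-in for the real mask rewriting
    BaseMask = NewMask;   // the rebinding is easy to miss when reading on
  }
  return BaseMask.size(); // silently reads through the rebound view
}

// After: copy into a mutable Mask once, up front; all later code uses Mask,
// and BaseMask always means "the mask the caller passed in".
static size_t afterShape(ArrayRef<int> BaseMask, bool Widen) {
  SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
  if (Widen)
    Mask.push_back(0); // stand-in for the real mask rewriting
  return Mask.size();
}

The diff below is exactly that second shape applied to combineX86ShuffleChain().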
@@ -35816,13 +35816,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       return CanonicalizeShuffleInput(RootVT, V1);
   }
 
+  SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
+
   // See if the shuffle is a hidden identity shuffle - repeated args in HOPs
   // etc. can be simplified.
   if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
     SmallVector<int> ScaledMask, IdentityMask;
     unsigned NumElts = VT1.getVectorNumElements();
-    if (BaseMask.size() <= NumElts &&
-        scaleShuffleElements(BaseMask, NumElts, ScaledMask)) {
+    if (Mask.size() <= NumElts &&
+        scaleShuffleElements(Mask, NumElts, ScaledMask)) {
       for (unsigned i = 0; i != NumElts; ++i)
         IdentityMask.push_back(i);
       if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
@@ -35836,14 +35838,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // If the upper subvectors are zeroable, then an extract+insert is more
     // optimal than using X86ISD::SHUF128. The insertion is free, even if it has
     // to zero the upper subvectors.
-    if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
+    if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
       if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
         return SDValue(); // Nothing to do!
-      assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
+      assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
              "Unexpected lane shuffle");
       Res = CanonicalizeShuffleInput(RootVT, V1);
-      unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
-      bool UseZero = isAnyZero(BaseMask);
+      unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
+      bool UseZero = isAnyZero(Mask);
       Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
       return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
     }
@@ -35851,7 +35853,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // Narrow shuffle mask to v4x128.
     SmallVector<int, 4> ScaledMask;
     assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
-    narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, ScaledMask);
+    narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
 
     // Try to lower to vshuf64x2/vshuf32x4.
     auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
@@ -35910,20 +35912,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // If the upper half is zeroable, then an extract+insert is more optimal
     // than using X86ISD::VPERM2X128. The insertion is free, even if it has to
     // zero the upper half.
-    if (isUndefOrZero(BaseMask[1])) {
+    if (isUndefOrZero(Mask[1])) {
       if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
         return SDValue(); // Nothing to do!
-      assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
+      assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
       Res = CanonicalizeShuffleInput(RootVT, V1);
-      Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
-      return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
-                            DL, 256);
+      Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
+      return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
+                            256);
     }
 
     // If we're splatting the low subvector, an insert-subvector 'concat'
     // pattern is quicker than VPERM2X128.
     // TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
-    if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) {
+    if (Mask[0] == 0 && Mask[1] == 0 && !Subtarget.hasAVX2()) {
       if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
         return SDValue(); // Nothing to do!
       Res = CanonicalizeShuffleInput(RootVT, V1);
@@ -35938,11 +35940,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // we need to use the zeroing feature.
     // Prefer blends for sequential shuffles unless we are optimizing for size.
     if (UnaryShuffle &&
-        !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
-        (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
+        !(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
+        (OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
       unsigned PermMask = 0;
-      PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
-      PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
+      PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
+      PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
       return DAG.getNode(
           X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
           DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
@@ -35953,16 +35955,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
     // TODO - handle AVX512VL cases with X86ISD::SHUF128.
     if (!UnaryShuffle && !IsMaskedShuffle) {
-      assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
+      assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
              "Unexpected shuffle sentinel value");
       // Prefer blends to X86ISD::VPERM2X128.
-      if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
-            (BaseMask[0] == 2 && BaseMask[1] == 1))) {
+      if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
         unsigned PermMask = 0;
-        PermMask |= ((BaseMask[0] & 3) << 0);
-        PermMask |= ((BaseMask[1] & 3) << 4);
-        SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
-        SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
+        PermMask |= ((Mask[0] & 3) << 0);
+        PermMask |= ((Mask[1] & 3) << 4);
+        SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
+        SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
         return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
                            CanonicalizeShuffleInput(RootVT, LHS),
                            CanonicalizeShuffleInput(RootVT, RHS),
@@ -35973,13 +35974,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   // For masks that have been widened to 128-bit elements or more,
   // narrow back down to 64-bit elements.
-  SmallVector<int, 64> Mask;
   if (BaseMaskEltSizeInBits > 64) {
     assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
     int MaskScale = BaseMaskEltSizeInBits / 64;
-    narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
-  } else {
-    Mask.assign(BaseMask.begin(), BaseMask.end());
+    SmallVector<int, 64> ScaledMask;
+    narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
+    Mask = std::move(ScaledMask);
   }
 
   // For masked shuffles, we're trying to match the root width for better
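For reference, the last hunk is where the hoisting pays off: the 64-bit narrowing step now rewrites the already-hoisted `Mask` through a temporary and a move-assign, instead of declaring the vector late and copying in an `else` branch. A small sketch of that narrow-through-a-temporary pattern follows; `narrowInts` is a hypothetical stand-in for `narrowShuffleMaskElts`.

// Sketch of the narrow-through-a-temporary pattern from the final hunk.
// narrowInts is a hypothetical stand-in for narrowShuffleMaskElts: each
// wide element M becomes Scale narrow elements (sentinels < 0 replicate).
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void narrowInts(int Scale, const SmallVectorImpl<int> &In,
                       SmallVectorImpl<int> &Out) {
  for (int M : In)
    for (int I = 0; I != Scale; ++I)
      Out.push_back(M < 0 ? M : M * Scale + I);
}

static void narrowTo64Bit(SmallVector<int, 64> &Mask, unsigned EltSizeInBits) {
  if (EltSizeInBits > 64) {
    int Scale = EltSizeInBits / 64;
    SmallVector<int, 64> ScaledMask;
    narrowInts(Scale, Mask, ScaledMask);
    Mask = std::move(ScaledMask); // replace contents; no ArrayRef rebinding
  }
}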