forked from OSchip/llvm-project
[X86][AVX] Rename + cleanup lowerShuffleAsLanePermuteAndBlend. NFCI.
Rename to lowerShuffleAsLanePermuteAndShuffle to make it clear that not just blends are performed. Cleanup the in-lane shuffle mask generation to make it more obvious what's going on. Some prep work noticed while investigating the poor shuffle code mentioned in D66004. llvm-svn: 370613
This commit is contained in:
parent
5341193537
commit
07de5292e5
|
@ -14673,16 +14673,14 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
|
|||
return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
|
||||
}
|
||||
|
||||
/// Lower a vector shuffle crossing multiple 128-bit lanes as
|
||||
/// a permutation and blend of those lanes.
|
||||
/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
|
||||
/// source with a lane permutation.
|
||||
///
|
||||
/// This essentially blends the out-of-lane inputs to each lane into the lane
|
||||
/// from a permuted copy of the vector. This lowering strategy results in four
|
||||
/// instructions in the worst case for a single-input cross lane shuffle which
|
||||
/// is lower than any other fully general cross-lane shuffle strategy I'm aware
|
||||
/// of. Special cases for each particular shuffle pattern should be handled
|
||||
/// prior to trying this lowering.
|
||||
static SDValue lowerShuffleAsLanePermuteAndBlend(
|
||||
/// This lowering strategy results in four instructions in the worst case for a
|
||||
/// single-input cross lane shuffle which is lower than any other fully general
|
||||
/// cross-lane shuffle strategy I'm aware of. Special cases for each particular
|
||||
/// shuffle pattern should be handled prior to trying this lowering.
|
||||
static SDValue lowerShuffleAsLanePermuteAndShuffle(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
|
||||
// FIXME: This should probably be generalized for 512-bit vectors as well.
|
||||
|
@ -14709,24 +14707,28 @@ static SDValue lowerShuffleAsLanePermuteAndBlend(
|
|||
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
||||
// TODO - we could support shuffling V2 in the Flipped input.
|
||||
assert(V2.isUndef() &&
|
||||
"This last part of this routine only works on single input shuffles");
|
||||
|
||||
SmallVector<int, 32> FlippedBlendMask(Size);
|
||||
for (int i = 0; i < Size; ++i)
|
||||
FlippedBlendMask[i] =
|
||||
Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
|
||||
? Mask[i]
|
||||
: Mask[i] % LaneSize +
|
||||
(i / LaneSize) * LaneSize + Size);
|
||||
SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end());
|
||||
for (int i = 0; i < Size; ++i) {
|
||||
int &M = InLaneMask[i];
|
||||
if (M < 0)
|
||||
continue;
|
||||
if (((M % Size) / LaneSize) != (i / LaneSize))
|
||||
M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
|
||||
}
|
||||
assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
|
||||
"In-lane shuffle mask expected");
|
||||
|
||||
// Flip the vector, and blend the results which should now be in-lane.
|
||||
// Flip the lanes, and shuffle the results which should now be in-lane.
|
||||
MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
|
||||
SDValue Flipped = DAG.getBitcast(PVT, V1);
|
||||
Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT),
|
||||
{ 2, 3, 0, 1 });
|
||||
Flipped =
|
||||
DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
|
||||
Flipped = DAG.getBitcast(VT, Flipped);
|
||||
return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
|
||||
return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
|
||||
}
|
||||
|
||||
/// Handle lowering 2-lane 128-bit shuffles.
|
||||
|
@ -15487,8 +15489,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return V;
|
||||
|
||||
// Otherwise, fall back.
|
||||
return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG,
|
||||
Subtarget);
|
||||
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
}
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
|
@ -15704,8 +15706,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
|
||||
|
||||
// Otherwise, fall back.
|
||||
return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
}
|
||||
|
||||
// Try to simplify this by merging 128-bit lanes to enable a lane-based
|
||||
|
@ -15912,8 +15914,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
|
||||
return V;
|
||||
|
||||
return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
}
|
||||
|
||||
SmallVector<int, 8> RepeatedMask;
|
||||
|
@ -16011,8 +16013,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
|
||||
return V;
|
||||
|
||||
return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG,
|
||||
Subtarget);
|
||||
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
|
||||
DAG, Subtarget);
|
||||
}
|
||||
|
||||
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
|
||||
|
|
Loading…
Reference in New Issue