forked from OSchip/llvm-project
AArch64: refactor ReconstructShuffle function
Quite a bit of cruft had accumulated as we realised the various different cases it had to handle and squeezed them in where possible. This refactoring mostly flattens the logic and the special cases. The result is slightly longer, but I think clearer. There should be no functional change. llvm-svn: 213867
This commit is contained in:
parent
857fd660d8
commit
7324e845a4
|
@ -4138,10 +4138,30 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
EVT VT = Op.getValueType();
|
EVT VT = Op.getValueType();
|
||||||
unsigned NumElts = VT.getVectorNumElements();
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
|
|
||||||
SmallVector<SDValue, 2> SourceVecs;
|
struct ShuffleSourceInfo {
|
||||||
SmallVector<unsigned, 2> MinElts;
|
SDValue Vec;
|
||||||
SmallVector<unsigned, 2> MaxElts;
|
unsigned MinElt;
|
||||||
|
unsigned MaxElt;
|
||||||
|
|
||||||
|
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
|
||||||
|
// be compatible with the shuffle we intend to construct. As a result
|
||||||
|
// ShuffleVec will be some sliding window into the original Vec.
|
||||||
|
SDValue ShuffleVec;
|
||||||
|
|
||||||
|
// Code should guarantee that element i in Vec starts at element "WindowBase
|
||||||
|
// + i * WindowScale" in ShuffleVec.
|
||||||
|
int WindowBase;
|
||||||
|
int WindowScale;
|
||||||
|
|
||||||
|
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
|
||||||
|
ShuffleSourceInfo(SDValue Vec)
|
||||||
|
: Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
|
||||||
|
WindowScale(1) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// First gather all vectors used as an immediate source for this BUILD_VECTOR
|
||||||
|
// node.
|
||||||
|
SmallVector<ShuffleSourceInfo, 2> Sources;
|
||||||
for (unsigned i = 0; i < NumElts; ++i) {
|
for (unsigned i = 0; i < NumElts; ++i) {
|
||||||
SDValue V = Op.getOperand(i);
|
SDValue V = Op.getOperand(i);
|
||||||
if (V.getOpcode() == ISD::UNDEF)
|
if (V.getOpcode() == ISD::UNDEF)
|
||||||
|
@ -4152,158 +4172,153 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record this extraction against the appropriate vector if possible...
|
// Add this element source to the list if it's not already there.
|
||||||
SDValue SourceVec = V.getOperand(0);
|
SDValue SourceVec = V.getOperand(0);
|
||||||
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
|
auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
|
||||||
bool FoundSource = false;
|
if (Source == Sources.end())
|
||||||
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
|
Sources.push_back(ShuffleSourceInfo(SourceVec));
|
||||||
if (SourceVecs[j] == SourceVec) {
|
|
||||||
if (MinElts[j] > EltNo)
|
|
||||||
MinElts[j] = EltNo;
|
|
||||||
if (MaxElts[j] < EltNo)
|
|
||||||
MaxElts[j] = EltNo;
|
|
||||||
FoundSource = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Or record a new source if not...
|
// Update the minimum and maximum lane number seen.
|
||||||
if (!FoundSource) {
|
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
|
||||||
SourceVecs.push_back(SourceVec);
|
Source->MinElt = std::min(Source->MinElt, EltNo);
|
||||||
MinElts.push_back(EltNo);
|
Source->MaxElt = std::max(Source->MaxElt, EltNo);
|
||||||
MaxElts.push_back(EltNo);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Currently only do something sane when at most two source vectors
|
// Currently only do something sane when at most two source vectors
|
||||||
// involved.
|
// are involved.
|
||||||
if (SourceVecs.size() > 2)
|
if (Sources.size() > 2)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Find out the smallest element size among result and two sources, and use
|
// Find out the smallest element size among result and two sources, and use
|
||||||
// it as element size to build the shuffle_vector.
|
// it as element size to build the shuffle_vector.
|
||||||
EVT SmallestEltTy = VT.getVectorElementType();
|
EVT SmallestEltTy = VT.getVectorElementType();
|
||||||
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
|
for (auto &Source : Sources) {
|
||||||
EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
|
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
|
||||||
if (SrcEltTy.bitsLT(SmallestEltTy)) {
|
if (SrcEltTy.bitsLT(SmallestEltTy)) {
|
||||||
SmallestEltTy = SrcEltTy;
|
SmallestEltTy = SrcEltTy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned ResMultiplier =
|
unsigned ResMultiplier =
|
||||||
VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
|
VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||||
int VEXTOffsets[2] = { 0, 0 };
|
|
||||||
int OffsetMultipliers[2] = { 1, 1 };
|
|
||||||
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||||
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
|
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
|
||||||
SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
|
|
||||||
|
|
||||||
// This loop extracts the usage patterns of the source vectors
|
// If the source vector is too wide or too narrow, we may nevertheless be able
|
||||||
// and prepares appropriate SDValues for a shuffle if possible.
|
// to construct a compatible shuffle either by concatenating it with UNDEF or
|
||||||
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
|
// extracting a suitable range of elements.
|
||||||
unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
|
for (auto &Src : Sources) {
|
||||||
SDValue CurSource = SourceVecs[i];
|
EVT SrcVT = Src.ShuffleVec.getValueType();
|
||||||
if (SourceVecs[i].getValueType().getVectorElementType() !=
|
|
||||||
ShuffleVT.getVectorElementType()) {
|
|
||||||
// As ShuffleVT holds smallest element size, it may hit here only if
|
|
||||||
// the element type of SourceVecs is bigger than that of ShuffleVT.
|
|
||||||
// Adjust the element size of SourceVecs to match ShuffleVT, and record
|
|
||||||
// the multipliers.
|
|
||||||
EVT CastVT = EVT::getVectorVT(
|
|
||||||
*DAG.getContext(), ShuffleVT.getVectorElementType(),
|
|
||||||
SourceVecs[i].getValueSizeInBits() /
|
|
||||||
ShuffleVT.getVectorElementType().getSizeInBits());
|
|
||||||
|
|
||||||
CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
|
if (SrcVT.getSizeInBits() == VT.getSizeInBits())
|
||||||
OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
|
|
||||||
NumSrcElts *= OffsetMultipliers[i];
|
|
||||||
MaxElts[i] *= OffsetMultipliers[i];
|
|
||||||
MinElts[i] *= OffsetMultipliers[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (CurSource.getValueType() == ShuffleVT) {
|
|
||||||
// No VEXT necessary
|
|
||||||
ShuffleSrcs[i] = CurSource;
|
|
||||||
VEXTOffsets[i] = 0;
|
|
||||||
continue;
|
continue;
|
||||||
} else if (NumSrcElts < NumElts) {
|
|
||||||
|
// This stage of the search produces a source with the same element type as
|
||||||
|
// the original, but with a total width matching the BUILD_VECTOR output.
|
||||||
|
EVT EltVT = SrcVT.getVectorElementType();
|
||||||
|
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
|
||||||
|
VT.getSizeInBits() / EltVT.getSizeInBits());
|
||||||
|
|
||||||
|
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
|
||||||
|
assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
|
||||||
// We can pad out the smaller vector for free, so if it's part of a
|
// We can pad out the smaller vector for free, so if it's part of a
|
||||||
// shuffle...
|
// shuffle...
|
||||||
ShuffleSrcs[i] =
|
Src.ShuffleVec =
|
||||||
DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
|
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
|
||||||
DAG.getUNDEF(CurSource.getValueType()));
|
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since only 64-bit and 128-bit vectors are legal on ARM and
|
assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
|
||||||
// we've eliminated the other cases...
|
|
||||||
assert(NumSrcElts == 2 * NumElts &&
|
|
||||||
"unexpected vector sizes in ReconstructShuffle");
|
|
||||||
|
|
||||||
if (MaxElts[i] - MinElts[i] >= NumElts) {
|
if (Src.MaxElt - Src.MinElt >= NumElts) {
|
||||||
// Span too large for a VEXT to cope
|
// Span too large for a VEXT to cope
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MinElts[i] >= NumElts) {
|
if (Src.MinElt >= NumElts) {
|
||||||
// The extraction can just take the second half
|
// The extraction can just take the second half
|
||||||
VEXTOffsets[i] = NumElts;
|
Src.ShuffleVec =
|
||||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||||
CurSource, DAG.getIntPtrConstant(NumElts));
|
DAG.getIntPtrConstant(NumElts));
|
||||||
} else if (MaxElts[i] < NumElts) {
|
Src.WindowBase = -NumElts;
|
||||||
|
} else if (Src.MaxElt < NumElts) {
|
||||||
// The extraction can just take the first half
|
// The extraction can just take the first half
|
||||||
VEXTOffsets[i] = 0;
|
Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
|
||||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
Src.ShuffleVec, DAG.getIntPtrConstant(0));
|
||||||
CurSource, DAG.getIntPtrConstant(0));
|
|
||||||
} else {
|
} else {
|
||||||
// An actual VEXT is needed
|
// An actual VEXT is needed
|
||||||
VEXTOffsets[i] = MinElts[i];
|
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
|
||||||
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
Src.ShuffleVec, DAG.getIntPtrConstant(0));
|
||||||
CurSource, DAG.getIntPtrConstant(0));
|
SDValue VEXTSrc2 =
|
||||||
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||||
CurSource, DAG.getIntPtrConstant(NumElts));
|
DAG.getIntPtrConstant(NumElts));
|
||||||
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
|
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
|
||||||
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
|
|
||||||
|
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
|
||||||
VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
|
VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
|
||||||
|
Src.WindowBase = -Src.MinElt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SmallVector<int, 8> Mask;
|
// Another possible incompatibility occurs from the vector element types. We
|
||||||
unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();
|
// can fix this by bitcasting the source vectors to the same type we intend
|
||||||
|
// for the shuffle.
|
||||||
|
for (auto &Src : Sources) {
|
||||||
|
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
|
||||||
|
if (SrcEltTy == SmallestEltTy)
|
||||||
|
continue;
|
||||||
|
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
|
||||||
|
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
|
||||||
|
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||||
|
Src.WindowBase *= Src.WindowScale;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final sanity check before we try to actually produce a shuffle.
|
||||||
|
DEBUG(
|
||||||
|
for (auto Src : Sources)
|
||||||
|
assert(Src.ShuffleVec.getValueType() == ShuffleVT);
|
||||||
|
);
|
||||||
|
|
||||||
|
// The stars all align, our next step is to produce the mask for the shuffle.
|
||||||
|
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
|
||||||
|
int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
|
||||||
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
|
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
|
||||||
SDValue Entry = Op.getOperand(i);
|
SDValue Entry = Op.getOperand(i);
|
||||||
int SourceNum = 1;
|
if (Entry.getOpcode() == ISD::UNDEF)
|
||||||
unsigned LanePartNum = 0;
|
continue;
|
||||||
int ExtractElt;
|
|
||||||
if (Entry.getOpcode() != ISD::UNDEF) {
|
|
||||||
// Check how many parts of source lane should be inserted.
|
|
||||||
SDValue ExtractVec = Entry.getOperand(0);
|
|
||||||
if (ExtractVec == SourceVecs[0])
|
|
||||||
SourceNum = 0;
|
|
||||||
ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
|
|
||||||
unsigned ExtEltSize =
|
|
||||||
ExtractVec.getValueType().getVectorElementType().getSizeInBits();
|
|
||||||
unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
|
|
||||||
LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0; j != ResMultiplier; ++j) {
|
auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
|
||||||
if (j < LanePartNum)
|
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
|
||||||
Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
|
|
||||||
NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
|
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
|
||||||
else
|
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
|
||||||
Mask.push_back(-1);
|
// segment.
|
||||||
}
|
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
|
||||||
|
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
|
||||||
|
VT.getVectorElementType().getSizeInBits());
|
||||||
|
int LanesDefined = BitsDefined / BitsPerShuffleLane;
|
||||||
|
|
||||||
|
// This source is expected to fill ResMultiplier lanes of the final shuffle,
|
||||||
|
// starting at the appropriate offset.
|
||||||
|
int *LaneMask = &Mask[i * ResMultiplier];
|
||||||
|
|
||||||
|
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
|
||||||
|
ExtractBase += NumElts * (Src - Sources.begin());
|
||||||
|
for (int j = 0; j < LanesDefined; ++j)
|
||||||
|
LaneMask[j] = ExtractBase + j;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final check before we try to produce nonsense...
|
// Final check before we try to produce nonsense...
|
||||||
if (isShuffleMaskLegal(Mask, ShuffleVT)) {
|
if (!isShuffleMaskLegal(Mask, ShuffleVT))
|
||||||
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
|
return SDValue();
|
||||||
ShuffleSrcs[1], &Mask[0]);
|
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
|
||||||
|
for (unsigned i = 0; i < Sources.size(); ++i)
|
||||||
|
ShuffleOps[i] = Sources[i].ShuffleVec;
|
||||||
|
|
||||||
|
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
|
||||||
|
ShuffleOps[1], &Mask[0]);
|
||||||
|
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if an EXT instruction can handle the shuffle mask when the
|
// check if an EXT instruction can handle the shuffle mask when the
|
||||||
|
|
Loading…
Reference in New Issue