AArch64: refactor ReconstructShuffle function

Quite a bit of cruft had accumulated as we realised the various different cases
this function had to handle and squeezed them in where possible. This
refactoring mostly flattens the logic and the special cases. The result is
slightly longer but, I think, clearer.

Should be no functionality change.

llvm-svn: 213867
This commit is contained in:
Tim Northover 2014-07-24 15:39:55 +00:00
parent 857fd660d8
commit 7324e845a4
1 changed file with 123 additions and 108 deletions

View File

@ -4138,10 +4138,30 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements(); unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 2> SourceVecs; struct ShuffleSourceInfo {
SmallVector<unsigned, 2> MinElts; SDValue Vec;
SmallVector<unsigned, 2> MaxElts; unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
int WindowBase;
int WindowScale;
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
WindowScale(1) {}
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) { for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i); SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF) if (V.getOpcode() == ISD::UNDEF)
@ -4152,158 +4172,153 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
return SDValue(); return SDValue();
} }
// Record this extraction against the appropriate vector if possible... // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0); SDValue SourceVec = V.getOperand(0);
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
bool FoundSource = false; if (Source == Sources.end())
for (unsigned j = 0; j < SourceVecs.size(); ++j) { Sources.push_back(ShuffleSourceInfo(SourceVec));
if (SourceVecs[j] == SourceVec) {
if (MinElts[j] > EltNo)
MinElts[j] = EltNo;
if (MaxElts[j] < EltNo)
MaxElts[j] = EltNo;
FoundSource = true;
break;
}
}
// Or record a new source if not... // Update the minimum and maximum lane number seen.
if (!FoundSource) { unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
SourceVecs.push_back(SourceVec); Source->MinElt = std::min(Source->MinElt, EltNo);
MinElts.push_back(EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo);
MaxElts.push_back(EltNo);
}
} }
// Currently only do something sane when at most two source vectors // Currently only do something sane when at most two source vectors
// involved. // are involved.
if (SourceVecs.size() > 2) if (Sources.size() > 2)
return SDValue(); return SDValue();
// Find out the smallest element size among result and two sources, and use // Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector. // it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType(); EVT SmallestEltTy = VT.getVectorElementType();
for (unsigned i = 0; i < SourceVecs.size(); ++i) { for (auto &Source : Sources) {
EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType(); EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) { if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy; SmallestEltTy = SrcEltTy;
} }
} }
unsigned ResMultiplier = unsigned ResMultiplier =
VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits(); VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
int VEXTOffsets[2] = { 0, 0 };
int OffsetMultipliers[2] = { 1, 1 };
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
// This loop extracts the usage patterns of the source vectors // If the source vector is too wide or too narrow, we may nevertheless be able
// and prepares appropriate SDValues for a shuffle if possible. // to construct a compatible shuffle either by concatenating it with UNDEF or
for (unsigned i = 0; i < SourceVecs.size(); ++i) { // extracting a suitable range of elements.
unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); for (auto &Src : Sources) {
SDValue CurSource = SourceVecs[i]; EVT SrcVT = Src.ShuffleVec.getValueType();
if (SourceVecs[i].getValueType().getVectorElementType() !=
ShuffleVT.getVectorElementType()) {
// As ShuffleVT holds smallest element size, it may hit here only if
// the element type of SourceVecs is bigger than that of ShuffleVT.
// Adjust the element size of SourceVecs to match ShuffleVT, and record
// the multipliers.
EVT CastVT = EVT::getVectorVT(
*DAG.getContext(), ShuffleVT.getVectorElementType(),
SourceVecs[i].getValueSizeInBits() /
ShuffleVT.getVectorElementType().getSizeInBits());
CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); if (SrcVT.getSizeInBits() == VT.getSizeInBits())
OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
NumSrcElts *= OffsetMultipliers[i];
MaxElts[i] *= OffsetMultipliers[i];
MinElts[i] *= OffsetMultipliers[i];
}
if (CurSource.getValueType() == ShuffleVT) {
// No VEXT necessary
ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue; continue;
} else if (NumSrcElts < NumElts) {
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getSizeInBits() / EltVT.getSizeInBits());
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
// We can pad out the smaller vector for free, so if it's part of a // We can pad out the smaller vector for free, so if it's part of a
// shuffle... // shuffle...
ShuffleSrcs[i] = Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource, DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(CurSource.getValueType())); DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue; continue;
} }
// Since only 64-bit and 128-bit vectors are legal on ARM and assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
// we've eliminated the other cases...
assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) { if (Src.MaxElt - Src.MinElt >= NumElts) {
// Span too large for a VEXT to cope // Span too large for a VEXT to cope
return SDValue(); return SDValue();
} }
if (MinElts[i] >= NumElts) { if (Src.MinElt >= NumElts) {
// The extraction can just take the second half // The extraction can just take the second half
VEXTOffsets[i] = NumElts; Src.ShuffleVec =
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
CurSource, DAG.getIntPtrConstant(NumElts)); DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) { Src.WindowBase = -NumElts;
} else if (Src.MaxElt < NumElts) {
// The extraction can just take the first half // The extraction can just take the first half
VEXTOffsets[i] = 0; Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, Src.ShuffleVec, DAG.getIntPtrConstant(0));
CurSource, DAG.getIntPtrConstant(0));
} else { } else {
// An actual VEXT is needed // An actual VEXT is needed
VEXTOffsets[i] = MinElts[i]; SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, Src.ShuffleVec, DAG.getIntPtrConstant(0));
CurSource, DAG.getIntPtrConstant(0)); SDValue VEXTSrc2 =
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
CurSource, DAG.getIntPtrConstant(NumElts)); DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
Src.WindowBase = -Src.MinElt;
} }
} }
SmallVector<int, 8> Mask; // Another possible incompatibility occurs from the vector element types. We
unsigned VTEltSize = VT.getVectorElementType().getSizeInBits(); // can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final sanity check before we try to actually produce a shuffle.
DEBUG(
for (auto Src : Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT);
);
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i); SDValue Entry = Op.getOperand(i);
int SourceNum = 1; if (Entry.getOpcode() == ISD::UNDEF)
unsigned LanePartNum = 0; continue;
int ExtractElt;
if (Entry.getOpcode() != ISD::UNDEF) {
// Check how many parts of source lane should be inserted.
SDValue ExtractVec = Entry.getOperand(0);
if (ExtractVec == SourceVecs[0])
SourceNum = 0;
ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
unsigned ExtEltSize =
ExtractVec.getValueType().getVectorElementType().getSizeInBits();
unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
}
for (unsigned j = 0; j != ResMultiplier; ++j) { auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
if (j < LanePartNum) int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
NumElts * SourceNum - VEXTOffsets[SourceNum] + j); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
else // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
Mask.push_back(-1); // segment.
} EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
VT.getVectorElementType().getSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
} }
// Final check before we try to produce nonsense... // Final check before we try to produce nonsense...
if (isShuffleMaskLegal(Mask, ShuffleVT)) { if (!isShuffleMaskLegal(Mask, ShuffleVT))
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0], return SDValue();
ShuffleSrcs[1], &Mask[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
return SDValue(); SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], &Mask[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
} }
// check if an EXT instruction can handle the shuffle mask when the // check if an EXT instruction can handle the shuffle mask when the