forked from OSchip/llvm-project
[AArch64] Fix a bug that generated an incorrect instruction when building a small vector.
This bug was introduced by r211144. The element type of an operand may be smaller than the element type of the result, but the previous commit could only handle the opposite case. This commit handles that scenario as well and generates optimized code such as ZIP1. llvm-svn: 213830
This commit is contained in:
parent
9e7da0fb44
commit
9a2a2c502b
|
@ -4180,9 +4180,22 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
if (SourceVecs.size() > 2)
|
if (SourceVecs.size() > 2)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
|
// Find out the smallest element size among result and two sources, and use
|
||||||
|
// it as element size to build the shuffle_vector.
|
||||||
|
EVT SmallestEltTy = VT.getVectorElementType();
|
||||||
|
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
|
||||||
|
EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
|
||||||
|
if (SrcEltTy.bitsLT(SmallestEltTy)) {
|
||||||
|
SmallestEltTy = SrcEltTy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned ResMultiplier =
|
||||||
|
VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||||
int VEXTOffsets[2] = { 0, 0 };
|
int VEXTOffsets[2] = { 0, 0 };
|
||||||
int OffsetMultipliers[2] = { 1, 1 };
|
int OffsetMultipliers[2] = { 1, 1 };
|
||||||
|
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||||
|
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
|
||||||
|
SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
|
||||||
|
|
||||||
// This loop extracts the usage patterns of the source vectors
|
// This loop extracts the usage patterns of the source vectors
|
||||||
// and prepares appropriate SDValues for a shuffle if possible.
|
// and prepares appropriate SDValues for a shuffle if possible.
|
||||||
|
@ -4190,15 +4203,15 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
|
unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
|
||||||
SDValue CurSource = SourceVecs[i];
|
SDValue CurSource = SourceVecs[i];
|
||||||
if (SourceVecs[i].getValueType().getVectorElementType() !=
|
if (SourceVecs[i].getValueType().getVectorElementType() !=
|
||||||
VT.getVectorElementType()) {
|
ShuffleVT.getVectorElementType()) {
|
||||||
// It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
|
// As ShuffleVT holds smallest element size, it may hit here only if
|
||||||
// Then bitcast it to the vector which holds asserted element type,
|
// the element type of SourceVecs is bigger than that of ShuffleVT.
|
||||||
// and record the multiplier of element width between SourceVecs and
|
// Adjust the element size of SourceVecs to match ShuffleVT, and record
|
||||||
// Build_vector which is needed to extract the correct lanes later.
|
// the multipliers.
|
||||||
EVT CastVT =
|
EVT CastVT = EVT::getVectorVT(
|
||||||
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
|
*DAG.getContext(), ShuffleVT.getVectorElementType(),
|
||||||
SourceVecs[i].getValueSizeInBits() /
|
SourceVecs[i].getValueSizeInBits() /
|
||||||
VT.getVectorElementType().getSizeInBits());
|
ShuffleVT.getVectorElementType().getSizeInBits());
|
||||||
|
|
||||||
CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
|
CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
|
||||||
OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
|
OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
|
||||||
|
@ -4207,7 +4220,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
MinElts[i] *= OffsetMultipliers[i];
|
MinElts[i] *= OffsetMultipliers[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CurSource.getValueType() == VT) {
|
if (CurSource.getValueType() == ShuffleVT) {
|
||||||
// No VEXT necessary
|
// No VEXT necessary
|
||||||
ShuffleSrcs[i] = CurSource;
|
ShuffleSrcs[i] = CurSource;
|
||||||
VEXTOffsets[i] = 0;
|
VEXTOffsets[i] = 0;
|
||||||
|
@ -4215,8 +4228,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
} else if (NumSrcElts < NumElts) {
|
} else if (NumSrcElts < NumElts) {
|
||||||
// We can pad out the smaller vector for free, so if it's part of a
|
// We can pad out the smaller vector for free, so if it's part of a
|
||||||
// shuffle...
|
// shuffle...
|
||||||
ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
|
ShuffleSrcs[i] =
|
||||||
DAG.getUNDEF(CurSource.getValueType()));
|
DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
|
||||||
|
DAG.getUNDEF(CurSource.getValueType()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4233,50 +4247,61 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
|
||||||
if (MinElts[i] >= NumElts) {
|
if (MinElts[i] >= NumElts) {
|
||||||
// The extraction can just take the second half
|
// The extraction can just take the second half
|
||||||
VEXTOffsets[i] = NumElts;
|
VEXTOffsets[i] = NumElts;
|
||||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
|
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
||||||
DAG.getIntPtrConstant(NumElts));
|
CurSource, DAG.getIntPtrConstant(NumElts));
|
||||||
} else if (MaxElts[i] < NumElts) {
|
} else if (MaxElts[i] < NumElts) {
|
||||||
// The extraction can just take the first half
|
// The extraction can just take the first half
|
||||||
VEXTOffsets[i] = 0;
|
VEXTOffsets[i] = 0;
|
||||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
|
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
||||||
DAG.getIntPtrConstant(0));
|
CurSource, DAG.getIntPtrConstant(0));
|
||||||
} else {
|
} else {
|
||||||
// An actual VEXT is needed
|
// An actual VEXT is needed
|
||||||
VEXTOffsets[i] = MinElts[i];
|
VEXTOffsets[i] = MinElts[i];
|
||||||
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
|
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
||||||
DAG.getIntPtrConstant(0));
|
CurSource, DAG.getIntPtrConstant(0));
|
||||||
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
|
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
|
||||||
DAG.getIntPtrConstant(NumElts));
|
CurSource, DAG.getIntPtrConstant(NumElts));
|
||||||
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
|
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
|
||||||
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
|
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
|
||||||
DAG.getConstant(Imm, MVT::i32));
|
VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SmallVector<int, 8> Mask;
|
SmallVector<int, 8> Mask;
|
||||||
|
unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();
|
||||||
|
|
||||||
for (unsigned i = 0; i < NumElts; ++i) {
|
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
|
||||||
SDValue Entry = Op.getOperand(i);
|
SDValue Entry = Op.getOperand(i);
|
||||||
if (Entry.getOpcode() == ISD::UNDEF) {
|
int SourceNum = 1;
|
||||||
Mask.push_back(-1);
|
unsigned LanePartNum = 0;
|
||||||
continue;
|
int ExtractElt;
|
||||||
|
if (Entry.getOpcode() != ISD::UNDEF) {
|
||||||
|
// Check how many parts of source lane should be inserted.
|
||||||
|
SDValue ExtractVec = Entry.getOperand(0);
|
||||||
|
if (ExtractVec == SourceVecs[0])
|
||||||
|
SourceNum = 0;
|
||||||
|
ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
|
||||||
|
unsigned ExtEltSize =
|
||||||
|
ExtractVec.getValueType().getVectorElementType().getSizeInBits();
|
||||||
|
unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
|
||||||
|
LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue ExtractVec = Entry.getOperand(0);
|
for (unsigned j = 0; j != ResMultiplier; ++j) {
|
||||||
int ExtractElt =
|
if (j < LanePartNum)
|
||||||
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
|
Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
|
||||||
if (ExtractVec == SourceVecs[0]) {
|
NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
|
||||||
Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
|
else
|
||||||
} else {
|
Mask.push_back(-1);
|
||||||
Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
|
|
||||||
VEXTOffsets[1]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final check before we try to produce nonsense...
|
// Final check before we try to produce nonsense...
|
||||||
if (isShuffleMaskLegal(Mask, VT))
|
if (isShuffleMaskLegal(Mask, ShuffleVT)) {
|
||||||
return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
|
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
|
||||||
&Mask[0]);
|
ShuffleSrcs[1], &Mask[0]);
|
||||||
|
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1387,6 +1387,13 @@ entry:
|
||||||
ret <8 x i16> %shuffle.i
|
ret <8 x i16> %shuffle.i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
|
||||||
|
; CHECK-LABEL: test_vzip1_v4i8:
|
||||||
|
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||||
|
%lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
ret <4 x i8> %lo
|
||||||
|
}
|
||||||
|
|
||||||
define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
|
define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
|
||||||
; CHECK-LABEL: test_same_vzip2_s8:
|
; CHECK-LABEL: test_same_vzip2_s8:
|
||||||
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||||
|
|
Loading…
Reference in New Issue