forked from OSchip/llvm-project
[X86] EltsFromConsecutiveLoads - clean up element size calcs. NFCI.
Compute the element/load sizes earlier and assert that they are whole bytes in size.
llvm-svn: 365674
This commit is contained in:
parent
58a37754bb
commit
5dd2af5248
|
@ -7567,6 +7567,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||||
"Register/Memory size mismatch");
|
"Register/Memory size mismatch");
|
||||||
LoadSDNode *LDBase = Loads[FirstLoadedElt];
|
LoadSDNode *LDBase = Loads[FirstLoadedElt];
|
||||||
assert(LDBase && "Did not find base load for merging consecutive loads");
|
assert(LDBase && "Did not find base load for merging consecutive loads");
|
||||||
|
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
|
||||||
|
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
|
||||||
|
int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
|
||||||
|
assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
|
||||||
|
|
||||||
// Consecutive loads can contain UNDEFS but not ZERO elements.
|
// Consecutive loads can contain UNDEFS but not ZERO elements.
|
||||||
// Consecutive loads with UNDEFs and ZEROs elements require a
|
// Consecutive loads with UNDEFs and ZEROs elements require a
|
||||||
|
@ -7576,10 +7580,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||||
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
|
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
|
||||||
if (LoadMask[i]) {
|
if (LoadMask[i]) {
|
||||||
SDValue Elt = peekThroughBitcasts(Elts[i]);
|
SDValue Elt = peekThroughBitcasts(Elts[i]);
|
||||||
LoadSDNode *LD = Loads[i];
|
if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes,
|
||||||
if (!DAG.areNonVolatileConsecutiveLoads(
|
i - FirstLoadedElt)) {
|
||||||
LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
|
|
||||||
i - FirstLoadedElt)) {
|
|
||||||
IsConsecutiveLoad = false;
|
IsConsecutiveLoad = false;
|
||||||
IsConsecutiveLoadWithZeros = false;
|
IsConsecutiveLoadWithZeros = false;
|
||||||
break;
|
break;
|
||||||
|
@ -7646,9 +7648,6 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned BaseSize = EltBaseVT.getStoreSizeInBits();
|
|
||||||
int LoadSize = (1 + LastLoadedElt - FirstLoadedElt) * BaseSize;
|
|
||||||
|
|
||||||
// If the upper half of a ymm/zmm load is undef then just load the lower half.
|
// If the upper half of a ymm/zmm load is undef then just load the lower half.
|
||||||
if (VT.is256BitVector() || VT.is512BitVector()) {
|
if (VT.is256BitVector() || VT.is512BitVector()) {
|
||||||
unsigned HalfNumElems = NumElems / 2;
|
unsigned HalfNumElems = NumElems / 2;
|
||||||
|
@ -7666,11 +7665,11 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||||
|
|
||||||
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
|
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
|
||||||
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
|
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
|
||||||
(LoadSize == 32 || LoadSize == 64) &&
|
(LoadSizeInBits == 32 || LoadSizeInBits == 64) &&
|
||||||
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
|
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
|
||||||
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
|
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
|
||||||
: MVT::getIntegerVT(LoadSize);
|
: MVT::getIntegerVT(LoadSizeInBits);
|
||||||
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
|
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
|
||||||
if (TLI.isTypeLegal(VecVT)) {
|
if (TLI.isTypeLegal(VecVT)) {
|
||||||
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
|
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
|
||||||
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
|
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
|
||||||
|
@ -7688,11 +7687,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||||
|
|
||||||
// BROADCAST - match the smallest possible repetition pattern, load that
|
// BROADCAST - match the smallest possible repetition pattern, load that
|
||||||
// scalar/subvector element and then broadcast to the entire vector.
|
// scalar/subvector element and then broadcast to the entire vector.
|
||||||
if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) &&
|
if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
|
||||||
(BaseSize % 8) == 0 && Subtarget.hasAVX() &&
|
|
||||||
(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
|
(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
|
||||||
for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
|
for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
|
||||||
unsigned RepeatSize = SubElems * BaseSize;
|
unsigned RepeatSize = SubElems * BaseSizeInBits;
|
||||||
unsigned ScalarSize = std::min(RepeatSize, 64u);
|
unsigned ScalarSize = std::min(RepeatSize, 64u);
|
||||||
if (!Subtarget.hasAVX2() && ScalarSize < 32)
|
if (!Subtarget.hasAVX2() && ScalarSize < 32)
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue