[DAG] Avoid checking for consecutive stores in store merge. NFCI.

llvm-svn: 333766
This commit is contained in:
Nirav Dave 2018-06-01 15:05:55 +00:00
parent 39ece11ae5
commit fc9a700f94
1 changed files with 343 additions and 322 deletions

View File

@ -13227,13 +13227,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValueTy =
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
SDValue ExtendedStoreVal =
DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
LegalizedStoredValueTy);
LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
@ -13529,173 +13529,185 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
IsElementZero = C->isNullValue();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
}
NonZero |= !IsElementZero;
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
IsElementZero = C->isNullValue();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
}
NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
}
}
// We only use vectors if the constant is known to be zero or the
// target allows it and the function is not marked with the
// noimplicitfloat attribute.
if ((!NonZero ||
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
!NoVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
}
// We only use vectors if the constant is known to be zero or the target
// allows it and the function is not marked with the noimplicitfloat
// attribute.
if ((!NonZero ||
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
!NoVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
// Check if we found a legal integer type that creates a meaningful
// merge.
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved or we've dropped a non-zero value. Drop as many
// candidates as we can here.
unsigned NumSkip = 1;
while (
(NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
}
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
// Check if we found a legal integer type that creates a meaningful merge.
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved or we've dropped a non-zero value. Drop as many
// candidates as we can here.
unsigned NumSkip = 1;
while (
(NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
NumSkip++;
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
UseVector, LastIntegerTrunc);
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
NumConsecutiveStores -= NumElem;
}
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
RV |= Merged;
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Loop on Consecutive Stores on success.
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
// Check if we found a legal integer type creating a meaningful
// merge.
if (NumStoresToMerge < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while (
(NumSkip < NumConsecutiveStores) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
// Check if we found a legal integer type that creates a meaningful merge.
if (NumStoresToMerge < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(
StoreNodes, NumStoresToMerge, RootNode)) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
continue;
}
RV |= MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, false, true, false);
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(
StoreNodes, NumStoresToMerge, RootNode)) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
continue;
NumConsecutiveStores -= NumStoresToMerge;
}
RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
false, true, false);
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
continue;
}
@ -13731,94 +13743,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
if (LoadNodes.size() < 2) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
continue;
}
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
unsigned RequiredAlignment;
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
continue;
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
unsigned LastConsecutiveLoad = 1;
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
bool isDereferenceable = true;
bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
unsigned RequiredAlignment;
if (LoadNodes.size() == 2 &&
TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
isDereferenceable = false;
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
// Find a legal type for the integer store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
// Or check whether a truncstore and extload is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
unsigned LastConsecutiveLoad = 1;
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
bool isDereferenceable = true;
bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
isDereferenceable = false;
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@ -13826,112 +13819,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
DoIntegerTruncate = false;
// Or check whether a truncstore and extload is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
}
}
}
}
// Only use vector types if the vector type is larger than the integer type.
// If they are the same, use integers.
bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
unsigned LastLegalType =
std::max(LastLegalVectorType, LastLegalIntegerType);
// Only use vector types if the vector type is larger than the integer
// type. If they are the same, use integers.
bool UseVectorTy =
LastLegalVectorType > LastLegalIntegerType && !NoVectors;
unsigned LastLegalType =
std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables refer to location while
// the NumElem refers to array/index size.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
// We add +1 here because the LastXXX variables refer to location while
// the NumElem refers to array/index size.
unsigned NumElem =
std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment or either
// the load or store has improved. Drop as many candidates as we
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
(LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment or either
// the load or store has improved. Drop as many candidates as we
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
(LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
// Find a legal type for the vector store.
unsigned Elts = NumElem * NumMemElts;
JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags MMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad =
DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
FirstLoadAlign, MMOFlags);
NewStore = DAG.getStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, MMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
JointMemOpVT, FirstInChain->getAlignment(),
FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
}
// Replace the all stores with the new store. Recursively remove
// corresponding value if its no longer used.
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
if (Val.getNode()->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
}
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
// Find a legal type for the vector store.
unsigned Elts = NumElem * NumMemElts;
JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags MMOFlags = isDereferenceable ?
MachineMemOperand::MODereferenceable:
MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign,
MMOFlags);
NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad =
DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
JointMemOpVT, FirstLoadAlign, MMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
FirstInChain->getAlignment(),
FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
}
// Replace the all stores with the new store. Recursively remove
// corresponding value if its no longer used.
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
if (Val.getNode()->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
}
return RV;
}