[DAG] Avoid checking for consecutive stores in store merge. NFCI.

llvm-svn: 333766
commit fc9a700f94
parent 39ece11ae5
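The restructuring is easier to see in miniature. Below is a standalone sketch of the consume-and-decrement pattern the patch adopts: compute the consecutive-candidate count once, then keep it in sync while erasing merged or skipped candidates from the front, instead of re-scanning for consecutive stores after every merge attempt. This is a toy model, not LLVM code; mergeRuns, its vector-of-ints input, and the equality-based merge test are invented purely for illustration.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy model of the new control flow: consume the candidate list
// front-to-back while maintaining a running count of known-consecutive
// candidates, rather than re-checking consecutiveness each iteration.
static unsigned mergeRuns(std::vector<int> &Candidates) {
  unsigned NumMerges = 0;
  std::size_t NumConsecutive = Candidates.size(); // computed once up front
  while (NumConsecutive >= 2) {
    // Stand-in for the legal-type scan: merge the leading pair if equal.
    std::size_t NumElem = (Candidates[0] == Candidates[1]) ? 2 : 0;
    if (NumElem < 2) {
      // No merge starts here; drop one candidate and keep the count in
      // sync, mirroring the NumSkip / NumConsecutiveStores bookkeeping.
      Candidates.erase(Candidates.begin());
      NumConsecutive -= 1;
      continue;
    }
    ++NumMerges;
    Candidates.erase(Candidates.begin(), Candidates.begin() + NumElem);
    NumConsecutive -= NumElem;
  }
  return NumMerges;
}

int main() {
  std::vector<int> Stores = {1, 1, 2, 3, 3};
  std::cout << mergeRuns(Stores) << "\n"; // prints 2
  return 0;
}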
@@ -13227,13 +13227,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
   } else { // Must be realized as a trunc store
-    EVT LegalizedStoredValueTy =
+    EVT LegalizedStoredValTy =
         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
-    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
+    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
     SDValue ExtendedStoreVal =
         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
-                        LegalizedStoredValueTy);
+                        LegalizedStoredValTy);
     NewStore = DAG.getTruncStore(
         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
@@ -13529,173 +13529,185 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
 
   // Store the constants into memory as one consecutive store.
   if (IsConstantSrc) {
+    while (NumConsecutiveStores >= 2) {
       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
       unsigned FirstStoreAlign = FirstInChain->getAlignment();
       unsigned LastLegalType = 1;
       unsigned LastLegalVectorType = 1;
       bool LastIntegerTrunc = false;
       bool NonZero = false;
       unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
         SDValue StoredVal = ST->getValue();
         bool IsElementZero = false;
         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
           IsElementZero = C->isNullValue();
         else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
           IsElementZero = C->getConstantFPValue()->isNullValue();
         if (IsElementZero) {
           if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
             FirstZeroAfterNonZero = i;
         }
         NonZero |= !IsElementZero;
 
         // Find a legal type for the constant store.
         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
         bool IsFast = false;
 
         // Break early when size is too large to be legal.
         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
           break;
 
         if (TLI.isTypeLegal(StoreTy) &&
             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                    FirstStoreAlign, &IsFast) &&
             IsFast) {
           LastIntegerTrunc = false;
           LastLegalType = i + 1;
           // Or check whether a truncstore is legal.
         } else if (TLI.getTypeAction(Context, StoreTy) ==
                    TargetLowering::TypePromoteInteger) {
-          EVT LegalizedStoredValueTy =
-              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
-          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
-              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
+          EVT LegalizedStoredValTy =
+              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                      FirstStoreAlign, &IsFast) &&
               IsFast) {
             LastIntegerTrunc = true;
             LastLegalType = i + 1;
           }
         }
 
         // We only use vectors if the constant is known to be zero or the
         // target allows it and the function is not marked with the
         // noimplicitfloat attribute.
         if ((!NonZero ||
              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
             !NoVectors) {
           // Find a legal type for the vector store.
           unsigned Elts = (i + 1) * NumMemElts;
           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
           if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                      FirstStoreAlign, &IsFast) &&
               IsFast)
             LastLegalVectorType = i + 1;
         }
       }
 
       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
 
       // Check if we found a legal integer type that creates a meaningful
       // merge.
       if (NumElem < 2) {
         // We know that candidate stores are in order and of correct
         // shape. While there is no mergeable sequence from the
         // beginning one may start later in the sequence. The only
         // reason a merge of size N could have failed where another of
         // the same size would not have, is if the alignment has
         // improved or we've dropped a non-zero value. Drop as many
         // candidates as we can here.
         unsigned NumSkip = 1;
         while (
             (NumSkip < NumConsecutiveStores) &&
             (NumSkip < FirstZeroAfterNonZero) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
           NumSkip++;
 
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+        NumConsecutiveStores -= NumSkip;
         continue;
       }
 
       // Check that we can merge these candidates without causing a cycle.
       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                     RootNode)) {
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+        NumConsecutiveStores -= NumElem;
         continue;
       }
 
-      bool Merged = MergeStoresOfConstantsOrVecElts(
-          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
-      RV |= Merged;
+      RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
+                                            UseVector, LastIntegerTrunc);
 
       // Remove merged stores for next iteration.
       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
-      continue;
+      NumConsecutiveStores -= NumElem;
+    }
   }
 
   // When extracting multiple vector elements, try to store them
   // in one vector store rather than a sequence of scalar stores.
   if (IsExtractVecSrc) {
+    // Loop on Consecutive Stores on success.
+    while (NumConsecutiveStores >= 2) {
       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
       unsigned FirstStoreAlign = FirstInChain->getAlignment();
       unsigned NumStoresToMerge = 1;
       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
         // Find a legal type for the vector store.
         unsigned Elts = (i + 1) * NumMemElts;
         EVT Ty =
             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
         bool IsFast;
 
         // Break early when size is too large to be legal.
         if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
           break;
 
         if (TLI.isTypeLegal(Ty) &&
             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                    FirstStoreAlign, &IsFast) &&
             IsFast)
           NumStoresToMerge = i + 1;
       }
 
-      // Check if we found a legal integer type that creates a meaningful merge.
+      // Check if we found a legal integer type creating a meaningful
+      // merge.
       if (NumStoresToMerge < 2) {
         // We know that candidate stores are in order and of correct
         // shape. While there is no mergeable sequence from the
         // beginning one may start later in the sequence. The only
         // reason a merge of size N could have failed where another of
         // the same size would not have, is if the alignment has
         // improved. Drop as many candidates as we can here.
         unsigned NumSkip = 1;
         while (
             (NumSkip < NumConsecutiveStores) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
           NumSkip++;
 
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+        NumConsecutiveStores -= NumSkip;
         continue;
       }
 
       // Check that we can merge these candidates without causing a cycle.
       if (!checkMergeStoreCandidatesForDependencies(
               StoreNodes, NumStoresToMerge, RootNode)) {
         StoreNodes.erase(StoreNodes.begin(),
                          StoreNodes.begin() + NumStoresToMerge);
+        NumConsecutiveStores -= NumStoresToMerge;
         continue;
       }
 
       RV |= MergeStoresOfConstantsOrVecElts(
           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
 
       StoreNodes.erase(StoreNodes.begin(),
                        StoreNodes.begin() + NumStoresToMerge);
-      continue;
+      NumConsecutiveStores -= NumStoresToMerge;
+    }
   }
@@ -13731,94 +13743,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
     }
 
-    if (LoadNodes.size() < 2) {
-      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
-      continue;
-    }
-
+    while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
       // If we have load/store pair instructions and we only have two values,
       // don't bother merging.
       unsigned RequiredAlignment;
       if (LoadNodes.size() == 2 &&
           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
-        continue;
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+        break;
       }
       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
       unsigned FirstStoreAlign = FirstInChain->getAlignment();
       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
       unsigned FirstLoadAlign = FirstLoad->getAlignment();
 
       // Scan the memory operations on the chain and find the first
       // non-consecutive load memory address. These variables hold the index
       // in the store node array.
 
       unsigned LastConsecutiveLoad = 1;
 
       // This variable refers to the size and not index in the array.
       unsigned LastLegalVectorType = 1;
       unsigned LastLegalIntegerType = 1;
       bool isDereferenceable = true;
       bool DoIntegerTruncate = false;
       StartAddress = LoadNodes[0].OffsetFromBase;
       SDValue FirstChain = FirstLoad->getChain();
       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
         // All loads must share the same chain.
         if (LoadNodes[i].MemNode->getChain() != FirstChain)
           break;
 
         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
           break;
         LastConsecutiveLoad = i;
 
         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
           isDereferenceable = false;
 
         // Find a legal type for the vector store.
         unsigned Elts = (i + 1) * NumMemElts;
         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
 
         // Break early when size is too large to be legal.
         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
           break;
 
         bool IsFastSt, IsFastLd;
         if (TLI.isTypeLegal(StoreTy) &&
             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                    FirstStoreAlign, &IsFastSt) &&
             IsFastSt &&
             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                    FirstLoadAlign, &IsFastLd) &&
             IsFastLd) {
           LastLegalVectorType = i + 1;
         }
 
         // Find a legal type for the integer store.
         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
         if (TLI.isTypeLegal(StoreTy) &&
             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                    FirstStoreAlign, &IsFastSt) &&
             IsFastSt &&
@@ -13826,112 +13819,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
                                    FirstLoadAlign, &IsFastLd) &&
             IsFastLd) {
           LastLegalIntegerType = i + 1;
           DoIntegerTruncate = false;
           // Or check whether a truncstore and extload is legal.
         } else if (TLI.getTypeAction(Context, StoreTy) ==
                    TargetLowering::TypePromoteInteger) {
-          EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
-          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
-              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
-              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
-                                 StoreTy) &&
-              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
-                                 StoreTy) &&
-              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
+                                 StoreTy) &&
+              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
+                                 StoreTy) &&
+              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                      FirstStoreAlign, &IsFastSt) &&
               IsFastSt &&
               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                      FirstLoadAlign, &IsFastLd) &&
              IsFastLd) {
             LastLegalIntegerType = i + 1;
             DoIntegerTruncate = true;
           }
         }
       }
 
       // Only use vector types if the vector type is larger than the integer
       // type. If they are the same, use integers.
       bool UseVectorTy =
           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
       unsigned LastLegalType =
           std::max(LastLegalVectorType, LastLegalIntegerType);
 
       // We add +1 here because the LastXXX variables refer to location while
       // the NumElem refers to array/index size.
       unsigned NumElem =
           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
       NumElem = std::min(LastLegalType, NumElem);
 
       if (NumElem < 2) {
         // We know that candidate stores are in order and of correct
         // shape. While there is no mergeable sequence from the
         // beginning one may start later in the sequence. The only
         // reason a merge of size N could have failed where another of
         // the same size would not have is if the alignment or either
         // the load or store has improved. Drop as many candidates as we
         // can here.
         unsigned NumSkip = 1;
         while ((NumSkip < LoadNodes.size()) &&
                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
           NumSkip++;
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+        NumConsecutiveStores -= NumSkip;
         continue;
       }
 
       // Check that we can merge these candidates without causing a cycle.
       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                     RootNode)) {
         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+        NumConsecutiveStores -= NumElem;
         continue;
       }
 
       // Find if it is better to use vectors or integers to load and store
       // to memory.
       EVT JointMemOpVT;
       if (UseVectorTy) {
         // Find a legal type for the vector store.
         unsigned Elts = NumElem * NumMemElts;
         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
       } else {
         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
       }
 
       SDLoc LoadDL(LoadNodes[0].MemNode);
       SDLoc StoreDL(StoreNodes[0].MemNode);
 
       // The merged loads are required to have the same incoming chain, so
       // using the first's chain is acceptable.
 
       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
       AddToWorklist(NewStoreChain.getNode());
 
-      MachineMemOperand::Flags MMOFlags = isDereferenceable ?
-                                            MachineMemOperand::MODereferenceable:
-                                            MachineMemOperand::MONone;
+      MachineMemOperand::Flags MMOFlags =
+          isDereferenceable ? MachineMemOperand::MODereferenceable
+                            : MachineMemOperand::MONone;
 
       SDValue NewLoad, NewStore;
       if (UseVectorTy || !DoIntegerTruncate) {
-        NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
-                              FirstLoad->getBasePtr(),
-                              FirstLoad->getPointerInfo(), FirstLoadAlign,
-                              MMOFlags);
-        NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
-                                FirstInChain->getBasePtr(),
-                                FirstInChain->getPointerInfo(), FirstStoreAlign);
+        NewLoad =
+            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+                        FirstLoadAlign, MMOFlags);
+        NewStore = DAG.getStore(
+            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+            FirstInChain->getPointerInfo(), FirstStoreAlign);
       } else { // This must be the truncstore/extload case
         EVT ExtendedTy =
             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
-        NewLoad =
-            DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
-                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
-                           JointMemOpVT, FirstLoadAlign, MMOFlags);
+        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+                                 FirstLoad->getChain(), FirstLoad->getBasePtr(),
+                                 FirstLoad->getPointerInfo(), JointMemOpVT,
+                                 FirstLoadAlign, MMOFlags);
         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                      FirstInChain->getBasePtr(),
                                      FirstInChain->getPointerInfo(),
                                      JointMemOpVT, FirstInChain->getAlignment(),
                                      FirstInChain->getMemOperand()->getFlags());
       }
 
       // Transfer chain users from old loads to the new load.
       for (unsigned i = 0; i < NumElem; ++i) {
         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                       SDValue(NewLoad.getNode(), 1));
       }
 
       // Replace the all stores with the new store. Recursively remove
       // corresponding value if its no longer used.
       for (unsigned i = 0; i < NumElem; ++i) {
         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
         CombineTo(StoreNodes[i].MemNode, NewStore);
         if (Val.getNode()->use_empty())
           recursivelyDeleteUnusedNodes(Val.getNode());
       }
 
       RV = true;
       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
-      continue;
+      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+      NumConsecutiveStores -= NumElem;
+    }
   }
 
   return RV;
 }