[DAG] Improve candidate pruning in store merge failure case. NFCI

During store merge we construct a sorted list of consecutive store
candidates and consider subsequences for merging into a single
store. For each subsequence we check if the stored value type is legal
the merged store would have valid and fast and if the constructed
value to be stored is valid. The only properties that affect this
check between subsequences is the size of the subsequence, the
alignment of the first store, the alignment of the stored load value
(when merging stores-of-loads), and whether the merged value is a
constant zero.

If we do not find a viable mergeable subsequence starting from the
first store of length N, we know that a subsequence starting at a
later store of length N will also fail unless the new store's
alignment, the new load's alignment (if we're merging store-of-loads),
or we've dropped stores of nonzero value and could construct a merged
stores of zero (for merging constants).

As a result if we fail to find a valid subsequence starting from the
first store we can safely skip considering subsequences that start
with subsequent stores unless one of the above properties is
true. This significantly (2x) improves compile time in some
pathological cases.

Reviewers: RKSimon, efriedma, zvi, spatel, waltl

Subscribers: grandinj, llvm-commits

Differential Revision: https://reviews.llvm.org/D35901

llvm-svn: 309830
This commit is contained in:
Nirav Dave 2017-08-02 16:35:58 +00:00
parent edd6c3587c
commit e44032f7e7
1 changed files with 62 additions and 21 deletions

View File

@ -12803,19 +12803,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
NonZero |= !C->isNullValue();
} else if (ConstantFPSDNode *C =
dyn_cast<ConstantFPSDNode>(StoredVal)) {
NonZero |= !C->getConstantFPValue()->isNullValue();
} else {
// Non-constant.
break;
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
IsElementZero = C->isNullValue();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
}
NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
@ -12861,23 +12862,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
}
// Check if we found a legal integer type that creates a meaningful merge.
if (LastLegalType < 2 && LastLegalVectorType < 2) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
continue;
}
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
if (!Merged) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
// Check if we found a legal integer type that creates a meaningful merge.
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved or we've dropped a non-zero value. Drop as many
// candidates as we can here.
unsigned NumSkip = 1;
while (
(NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
NumSkip++;
}
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
RV |= Merged;
// Remove merged stores for next iteration.
RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
}
@ -12914,6 +12926,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumStoresToMerge = i + 1;
}
// Check if we found a legal integer type that creates a meaningful merge.
if (NumStoresToMerge < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have, is if the alignment has
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, false, true, false);
if (!Merged) {
@ -13081,7 +13110,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment or either
// the load or store has improved. Drop as many candidates as we
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
(LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}