From aadf55d1cea24a4e5384ab8546c3d794cb1ec724 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 17 Sep 2020 11:08:26 +0300 Subject: [PATCH] [NFC] EliminateDuplicatePHINodes(): small-size optimization: if there are <= 32 PHI's, O(n^2) algo is faster (geomean -0.08%) This is functionally equivalent to the old implementation. As per https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=4739e6e4eb54d3736e6457249c0919b30f6c855a&stat=instructions this is a clear geomean compile-time regression-free win with overall geomean of `-0.08%` 32 PHI's appears to be the sweet spot; both the 16 and 64 performed worse: https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=c4efe1fbbfdf0305ac26cd19eacb0c7774cdf60e&stat=instructions https://llvm-compile-time-tracker.com/compare.php?from=5f4e9bf6416e45eba483a4e5e263749989fdb3b3&to=e4989d1c67010d3339d1a40ff5286a31f10cfe82&stat=instructions If we have more PHI's than that, we fall-back to the original DenseSet-based implementation, so the not-so-fast cases will still be handled. However compile-time isn't the main motivation here. I can name at least 3 limitations of this CSE: 1. Assumes that all PHI nodes have incoming basic blocks in the same order (can be fixed while keeping the DenseMap) 2. Does not special-handle `undef` incoming values (i don't see how we can do this with hashing) 3. Does not special-handle backedge incoming values (maybe can be fixed by hashing backedge as some magical value) Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87408 --- llvm/lib/Transforms/Utils/Local.cpp | 55 +++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0b848feddf8e..51e8251b2280 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -104,6 +104,12 @@ static cl::opt PHICSEDebugHash( cl::desc("Perform extra assertion checking to verify that PHINodes's hash " "function is well-behaved w.r.t. its isEqual predicate")); +static cl::opt PHICSENumPHISmallSize( + "phicse-num-phi-smallsize", cl::init(32), cl::Hidden, + cl::desc( + "When the basic block contains not more than this number of PHI nodes, " + "perform a (faster!) exhaustive search instead of set-driven one.")); + // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms static const unsigned BitPartRecursionMaxDepth = 64; @@ -1132,9 +1138,39 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -// WARNING: this logic must be kept in sync with -// Instruction::isIdenticalToWhenDefined()! -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { +static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + bool Changed = false; + + // Examine each PHI. + // Note that increment of I must *NOT* be in the iteration_expression, since + // we don't want to immediately advance when we restart from the beginning. + for (auto I = BB->begin(); PHINode *PN = dyn_cast(I);) { + ++I; + // Is there an identical PHI node in this basic block? + // Note that we only look in the upper square's triangle, + // we already checked that the lower triangle PHI's aren't identical. + for (auto J = I; PHINode *DuplicatePN = dyn_cast(J); ++J) { + if (!DuplicatePN->isIdenticalToWhenDefined(PN)) + continue; + // A duplicate. Replace this PHI with the base PHI. + ++NumPHICSEs; + DuplicatePN->replaceAllUsesWith(PN); + DuplicatePN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. + I = BB->begin(); + break; // Start over from the beginning. + } + } + return Changed; +} + +static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1152,6 +1188,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { return PN == getEmptyKey() || PN == getTombstoneKey(); } + // WARNING: this logic must be kept in sync with + // Instruction::isIdenticalToWhenDefined()! static unsigned getHashValueImpl(PHINode *PN) { // Compute a hash value on the operands. Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all @@ -1191,6 +1229,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // Set of unique PHINodes. DenseSet PHISet; + PHISet.reserve(4 * PHICSENumPHISmallSize); // Examine each PHI. bool Changed = false; @@ -1213,6 +1252,16 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { return Changed; } +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + if ( +#ifndef NDEBUG + !PHICSEDebugHash && +#endif + hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) + return EliminateDuplicatePHINodesNaiveImpl(BB); + return EliminateDuplicatePHINodesSetBasedImpl(BB); +} + /// enforceKnownAlignment - If the specified pointer points to an object that /// we control, modify the object's alignment to PrefAlign. This isn't /// often possible though. If alignment is important, a more reliable approach