diff --git a/bolt/src/Passes/BinaryPasses.cpp b/bolt/src/Passes/BinaryPasses.cpp index 1ee58c0cbb92..569598c5ae18 100644 --- a/bolt/src/Passes/BinaryPasses.cpp +++ b/bolt/src/Passes/BinaryPasses.cpp @@ -481,7 +481,7 @@ void ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF, break; case LT_OPTIMIZE_CACHE_PLUS: - Algo.reset(new CachePlusReorderAlgorithm(std::move(CAlgo))); + Algo.reset(new CachePlusReorderAlgorithm()); break; case LT_OPTIMIZE_SHUFFLE: diff --git a/bolt/src/Passes/CachePlusReorderAlgorithm.cpp b/bolt/src/Passes/CachePlusReorderAlgorithm.cpp index 26d35f00011b..1e99792bfe53 100644 --- a/bolt/src/Passes/CachePlusReorderAlgorithm.cpp +++ b/bolt/src/Passes/CachePlusReorderAlgorithm.cpp @@ -82,7 +82,7 @@ public: return Blocks; } - /// Update the list of basic blocks and meta-info + /// Update the list of basic blocks and aggregated cluster data void merge(const Cluster *Other, const std::vector &MergedBlocks, double MergedScore) { @@ -93,6 +93,10 @@ public: Score = MergedScore; } + void clear() { + Blocks.clear(); + } + private: std::vector Blocks; size_t Id; @@ -219,65 +223,14 @@ public: /// Run cache+ algorithm and return a basic block ordering std::vector run() { - // Merge blocks with their fallthrough successors - for (auto BB : BF.layout()) { - if (FallthroughPred[BB->getLayoutIndex()] == nullptr && - FallthroughSucc[BB->getLayoutIndex()] != nullptr) { - auto CurBB = BB; - while (FallthroughSucc[CurBB->getLayoutIndex()] != nullptr) { - const auto NextBB = FallthroughSucc[CurBB->getLayoutIndex()]; - mergeClusters(&AllClusters[BB->getLayoutIndex()], - &AllClusters[NextBB->getLayoutIndex()], - 0); - CurBB = NextBB; - } - } - } + // Pass 1: Merge blocks with their fallthrough successors + mergeFallthroughs(); - // Merge pairs of clusters while there is an improvement in ExtTSP metric - while (Clusters.size() > 1) { - Cluster *BestClusterPred = nullptr; - Cluster *BestClusterSucc = nullptr; - std::pair BestGain(-1, 0); - for (auto ClusterPred : 
Clusters) { - // Do not merge cold blocks - if (ClusterPred->isCold()) - continue; + // Pass 2: Merge pairs of clusters while improving the ExtTSP metric + mergeClusterPairs(); - // Get candidates for merging with the current cluster - Adjacent.forAllAdjacent( - ClusterPred, - // Find the best candidate - [&](Cluster *ClusterSucc) { - assert(ClusterPred != ClusterSucc && "loop edges are not supported"); - assert(!ClusterSucc->isCold() && "cannot merge cold clusters"); - - // Compute the gain of merging two clusters - auto Gain = mergeGain(ClusterPred, ClusterSucc); - if (Gain.first <= 0.0) - return; - - // Breaking ties by density to make the hottest clusters be merged first - if (Gain.first > BestGain.first || - (std::abs(Gain.first - BestGain.first) < 1e-8 && - compareClusterPairs(ClusterPred, - ClusterSucc, - BestClusterPred, - BestClusterSucc))) { - BestGain = Gain; - BestClusterPred = ClusterPred; - BestClusterSucc = ClusterSucc; - } - }); - } - - // Stop merging when there is no improvement - if (BestGain.first <= 0.0) - break; - - // Merge the best pair of clusters - mergeClusters(BestClusterPred, BestClusterSucc, BestGain.second); - } + // Pass 3: Merge cold blocks to reduce code size + mergeColdClusters(); // Sorting clusters by density std::stable_sort(Clusters.begin(), Clusters.end(), compareClusters); @@ -339,12 +292,14 @@ private: // Initialize clusters Clusters.reserve(BF.layout_size()); AllClusters.reserve(BF.layout_size()); + CurCluster.reserve(BF.layout_size()); Size.reserve(BF.layout_size()); for (auto BB : BF.layout()) { size_t Index = BB->getLayoutIndex(); Size.push_back(std::max(BB->estimateSize(), size_t(1))); AllClusters.emplace_back(BB, ExecutionCounts[Index], Size[Index]); Clusters.push_back(&AllClusters[Index]); + CurCluster.push_back(&AllClusters[Index]); } // Initialize adjacency matrix @@ -364,6 +319,88 @@ private: findFallthroughBlocks(InWeight, OutWeight); } + /// Merge blocks with their fallthrough successors. 
+ void mergeFallthroughs() { + for (auto BB : BF.layout()) { + if (FallthroughPred[BB->getLayoutIndex()] == nullptr && + FallthroughSucc[BB->getLayoutIndex()] != nullptr) { + auto CurBB = BB; + while (FallthroughSucc[CurBB->getLayoutIndex()] != nullptr) { + const auto NextBB = FallthroughSucc[CurBB->getLayoutIndex()]; + mergeClusters(&AllClusters[BB->getLayoutIndex()], + &AllClusters[NextBB->getLayoutIndex()], + 0); + CurBB = NextBB; + } + } + } + } + + /// Merge pairs of clusters while improving the ExtTSP metric + void mergeClusterPairs() { + while (Clusters.size() > 1) { + Cluster *BestClusterPred = nullptr; + Cluster *BestClusterSucc = nullptr; + std::pair BestGain(-1, 0); + for (auto ClusterPred : Clusters) { + // Do not merge cold blocks + if (ClusterPred->isCold()) + continue; + + // Get candidates for merging with the current cluster + Adjacent.forAllAdjacent( + ClusterPred, + // Find the best candidate + [&](Cluster *ClusterSucc) { + assert(ClusterPred != ClusterSucc && "loop edges are not supported"); + assert(!ClusterSucc->isCold() && "cannot merge cold clusters"); + + // Compute the gain of merging two clusters + auto Gain = mergeGain(ClusterPred, ClusterSucc); + if (Gain.first <= 0.0) + return; + + // Breaking ties by density to make the hottest clusters be merged first + if (Gain.first > BestGain.first || + (std::abs(Gain.first - BestGain.first) < 1e-8 && + compareClusterPairs(ClusterPred, + ClusterSucc, + BestClusterPred, + BestClusterSucc))) { + BestGain = Gain; + BestClusterPred = ClusterPred; + BestClusterSucc = ClusterSucc; + } + }); + } + + // Stop merging when there is no improvement + if (BestGain.first <= 0.0) + break; + + // Merge the best pair of clusters + mergeClusters(BestClusterPred, BestClusterSucc, BestGain.second); + } + } + + /// Merge cold blocks to reduce code size + void mergeColdClusters() { + for (auto SrcBB : BF.layout()) { + // Iterating in reverse order to make sure original fall-through jumps are + // merged first + for
(auto Itr = SrcBB->succ_rbegin(); Itr != SrcBB->succ_rend(); ++Itr) { + BinaryBasicBlock *DstBB = *Itr; + auto SrcCluster = CurCluster[SrcBB->getLayoutIndex()]; + auto DstCluster = CurCluster[DstBB->getLayoutIndex()]; + if (SrcCluster != DstCluster && !DstCluster->isEntryPoint() && + SrcCluster->blocks().back() == SrcBB && + DstCluster->blocks().front() == DstBB) { + mergeClusters(SrcCluster, DstCluster, 0); + } + } + } + } + /// For a pair of blocks, A and B, block B is the fallthrough successor of A, /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps /// to B are from A. Such blocks should be adjacent in an optimal ordering, @@ -558,11 +595,17 @@ private: // Merge the blocks of clusters auto MergedBlocks = mergeBlocks(Into->blocks(), From->blocks(), MergeType); Into->merge(From, MergedBlocks.getBlocks(), score(MergedBlocks)); + From->clear(); // Remove cluster From from the list of active clusters auto Iter = std::remove(Clusters.begin(), Clusters.end(), From); Clusters.erase(Iter, Clusters.end()); + // Update block clusters + for (auto BB : Into->blocks()) { + CurCluster[BB->getLayoutIndex()] = Into; + } + // Invalidate caches Cache.invalidate(Into); @@ -582,6 +625,9 @@ private: // Active clusters. The vector gets udpated at runtime when clusters are merged std::vector Clusters; + // Current cluster of a basic block + std::vector CurCluster; + // Size of the block std::vector Size; diff --git a/bolt/src/Passes/HFSortPlus.cpp b/bolt/src/Passes/HFSortPlus.cpp index e02c965c2b2d..6bb9dfbae9a0 100644 --- a/bolt/src/Passes/HFSortPlus.cpp +++ b/bolt/src/Passes/HFSortPlus.cpp @@ -9,24 +9,6 @@ // //===----------------------------------------------------------------------===// -// TODO: copyright/license msg. - -/* - +----------------------------------------------------------------------+ - | HipHop for PHP | - +----------------------------------------------------------------------+ - | Copyright (c) 2010-present Facebook, Inc. 
(http://www.facebook.com) | - +----------------------------------------------------------------------+ - | This source file is subject to version 3.01 of the PHP license, | - | that is bundled with this package in the file LICENSE, and is | - | available through the world-wide-web at the following url: | - | http://www.php.net/license/3_01.txt | - | If you did not receive a copy of the PHP license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@php.net so we can mail you a copy immediately. | - +----------------------------------------------------------------------+ -*/ - #include "BinaryFunction.h" #include "HFSort.h" #include "ReorderUtils.h" @@ -112,14 +94,6 @@ bool compareClusterPairs(const Cluster *A1, const Cluster *B1, return A1->target(0) < A2->target(0); } -/// Sorting clusters by their density in decreasing order. -template -std::vector sortByDensity(const C &Clusters_) { - std::vector Clusters(Clusters_.begin(), Clusters_.end()); - std::stable_sort(Clusters.begin(), Clusters.end(), compareClusters); - return Clusters; -} - /// HFSortPlus - layout of hot functions with iTLB cache optimization /// /// Given an ordering of hot functions (and hence, their assignment to the @@ -398,15 +372,17 @@ public: DEBUG(dbgs() << "Completed hfsort+ with " << Clusters.size() << " clusters\n"); + // Sorting clusters by density in decreasing order + std::stable_sort(Clusters.begin(), Clusters.end(), compareClusters); + // Return the set of clusters that are left, which are the ones that // didn't get merged (so their first func is its original func) std::vector Result; - for (auto Cluster : sortByDensity(Clusters)) { + Result.reserve(Clusters.size()); + for (auto Cluster : Clusters) { Result.emplace_back(std::move(*Cluster)); } - assert(std::is_sorted(Result.begin(), Result.end(), compareClustersDensity)); - return Result; } @@ -473,6 +449,7 @@ private: Adjacent.merge(Into, From); Into->merge(*From); + From->clear(); // 
Update the clusters and addresses for functions merged from From. size_t CurAddr = 0; diff --git a/bolt/src/Passes/ReorderAlgorithm.h b/bolt/src/Passes/ReorderAlgorithm.h index 5be8a93f6f1f..29c300c02ff9 100644 --- a/bolt/src/Passes/ReorderAlgorithm.h +++ b/bolt/src/Passes/ReorderAlgorithm.h @@ -246,10 +246,6 @@ public: /// A new reordering algorithm for basic blocks, cache+ class CachePlusReorderAlgorithm : public ReorderAlgorithm { public: - explicit CachePlusReorderAlgorithm( - std::unique_ptr CAlgo) : - ReorderAlgorithm(std::move(CAlgo)) { } - void reorderBasicBlocks( const BinaryFunction &BF, BasicBlockOrder &Order) const override; };