# Patch annotation. This preamble sits ahead of the first "diff --git" header
# and is commentary only; the patch text that follows it is unchanged.
#
# bolt/BinaryBasicBlock.h
#   * LayoutIndex gains `mutable`, and setLayoutIndex() gains `const`.
#   * getLayoutIndex()/setLayoutIndex() are deleted from the hunk whose context
#     label is `private:` and re-added, ahead of a `private:` label, in the
#     hunk whose context label is `public:`.
#   * Presumably both changes exist so reorderBasicBlocks(), which receives a
#     `const BinaryFunction &`, can call them on the blocks of BF.layout() --
#     confirm against the constness of the layout() element type.
#
# bolt/Passes/BinaryPasses.cpp, bolt/Passes/BinaryPasses.h
#   * Adds a hidden option "tsp-threshold" (cl::init(10)) in namespace opts.
#   * modifyFunctionLayout() now tests `BF.size() <= opts::TSPThreshold`.
#   * The constant FUNC_SIZE_THRESHOLD (= 10) is removed from the header.
#   * NOTE(review): the option description speaks of "hot basic blocks", but
#     the comparison uses BF.size(); what size() counts is not visible here.
#
# bolt/Passes/ReorderAlgorithm.cpp, OptimalReorderAlgorithm::reorderBasicBlocks
#   * The local BBToIndex map is dropped. Each block's position in BF.layout()
#     is written with BB->setLayoutIndex(IndexToBB.size()) and read back with
#     getLayoutIndex(), so the function now writes to the blocks it visits.
#   * An assert is added that N does not exceed std::numeric_limits::digits
#     (its message refers to the bits in uint64_t).
#   * Set, Last, New, NewSet, BestSet and BestLast change from unsigned to
#     uint64_t, and the shifts change from `1` / `1U` to `1ULL`.
#   * Layout reconstruction now chooses the predecessor I that maximizes
#     DP[Set][I] plus Weight[I][Last] (taken as 0 unless > 0). The choice is
#     held in NewLast so Last keeps the previously chosen block during the scan.
#   * The trailing comment now states that unassigned blocks are appended
#     "using the input layout".
#   * The signature lines of the two other reorderBasicBlocks() definitions
#     appear to change in whitespace only.
#
# NOTE(review): `1ULL << N` is undefined when N equals the width of the shifted
#   type, yet the assert accepts N equal to the digit count. Confirm whether
#   the bound should be strict.
# NOTE(review): NewLast has no initializer. If no I has DP[Set][I] != -1 for
#   some Set, `Last = NewLast` reads an indeterminate value. The code that
#   fills DP is outside these hunks, so this could not be ruled out here.
# NOTE(review): template argument lists appear to be missing from this text
#   (e.g. "static cl::opt TSPThreshold", "std::vector> Weight",
#   "std::numeric_limits::digits"). Check against the original patch before
#   applying.
diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h index 08a6a7e142e6..f619b385d1bf 100644 --- a/bolt/BinaryBasicBlock.h +++ b/bolt/BinaryBasicBlock.h @@ -90,7 +90,7 @@ private: unsigned Index{InvalidIndex}; /// Index in the current layout. - unsigned LayoutIndex{InvalidIndex}; + mutable unsigned LayoutIndex{InvalidIndex}; /// Number of pseudo instructions in this block. uint32_t NumPseudos{0}; @@ -778,6 +778,19 @@ public: /// Returns an estimate of size of basic block during run time. uint64_t estimateSize() const; + /// Return index in the current layout. The user is responsible for + /// making sure the indices are up to date, + /// e.g. by calling BinaryFunction::updateLayoutIndices(); + unsigned getLayoutIndex() const { + assert(isValid()); + return LayoutIndex; + } + + /// Set layout index. To be used by BinaryFunction. + void setLayoutIndex(unsigned Index) const { + LayoutIndex = Index; + } + private: void adjustNumPseudos(const MCInst &Inst, int Sign); @@ -815,19 +828,6 @@ private: void setIndex(unsigned I) { Index = I; } - - /// Return index in the current layout. The user is responsible for - /// making sure the indices are up to date, - /// e.g. by calling BinaryFunction::updateLayoutIndices(); - unsigned getLayoutIndex() const { - assert(isValid()); - return LayoutIndex; - } - - /// Set layout index. To be used by BinaryFunction. 
- void setLayoutIndex(unsigned Index) { - LayoutIndex = Index; - } }; bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); diff --git a/bolt/Passes/BinaryPasses.cpp b/bolt/Passes/BinaryPasses.cpp index 43021aa13d01..4dc00fdc3805 100644 --- a/bolt/Passes/BinaryPasses.cpp +++ b/bolt/Passes/BinaryPasses.cpp @@ -169,6 +169,15 @@ SplitEH("split-eh", cl::Hidden, cl::cat(BoltOptCategory)); +static cl::opt +TSPThreshold("tsp-threshold", + cl::desc("maximum number of hot basic blocks in a function for which to use " + "a precise TSP solution while re-ordering basic blocks"), + cl::init(10), + cl::ZeroOrMore, + cl::Hidden, + cl::cat(BoltOptCategory)); + } // namespace opts namespace llvm { @@ -389,8 +398,7 @@ void ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF, if (Type == LT_REVERSE) { Algo.reset(new ReverseReorderAlgorithm()); - } - else if (BF.size() <= FUNC_SIZE_THRESHOLD && Type != LT_OPTIMIZE_SHUFFLE) { + } else if (BF.size() <= opts::TSPThreshold && Type != LT_OPTIMIZE_SHUFFLE) { // Work on optimal solution if problem is small enough DEBUG(dbgs() << "finding optimal block layout for " << BF << "\n"); Algo.reset(new OptimalReorderAlgorithm()); diff --git a/bolt/Passes/BinaryPasses.h b/bolt/Passes/BinaryPasses.h index 1c5539199063..5cf91bc387d2 100644 --- a/bolt/Passes/BinaryPasses.h +++ b/bolt/Passes/BinaryPasses.h @@ -174,10 +174,6 @@ public: }; private: - // Function size, in number of BBs, above which we fallback to a heuristic - // solution to the layout problem instead of seeking the optimal one. 
- static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10; - void modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, bool MinBranchClusters, diff --git a/bolt/Passes/ReorderAlgorithm.cpp b/bolt/Passes/ReorderAlgorithm.cpp index b475b6f58bc9..6956a8207ba8 100644 --- a/bolt/Passes/ReorderAlgorithm.cpp +++ b/bolt/Passes/ReorderAlgorithm.cpp @@ -396,24 +396,26 @@ void MinBranchGreedyClusterAlgorithm::reset() { } void OptimalReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + const BinaryFunction &BF, BasicBlockOrder &Order) const { std::vector> Weight; - std::unordered_map BBToIndex; std::vector IndexToBB; - unsigned N = BF.layout_size(); + const auto N = BF.layout_size(); + assert(N <= std::numeric_limits::digits && + "cannot use TSP solution for sizes larger than bits in uint64_t"); + // Populating weight map and index map - for (auto BB : BF.layout()) { - BBToIndex[BB] = IndexToBB.size(); + for (auto *BB : BF.layout()) { + BB->setLayoutIndex(IndexToBB.size()); IndexToBB.push_back(BB); } Weight.resize(N); - for (auto BB : BF.layout()) { + for (auto *BB : BF.layout()) { auto BI = BB->branch_info_begin(); - Weight[BBToIndex[BB]].resize(N); - for (auto I : BB->successors()) { + Weight[BB->getLayoutIndex()].resize(N); + for (auto *SuccBB : BB->successors()) { if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) - Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count; + Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count; ++BI; } } @@ -427,26 +429,26 @@ void OptimalReorderAlgorithm::reorderBasicBlocks( DP[1][0] = 0; // Walk through TSP solutions using a bitmask to represent state (current set // of BBs in the layout) - unsigned BestSet = 1; - unsigned BestLast = 0; + uint64_t BestSet = 1; + uint64_t BestLast = 0; int64_t BestWeight = 0; - for (unsigned Set = 1; Set < (1U << N); ++Set) { + for (uint64_t Set = 1; Set < (1ULL << N); ++Set) { // Traverse each possibility of Last BB visited in this layout - for 
(unsigned Last = 0; Last < N; ++Last) { + for (uint64_t Last = 0; Last < N; ++Last) { // Case 1: There is no possible layout with this BB as Last if (DP[Set][Last] == -1) continue; // Case 2: There is a layout with this Set and this Last, and we try // to expand this set with New - for (unsigned New = 1; New < N; ++New) { + for (uint64_t New = 1; New < N; ++New) { // Case 2a: BB "New" is already in this Set - if ((Set & (1 << New)) != 0) + if ((Set & (1ULL << New)) != 0) continue; // Case 2b: BB "New" is not in this set and we add it to this Set and // record total weight of this layout with "New" as the last BB. - unsigned NewSet = (Set | (1 << New)); + uint64_t NewSet = (Set | (1ULL << New)); if (DP[NewSet][New] == -1) DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New]; DP[NewSet][New] = std::max(DP[NewSet][New], @@ -462,38 +464,42 @@ void OptimalReorderAlgorithm::reorderBasicBlocks( } // Define final function layout based on layout that maximizes weight - unsigned Last = BestLast; - unsigned Set = BestSet; + uint64_t Last = BestLast; + uint64_t Set = BestSet; std::vector Visited; Visited.resize(N); Visited[Last] = true; Order.push_back(IndexToBB[Last]); - Set = Set & ~(1U << Last); + Set = Set & ~(1ULL << Last); while (Set != 0) { int64_t Best = -1; - for (unsigned I = 0; I < N; ++I) { + uint64_t NewLast; + for (uint64_t I = 0; I < N; ++I) { if (DP[Set][I] == -1) continue; - if (DP[Set][I] > Best) { - Last = I; - Best = DP[Set][I]; + int64_t AdjWeight = Weight[I][Last] > 0 ? 
Weight[I][Last] : 0; + if (DP[Set][I] + AdjWeight > Best) { + NewLast = I; + Best = DP[Set][I] + AdjWeight; } } + Last = NewLast; Visited[Last] = true; Order.push_back(IndexToBB[Last]); - Set = Set & ~(1U << Last); + Set = Set & ~(1ULL << Last); } std::reverse(Order.begin(), Order.end()); - // Finalize layout with BBs that weren't assigned to the layout - for (auto BB : BF.layout()) { - if (Visited[BBToIndex[BB]] == false) + // Finalize layout with BBs that weren't assigned to the layout using the + // input layout. + for (auto *BB : BF.layout()) { + if (Visited[BB->getLayoutIndex()] == false) Order.push_back(BB); } } void OptimizeReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + const BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.layout_empty()) return; @@ -509,7 +515,7 @@ void OptimizeReorderAlgorithm::reorderBasicBlocks( } void OptimizeBranchReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + const BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.layout_empty()) return;