forked from OSchip/llvm-project
[BOLT] Fix implementation for TSP solution
Summary: Fix a bug in the reconstruction of the optimal path. When calculating the best path, we need to take into account the path from the new "last" node to the current last node. Add the "-tsp-threshold" option (defaults to 10) to control when the TSP algorithm should be used. (cherry picked from FBD6253461)
This commit is contained in:
parent
624b2d984a
commit
69ddcfa5cb
|
@ -90,7 +90,7 @@ private:
|
|||
unsigned Index{InvalidIndex};
|
||||
|
||||
/// Index in the current layout.
|
||||
unsigned LayoutIndex{InvalidIndex};
|
||||
mutable unsigned LayoutIndex{InvalidIndex};
|
||||
|
||||
/// Number of pseudo instructions in this block.
|
||||
uint32_t NumPseudos{0};
|
||||
|
@ -778,6 +778,19 @@ public:
|
|||
/// Returns an estimate of size of basic block during run time.
|
||||
uint64_t estimateSize() const;
|
||||
|
||||
/// Return index in the current layout. The user is responsible for
|
||||
/// making sure the indices are up to date,
|
||||
/// e.g. by calling BinaryFunction::updateLayoutIndices();
|
||||
unsigned getLayoutIndex() const {
|
||||
assert(isValid());
|
||||
return LayoutIndex;
|
||||
}
|
||||
|
||||
/// Set layout index. To be used by BinaryFunction.
|
||||
void setLayoutIndex(unsigned Index) const {
|
||||
LayoutIndex = Index;
|
||||
}
|
||||
|
||||
private:
|
||||
void adjustNumPseudos(const MCInst &Inst, int Sign);
|
||||
|
||||
|
@ -815,19 +828,6 @@ private:
|
|||
void setIndex(unsigned I) {
|
||||
Index = I;
|
||||
}
|
||||
|
||||
/// Return index in the current layout. The user is responsible for
|
||||
/// making sure the indices are up to date,
|
||||
/// e.g. by calling BinaryFunction::updateLayoutIndices();
|
||||
unsigned getLayoutIndex() const {
|
||||
assert(isValid());
|
||||
return LayoutIndex;
|
||||
}
|
||||
|
||||
/// Set layout index. To be used by BinaryFunction.
|
||||
void setLayoutIndex(unsigned Index) {
|
||||
LayoutIndex = Index;
|
||||
}
|
||||
};
|
||||
|
||||
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
|
||||
|
|
|
@ -169,6 +169,15 @@ SplitEH("split-eh",
|
|||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
TSPThreshold("tsp-threshold",
|
||||
cl::desc("maximum number of hot basic blocks in a function for which to use "
|
||||
"a precise TSP solution while re-ordering basic blocks"),
|
||||
cl::init(10),
|
||||
cl::ZeroOrMore,
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
|
@ -389,8 +398,7 @@ void ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF,
|
|||
|
||||
if (Type == LT_REVERSE) {
|
||||
Algo.reset(new ReverseReorderAlgorithm());
|
||||
}
|
||||
else if (BF.size() <= FUNC_SIZE_THRESHOLD && Type != LT_OPTIMIZE_SHUFFLE) {
|
||||
} else if (BF.size() <= opts::TSPThreshold && Type != LT_OPTIMIZE_SHUFFLE) {
|
||||
// Work on optimal solution if problem is small enough
|
||||
DEBUG(dbgs() << "finding optimal block layout for " << BF << "\n");
|
||||
Algo.reset(new OptimalReorderAlgorithm());
|
||||
|
|
|
@ -174,10 +174,6 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
// Function size, in number of BBs, above which we fall back to a heuristic
|
||||
// solution to the layout problem instead of seeking the optimal one.
|
||||
static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10;
|
||||
|
||||
void modifyFunctionLayout(BinaryFunction &Function,
|
||||
LayoutType Type,
|
||||
bool MinBranchClusters,
|
||||
|
|
|
@ -396,24 +396,26 @@ void MinBranchGreedyClusterAlgorithm::reset() {
|
|||
}
|
||||
|
||||
void OptimalReorderAlgorithm::reorderBasicBlocks(
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
std::vector<std::vector<uint64_t>> Weight;
|
||||
std::unordered_map<const BinaryBasicBlock *, int> BBToIndex;
|
||||
std::vector<BinaryBasicBlock *> IndexToBB;
|
||||
|
||||
unsigned N = BF.layout_size();
|
||||
const auto N = BF.layout_size();
|
||||
assert(N <= std::numeric_limits<uint64_t>::digits &&
|
||||
"cannot use TSP solution for sizes larger than bits in uint64_t");
|
||||
|
||||
// Populating weight map and index map
|
||||
for (auto BB : BF.layout()) {
|
||||
BBToIndex[BB] = IndexToBB.size();
|
||||
for (auto *BB : BF.layout()) {
|
||||
BB->setLayoutIndex(IndexToBB.size());
|
||||
IndexToBB.push_back(BB);
|
||||
}
|
||||
Weight.resize(N);
|
||||
for (auto BB : BF.layout()) {
|
||||
for (auto *BB : BF.layout()) {
|
||||
auto BI = BB->branch_info_begin();
|
||||
Weight[BBToIndex[BB]].resize(N);
|
||||
for (auto I : BB->successors()) {
|
||||
Weight[BB->getLayoutIndex()].resize(N);
|
||||
for (auto *SuccBB : BB->successors()) {
|
||||
if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count;
|
||||
Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
|
||||
++BI;
|
||||
}
|
||||
}
|
||||
|
@ -427,26 +429,26 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
|
|||
DP[1][0] = 0;
|
||||
// Walk through TSP solutions using a bitmask to represent state (current set
|
||||
// of BBs in the layout)
|
||||
unsigned BestSet = 1;
|
||||
unsigned BestLast = 0;
|
||||
uint64_t BestSet = 1;
|
||||
uint64_t BestLast = 0;
|
||||
int64_t BestWeight = 0;
|
||||
for (unsigned Set = 1; Set < (1U << N); ++Set) {
|
||||
for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
|
||||
// Traverse each possibility of Last BB visited in this layout
|
||||
for (unsigned Last = 0; Last < N; ++Last) {
|
||||
for (uint64_t Last = 0; Last < N; ++Last) {
|
||||
// Case 1: There is no possible layout with this BB as Last
|
||||
if (DP[Set][Last] == -1)
|
||||
continue;
|
||||
|
||||
// Case 2: There is a layout with this Set and this Last, and we try
|
||||
// to expand this set with New
|
||||
for (unsigned New = 1; New < N; ++New) {
|
||||
for (uint64_t New = 1; New < N; ++New) {
|
||||
// Case 2a: BB "New" is already in this Set
|
||||
if ((Set & (1 << New)) != 0)
|
||||
if ((Set & (1ULL << New)) != 0)
|
||||
continue;
|
||||
|
||||
// Case 2b: BB "New" is not in this set and we add it to this Set and
|
||||
// record total weight of this layout with "New" as the last BB.
|
||||
unsigned NewSet = (Set | (1 << New));
|
||||
uint64_t NewSet = (Set | (1ULL << New));
|
||||
if (DP[NewSet][New] == -1)
|
||||
DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
|
||||
DP[NewSet][New] = std::max(DP[NewSet][New],
|
||||
|
@ -462,38 +464,42 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
|
|||
}
|
||||
|
||||
// Define final function layout based on layout that maximizes weight
|
||||
unsigned Last = BestLast;
|
||||
unsigned Set = BestSet;
|
||||
uint64_t Last = BestLast;
|
||||
uint64_t Set = BestSet;
|
||||
std::vector<bool> Visited;
|
||||
Visited.resize(N);
|
||||
Visited[Last] = true;
|
||||
Order.push_back(IndexToBB[Last]);
|
||||
Set = Set & ~(1U << Last);
|
||||
Set = Set & ~(1ULL << Last);
|
||||
while (Set != 0) {
|
||||
int64_t Best = -1;
|
||||
for (unsigned I = 0; I < N; ++I) {
|
||||
uint64_t NewLast;
|
||||
for (uint64_t I = 0; I < N; ++I) {
|
||||
if (DP[Set][I] == -1)
|
||||
continue;
|
||||
if (DP[Set][I] > Best) {
|
||||
Last = I;
|
||||
Best = DP[Set][I];
|
||||
int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
|
||||
if (DP[Set][I] + AdjWeight > Best) {
|
||||
NewLast = I;
|
||||
Best = DP[Set][I] + AdjWeight;
|
||||
}
|
||||
}
|
||||
Last = NewLast;
|
||||
Visited[Last] = true;
|
||||
Order.push_back(IndexToBB[Last]);
|
||||
Set = Set & ~(1U << Last);
|
||||
Set = Set & ~(1ULL << Last);
|
||||
}
|
||||
std::reverse(Order.begin(), Order.end());
|
||||
|
||||
// Finalize layout with BBs that weren't assigned to the layout
|
||||
for (auto BB : BF.layout()) {
|
||||
if (Visited[BBToIndex[BB]] == false)
|
||||
// Finalize layout with BBs that weren't assigned to the layout using the
|
||||
// input layout.
|
||||
for (auto *BB : BF.layout()) {
|
||||
if (Visited[BB->getLayoutIndex()] == false)
|
||||
Order.push_back(BB);
|
||||
}
|
||||
}
|
||||
|
||||
void OptimizeReorderAlgorithm::reorderBasicBlocks(
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
if (BF.layout_empty())
|
||||
return;
|
||||
|
||||
|
@ -509,7 +515,7 @@ void OptimizeReorderAlgorithm::reorderBasicBlocks(
|
|||
}
|
||||
|
||||
void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
const BinaryFunction &BF, BasicBlockOrder &Order) const {
|
||||
if (BF.layout_empty())
|
||||
return;
|
||||
|
||||
|
|
Loading…
Reference in New Issue