diff --git a/bolt/src/BinaryBasicBlock.h b/bolt/src/BinaryBasicBlock.h index 6cbabaf29c52..9093ddaef864 100644 --- a/bolt/src/BinaryBasicBlock.h +++ b/bolt/src/BinaryBasicBlock.h @@ -365,14 +365,24 @@ public: return getSuccessor(); } - const BinaryBranchInfo &getBranchInfo(bool Condition) const { + /// Return branch info corresponding to a taken branch. + const BinaryBranchInfo &getTakenBranchInfo() const { assert(BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"); - return BranchInfo[Condition == true ? 0 : 1]; + return BranchInfo[0]; }; + /// Return branch info corresponding to a fall-through branch. + const BinaryBranchInfo &getFallthroughBranchInfo() const { + assert(BranchInfo.size() == 2 && + "could only be called for blocks with 2 successors"); + return BranchInfo[1]; + }; + + /// Return branch info corresponding to an edge going to \p Succ basic block. BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ); + /// Set branch information for the outgoing edge to block \p Succ. void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count, uint64_t MispredictedCount) { diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp index 68c909872f01..0220a2f7219e 100644 --- a/bolt/src/BinaryFunction.cpp +++ b/bolt/src/BinaryFunction.cpp @@ -3962,11 +3962,11 @@ DynoStats BinaryFunction::getDynoStats() const { } // Conditional branch that could be followed by an unconditional branch. - uint64_t TakenCount = BB->getBranchInfo(true).Count; + auto TakenCount = BB->getTakenBranchInfo().Count; if (TakenCount == COUNT_NO_PROFILE) TakenCount = 0; - uint64_t NonTakenCount = BB->getBranchInfo(false).Count; + auto NonTakenCount = BB->getFallthroughBranchInfo().Count; if (NonTakenCount == COUNT_NO_PROFILE) NonTakenCount = 0; diff --git a/bolt/src/Passes/BinaryPasses.cpp b/bolt/src/Passes/BinaryPasses.cpp index 3367ca4f6d3e..e42a8551417d 100644 --- a/bolt/src/Passes/BinaryPasses.cpp +++ b/bolt/src/Passes/BinaryPasses.cpp @@ -904,7 +904,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, // Record this block so that we don't try to optimize it twice. BeenOptimized.insert(PredBB); - bool BranchForStats; + uint64_t Count = 0; if (CondSucc != BB) { // Patch the new target address into the conditional branch. MIB->reverseBranchCondition(*CondBranch, CalleeSymbol, BC.Ctx.get()); @@ -913,13 +913,12 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, // branch to the old target. This has to be done manually since // fixupBranches is not called after SCTC. NeedsUncondBranch.emplace_back(std::make_pair(PredBB, CondSucc)); - BranchForStats = false; + Count = PredBB->getFallthroughBranchInfo().Count; } else { // Change destination of the conditional branch. MIB->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get()); - BranchForStats = true; + Count = PredBB->getTakenBranchInfo().Count; } - const auto Count = PredBB->getBranchInfo(BranchForStats).Count; const uint64_t CTCTakenFreq = Count == BinaryBasicBlock::COUNT_NO_PROFILE ? 0 : Count; diff --git a/bolt/src/Passes/IndirectCallPromotion.cpp b/bolt/src/Passes/IndirectCallPromotion.cpp index 3fd79ac1eb38..43071b2dab4c 100644 --- a/bolt/src/Passes/IndirectCallPromotion.cpp +++ b/bolt/src/Passes/IndirectCallPromotion.cpp @@ -740,92 +740,67 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG( // Scale indirect call counts to the execution count of the original // basic block containing the indirect call. + auto TotalCount = IndCallBlock->getKnownExecutionCount(); uint64_t TotalIndirectBranches = 0; - uint64_t TotalIndirectMispreds = 0; - for (const auto &BI : Targets) { - TotalIndirectBranches += BI.Branches; - TotalIndirectMispreds += BI.Mispreds; + for (const auto &Target : Targets) { + TotalIndirectBranches += Target.Branches; } - - uint64_t TotalCount = 0; - uint64_t TotalMispreds = 0; - - if (Function.hasValidProfile()) { - TotalCount = IndCallBlock->getExecutionCount(); - TotalMispreds = - TotalCount * ((double)TotalIndirectMispreds / TotalIndirectBranches); - assert(TotalCount != BinaryBasicBlock::COUNT_NO_PROFILE); - } - - // New BinaryBranchInfo scaled to the execution count of the original BB. std::vector BBI; - for (auto Itr = Targets.begin(); Itr != Targets.end(); ++Itr) { - const auto BranchPct = (double)Itr->Branches / TotalIndirectBranches; - const auto MispredPct = - (double)Itr->Mispreds / std::max(TotalIndirectMispreds, 1ul); - if (Itr->JTIndex.empty()) { - BBI.push_back(BinaryBranchInfo{uint64_t(TotalCount * BranchPct), - uint64_t(TotalMispreds * MispredPct)}); - continue; - } - for (size_t I = 0, E = Itr->JTIndex.size(); I != E; ++I) { - BBI.push_back( - BinaryBranchInfo{uint64_t(TotalCount * (BranchPct / E)), - uint64_t(TotalMispreds * (MispredPct / E))}); + std::vector ScaledBBI; + for (const auto &Target : Targets) { + const auto NumEntries = std::max(1UL, Target.JTIndex.size()); + for (size_t I = 0; I < NumEntries; ++I) { + BBI.push_back(BinaryBranchInfo{Target.Branches / NumEntries, + Target.Mispreds / NumEntries}); + ScaledBBI.push_back(BinaryBranchInfo{ + uint64_t(TotalCount * Target.Branches / + (NumEntries * TotalIndirectBranches)), + uint64_t(TotalCount * Target.Mispreds / + (NumEntries * TotalIndirectBranches))}); } } - auto BI = BBI.begin(); - auto updateCurrentBranchInfo = [&]{ - assert(BI < BBI.end()); - TotalCount -= BI->Count; - TotalMispreds -= BI->MispredictedCount; - ++BI; - }; - if (IsJumpTable) { - IndCallBlock->moveAllSuccessorsTo(NewBBs.back().get()); + auto *NewIndCallBlock = NewBBs.back().get(); + IndCallBlock->moveAllSuccessorsTo(NewIndCallBlock); std::vector SymTargets; - for (size_t I = 0; I < Targets.size(); ++I) { - assert(Targets[I].To.Sym); - if (Targets[I].JTIndex.empty()) - SymTargets.push_back(Targets[I].To.Sym); - else { - for (size_t Idx = 0, E = Targets[I].JTIndex.size(); Idx != E; ++Idx) { - SymTargets.push_back(Targets[I].To.Sym); - } + for (const auto &Target : Targets) { + const auto NumEntries = std::max(1UL, Target.JTIndex.size()); + for (size_t I = 0; I < NumEntries; ++I) { + SymTargets.push_back(Target.To.Sym); } } assert(SymTargets.size() > NewBBs.size() - 1 && "There must be a target symbol associated with each new BB."); - // Fix up successors and execution counts. - updateCurrentBranchInfo(); - auto *Succ = Function.getBasicBlockForLabel(SymTargets[0]); - assert(Succ && "each jump target must be a legal BB label"); - IndCallBlock->addSuccessor(Succ, BBI[0]); // cond branch - IndCallBlock->addSuccessor(NewBBs[0].get(), TotalCount); // fallthru branch + for (uint64_t I = 0; I < NewBBs.size(); ++I) { + BinaryBasicBlock *SourceBB = I ? NewBBs[I - 1].get() : IndCallBlock; + SourceBB->setExecutionCount(TotalCount); - for (size_t I = 0; I < NewBBs.size() - 1; ++I) { - assert(TotalCount <= IndCallBlock->getExecutionCount() || - TotalCount <= uint64_t(TotalIndirectBranches)); - uint64_t ExecCount = BBI[I+1].Count; - updateCurrentBranchInfo(); - auto *Succ = Function.getBasicBlockForLabel(SymTargets[I+1]); - assert(Succ && "each jump target must be a legal BB label"); - NewBBs[I]->addSuccessor(Succ, BBI[I+1]); - NewBBs[I]->addSuccessor(NewBBs[I+1].get(), TotalCount); // fallthru - ExecCount += TotalCount; - NewBBs[I]->setCanOutline(IndCallBlock->canOutline()); - NewBBs[I]->setIsCold(IndCallBlock->isCold()); - NewBBs[I]->setExecutionCount(ExecCount); + auto *TargetBB = Function.getBasicBlockForLabel(SymTargets[I]); + SourceBB->addSuccessor(TargetBB, ScaledBBI[I]); // taken + + TotalCount -= ScaledBBI[I].Count; + SourceBB->addSuccessor(NewBBs[I].get(), TotalCount); // fall-through + + // Update branch info for the indirect jump. + auto &BranchInfo = NewIndCallBlock->getBranchInfo(*TargetBB); + BranchInfo.Count -= BBI[I].Count; + BranchInfo.MispredictedCount -= BBI[I].MispredictedCount; } } else { assert(NewBBs.size() >= 2); assert(NewBBs.size() % 2 == 1 || IndCallBlock->succ_empty()); assert(NewBBs.size() % 2 == 1 || IsTailCall); + auto ScaledBI = ScaledBBI.begin(); + auto updateCurrentBranchInfo = [&]{ + assert(ScaledBI != ScaledBBI.end()); + TotalCount -= ScaledBI->Count; + ++ScaledBI; + }; + if (!IsTailCall) { MergeBlock = NewBBs.back().get(); IndCallBlock->moveAllSuccessorsTo(MergeBlock); @@ -833,25 +808,23 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG( // Fix up successors and execution counts. updateCurrentBranchInfo(); - IndCallBlock->addSuccessor(NewBBs[1].get(), TotalCount); // cond branch - IndCallBlock->addSuccessor(NewBBs[0].get(), BBI[0]); // uncond branch + IndCallBlock->addSuccessor(NewBBs[1].get(), TotalCount); + IndCallBlock->addSuccessor(NewBBs[0].get(), ScaledBBI[0]); const size_t Adj = IsTailCall ? 1 : 2; for (size_t I = 0; I < NewBBs.size() - Adj; ++I) { assert(TotalCount <= IndCallBlock->getExecutionCount() || TotalCount <= uint64_t(TotalIndirectBranches)); - uint64_t ExecCount = BBI[(I+1)/2].Count; - NewBBs[I]->setCanOutline(IndCallBlock->canOutline()); - NewBBs[I]->setIsCold(IndCallBlock->isCold()); + auto ExecCount = ScaledBBI[(I+1)/2].Count; if (I % 2 == 0) { if (MergeBlock) { - NewBBs[I]->addSuccessor(MergeBlock, BBI[(I+1)/2].Count); // uncond + NewBBs[I]->addSuccessor(MergeBlock, ScaledBBI[(I+1)/2].Count); } } else { assert(I + 2 < NewBBs.size()); updateCurrentBranchInfo(); - NewBBs[I]->addSuccessor(NewBBs[I+2].get(), TotalCount); // uncond branch - NewBBs[I]->addSuccessor(NewBBs[I+1].get(), BBI[(I+1)/2]); // cond. branch + NewBBs[I]->addSuccessor(NewBBs[I+2].get(), TotalCount); + NewBBs[I]->addSuccessor(NewBBs[I+1].get(), ScaledBBI[(I+1)/2]); ExecCount += TotalCount; } NewBBs[I]->setExecutionCount(ExecCount); @@ -860,8 +833,6 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG( if (MergeBlock) { // Arrange for the MergeBlock to be the fallthrough for the first // promoted call block. - MergeBlock->setCanOutline(IndCallBlock->canOutline()); - MergeBlock->setIsCold(IndCallBlock->isCold()); std::unique_ptr MBPtr; std::swap(MBPtr, NewBBs.back()); NewBBs.pop_back(); @@ -871,13 +842,10 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG( } } - // cold call block - // TODO: should be able to outline/cold this block. + // Update the execution count. NewBBs.back()->setExecutionCount(TotalCount); - NewBBs.back()->setCanOutline(IndCallBlock->canOutline()); - NewBBs.back()->setIsCold(IndCallBlock->isCold()); - // update BB and BB layout. + // Update BB and BB layout. Function.insertBasicBlocks(IndCallBlock, std::move(NewBBs)); assert(Function.validateCFG());