diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 23e3071fc007..571f9624ecdc 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -1091,6 +1091,9 @@ public: uint64_t computeInstructionSize(const MCInst &Inst, const MCCodeEmitter *Emitter = nullptr) const { + if (auto Size = MIB->getAnnotationWithDefault(Inst, "Size")) + return Size; + if (!Emitter) Emitter = this->MCE.get(); SmallString<256> Code; diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h index 7e5dcb7aeadb..9af899eaa26e 100644 --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -105,7 +105,6 @@ public: /// Detect and eliminate unreachable basic blocks. We could have those /// filled with nops and they are used for alignment. class EliminateUnreachableBlocks : public BinaryFunctionPass { - std::shared_timed_mutex ModifiedMtx; std::unordered_set Modified; std::atomic DeletedBlocks{0}; std::atomic DeletedBytes{0}; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 60bc1351900d..2482fe0156e8 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1980,6 +1980,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { BinaryBasicBlock *InsertBB = nullptr; BinaryBasicBlock *PrevBB = nullptr; bool IsLastInstrNop = false; + // Offset of the last non-nop instruction. uint64_t LastInstrOffset = 0; auto addCFIPlaceholders = [this](uint64_t CFIOffset, @@ -1992,13 +1993,22 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { }; // For profiling purposes we need to save the offset of the last instruction - // in the basic block. But in certain cases we don't if the instruction was - // the last one, and we have to go back and update its offset. + // in the basic block. + // NOTE: nops always have an Offset annotation. Annotate the last non-nop as + // older profiles ignored nops. auto updateOffset = [&](uint64_t Offset) { assert(PrevBB && PrevBB != InsertBB && "invalid previous block"); - MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr(); - if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset")) - MIB->addAnnotation(*PrevInstr, "Offset", static_cast(Offset), + MCInst *LastNonNop = nullptr; + for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(), + E = PrevBB->rend(); + RII != E; ++RII) { + if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) { + LastNonNop = &*RII; + break; + } + } + if (LastNonNop && !MIB->hasAnnotation(*LastNonNop, "Offset")) + MIB->addAnnotation(*LastNonNop, "Offset", static_cast(Offset), AllocatorId); }; @@ -2009,7 +2019,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { auto LI = Labels.find(Offset); if (LI != Labels.end()) { // Always create new BB at branch destination. - PrevBB = InsertBB; + PrevBB = InsertBB ? InsertBB : PrevBB; InsertBB = addBasicBlock(LI->first, LI->second, opts::PreserveBlocksAlignment && IsLastInstrNop); if (PrevBB) @@ -2020,14 +2030,16 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { bool IsSDTMarker = MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr); bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr); - if (IsSDTMarker || IsLKMarker) { - HasSDTMarker = true; - LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : " - << utohexstr(InstrInputAddr) << "\n"); - if (!MIB->hasAnnotation(Instr, "Offset")) { + // Mark all nops with Offset for profile tracking purposes. + if (MIB->isNoop(Instr) || IsLKMarker) { + if (!MIB->hasAnnotation(Instr, "Offset")) MIB->addAnnotation(Instr, "Offset", static_cast(Offset), AllocatorId); - } + if (IsSDTMarker || IsLKMarker) + HasSDTMarker = true; + else + // Annotate ordinary nops, so we can safely delete them if required. + MIB->addAnnotation(Instr, "NOP", static_cast(1), AllocatorId); } if (!InsertBB) { @@ -2060,7 +2072,8 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { const bool IsBlockEnd = MIB->isTerminator(Instr); IsLastInstrNop = MIB->isNoop(Instr); - LastInstrOffset = Offset; + if (!IsLastInstrNop) + LastInstrOffset = Offset; InsertBB->addInstruction(std::move(Instr)); // Add associated CFI instrs. We always add the CFI instruction that is @@ -4361,6 +4374,13 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) { return &Inst; } + if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) { + const uint32_t Size = + BC.MIB->getAnnotationWithDefault(*LastInstr, "Size"); + if (BB->getEndOffset() - Offset == Size) + return LastInstr; + } + return nullptr; } else { llvm_unreachable("invalid CFG state to use getInstructionAtOffset()"); diff --git a/bolt/lib/Core/BinaryFunctionProfile.cpp b/bolt/lib/Core/BinaryFunctionProfile.cpp index 441cf29c766e..dba20d2ccbb9 100644 --- a/bolt/lib/Core/BinaryFunctionProfile.cpp +++ b/bolt/lib/Core/BinaryFunctionProfile.cpp @@ -100,6 +100,28 @@ void BinaryFunction::postProcessProfile() { } } + // Fix for old profiles. + for (BinaryBasicBlock *BB : BasicBlocks) { + if (BB->size() != 1 || BB->succ_size() != 1) + continue; + + if (BB->getKnownExecutionCount() == 0) + continue; + + MCInst *Instr = BB->getFirstNonPseudoInstr(); + assert(Instr && "expected non-pseudo instr"); + if (!BC.MIB->hasAnnotation(*Instr, "NOP")) + continue; + + BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); + BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); + if (!BI.Count) { + BI.Count = BB->getKnownExecutionCount(); + FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + + BI.Count); + } + } + if (opts::FixBlockCounts) { for (BinaryBasicBlock *BB : BasicBlocks) { // Make sure that execution count of a block is at least the branch count diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index 41d99b879b2b..5952615fc259 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -720,10 +720,9 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, return true; } - if (FromBB->succ_size() == 0) { - // Return from a tail call. + // Return from a tail call. + if (FromBB->succ_size() == 0) return true; - } // Very rarely we will see ignored branches. Do a linear check. for (std::pair &Branch : BF.IgnoredBranches) { @@ -817,10 +816,21 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, if (collectedInBoltedBinary() && FromBB == ToBB) return true; - LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n' - << Twine::utohexstr(From) << " -> " - << Twine::utohexstr(To) << '\n'); - return false; + BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false); + if (FTSuccessor && FTSuccessor->succ_size() == 1 && + FTSuccessor->getSuccessor(ToBB->getLabel())) { + BinaryBasicBlock::BinaryBranchInfo &FTBI = + FTSuccessor->getBranchInfo(*ToBB); + FTBI.Count += Count; + if (Count) + FTBI.MispredictedCount += Mispreds; + ToBB = FTSuccessor; + } else { + LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n' + << Twine::utohexstr(From) << " -> " + << Twine::utohexstr(To) << '\n'); + return false; + } } BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 020238481582..216702419886 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -147,7 +147,7 @@ PrintICP("print-icp", cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt +cl::opt PrintNormalized("print-normalized", cl::desc("print functions after CFG is normalized"), cl::ZeroOrMore, diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp index ce6ec2ebdc78..c77c00ccb7bd 100644 --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -43,6 +43,7 @@ extern cl::opt NeverPrint; extern cl::opt OutputFilename; extern cl::opt PrintAfterBranchFixup; extern cl::opt PrintFinalized; +extern cl::opt PrintNormalized; extern cl::opt PrintReordered; extern cl::opt PrintSections; extern cl::opt PrintDisasm; @@ -352,6 +353,13 @@ void MachORewriteInstance::runOptimizationPasses() { Manager.registerPass(std::make_unique()); Manager.registerPass(std::make_unique(opts::NeverPrint)); } + + Manager.registerPass(std::make_unique(opts::NeverPrint)); + + Manager.registerPass(std::make_unique(opts::NeverPrint)); + + Manager.registerPass(std::make_unique(opts::PrintNormalized)); + Manager.registerPass( std::make_unique(opts::PrintReordered)); Manager.registerPass( diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 5c226d4fb1ca..aa57e2b7fe76 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2816,6 +2816,8 @@ void RewriteInstance::buildFunctionsCFG() { // Create annotation indices to allow lock-free execution BC->MIB->getOrCreateAnnotationIndex("Offset"); BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); + BC->MIB->getOrCreateAnnotationIndex("NOP"); + BC->MIB->getOrCreateAnnotationIndex("Size"); ParallelUtilities::WorkFuncWithAllocTy WorkFun = [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {