From b2f132c7c2d0867a9d2232cd4a5bedeb8bbbed54 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Sat, 4 Nov 2017 19:22:05 -0700 Subject: [PATCH] [RFC] [BOLT] Use iterators for MC branch/call analysis code. Summary: Here's an implementation of an abstract instruction iterator for the branch/call analysis code in MCInstrAnalysis. I'm posting it up to see what you guys think. It's a bit sloppy with constness and probably needs more tidying up. (cherry picked from FBD6244012) --- bolt/BinaryBasicBlock.cpp | 7 ++- bolt/BinaryFunction.cpp | 84 +++++++++++++-------------- bolt/BinaryFunction.h | 26 ++++----- bolt/Exceptions.cpp | 8 +-- bolt/Passes/IndirectCallPromotion.cpp | 6 +- bolt/Passes/Inliner.cpp | 4 +- 6 files changed, 66 insertions(+), 69 deletions(-) diff --git a/bolt/BinaryBasicBlock.cpp b/bolt/BinaryBasicBlock.cpp index 9ea9cf11bd1d..b3d9328f6a24 100644 --- a/bolt/BinaryBasicBlock.cpp +++ b/bolt/BinaryBasicBlock.cpp @@ -313,7 +313,12 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, MCInst *&CondBranch, MCInst *&UncondBranch) { auto &MIA = Function->getBinaryContext().MIA; - return MIA->analyzeBranch(Instructions, TBB, FBB, CondBranch, UncondBranch); + return MIA->analyzeBranch(Instructions.begin(), + Instructions.end(), + TBB, + FBB, + CondBranch, + UncondBranch); } MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) { diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 3852f7fd1415..7d89b3ee4a19 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -438,9 +438,9 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, // Offset of the instruction in function. uint64_t Offset{0}; - if (BasicBlocks.empty() && !InstructionOffsets.empty()) { + if (BasicBlocks.empty() && !Instructions.empty()) { // Print before CFG was built. - for (const auto &II : InstructionOffsets) { + for (const auto &II : Instructions) { Offset = II.first; // Print label if exists at this offset. @@ -448,7 +448,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, if (LI != Labels.end()) OS << LI->second->getName() << ":\n"; - BC.printInstruction(OS, Instructions[II.second], Offset, this); + BC.printInstruction(OS, II.second, Offset, this); } } @@ -634,7 +634,8 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction, MCInst *PCRelBaseInstr; uint64_t PCRelAddr = 0; - MutableArrayRef BB = Instructions; + auto Begin = Instructions.begin(); + auto End = Instructions.end(); if (BC.TheTriple->getArch() == llvm::Triple::aarch64) { PreserveNops = opts::Relocs; @@ -642,16 +643,17 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction, // This is a heuristic, since the full set of labels have yet to be // determined for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) { - auto II = InstructionOffsets.find(LI->first); - if (II != InstructionOffsets.end()) { - BB = BB.slice(II->second); + auto II = Instructions.find(LI->first); + if (II != Instructions.end()) { + Begin = II; break; } } } auto Type = BC.MIA->analyzeIndirectBranch(Instruction, - BB, + Begin, + End, PtrSize, MemLocInstr, BaseRegNum, @@ -681,9 +683,8 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction, } } uint64_t InstrAddr = 0; - for (auto II = InstructionOffsets.rbegin(); II != InstructionOffsets.rend(); - ++II) { - if (&Instructions[II->second] == PCRelBaseInstr) { + for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) { + if (&II->second == PCRelBaseInstr) { InstrAddr = II->first + getAddress(); break; } @@ -1473,10 +1474,9 @@ bool BinaryFunction::buildCFG() { } }; - for (auto I = InstructionOffsets.begin(), - E = InstructionOffsets.end(); I != E; ++I) { + for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) { const auto Offset = I->first; - const auto &Instr = Instructions[I->second]; + const auto &Instr = I->second; auto LI = Labels.find(Offset); if (LI != Labels.end()) { @@ -1621,9 +1621,8 @@ bool BinaryFunction::buildCFG() { // basic block. auto *ToBB = getBasicBlockAtOffset(Branch.second); if (ToBB == nullptr) { - auto I = InstructionOffsets.find(Branch.second); - auto E = InstructionOffsets.end(); - while (ToBB == nullptr && I != E && MIA->isNoop(Instructions[I->second])) { + auto I = Instructions.find(Branch.second), E = Instructions.end(); + while (ToBB == nullptr && I != E && MIA->isNoop(I->second)) { ++I; if (I == E) break; @@ -1781,7 +1780,6 @@ bool BinaryFunction::buildCFG() { // // NB: don't clear Labels list as we may need them if we mark the function // as non-simple later in the process of discovering extra entry points. - clearList(InstructionOffsets); clearList(Instructions); clearList(OffsetToCFI); clearList(TakenBranches); @@ -2079,18 +2077,18 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) { // Eliminate recursive calls and returns from recursive calls from the list // of branches that have no match. They are not considered local branches. auto isRecursiveBranch = [&](std::pair &Branch) { - auto SrcInstrI = InstructionOffsets.find(Branch.first); - if (SrcInstrI == InstructionOffsets.end()) + auto SrcInstrI = Instructions.find(Branch.first); + if (SrcInstrI == Instructions.end()) return false; // Check if it is a recursive call. - const auto &SrcInstr = Instructions[SrcInstrI->second]; + const auto &SrcInstr = SrcInstrI->second; if ((BC.MIA->isCall(SrcInstr) || BC.MIA->isIndirectBranch(SrcInstr)) && Branch.second == 0) return true; - auto DstInstrI = InstructionOffsets.find(Branch.second); - if (DstInstrI == InstructionOffsets.end()) + auto DstInstrI = Instructions.find(Branch.second); + if (DstInstrI == Instructions.end()) return false; // Check if it is a return from a recursive call. @@ -2099,17 +2097,16 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) { if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstr)) { auto SrcInstrSuccessorI = SrcInstrI; ++SrcInstrSuccessorI; - assert(SrcInstrSuccessorI != InstructionOffsets.end() && + assert(SrcInstrSuccessorI != Instructions.end() && "unexpected prefix instruction at the end of function"); - IsSrcReturn = BC.MIA->isReturn(Instructions[SrcInstrSuccessorI->second]); + IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second); } if (IsSrcReturn && Branch.second != 0) { // Make sure the destination follows the call instruction. auto DstInstrPredecessorI = DstInstrI; --DstInstrPredecessorI; - assert(DstInstrPredecessorI != InstructionOffsets.end() && - "invalid iterator"); - if (BC.MIA->isCall(Instructions[DstInstrPredecessorI->second])) + assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator"); + if (BC.MIA->isCall(DstInstrPredecessorI->second)) return true; } return false; @@ -2124,10 +2121,10 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) { ExternProfileBranches.end(), std::back_inserter(OrphanBranches), [&](const std::pair &Branch) { - auto II = InstructionOffsets.find(Branch.first); - if (II == InstructionOffsets.end()) + auto II = Instructions.find(Branch.first); + if (II == Instructions.end()) return true; - const auto &Instr = Instructions[II->second]; + const auto &Instr = II->second; // Check for calls, tail calls, rets and indirect branches. // When matching profiling info, we did not reach the stage // when we identify tail calls, so they are still represented @@ -2139,8 +2136,7 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) { // Check for "rep ret" if (BC.MIA->isPrefix(Instr)) { ++II; - if (II != InstructionOffsets.end() && - BC.MIA->isReturn(Instructions[II->second])) + if (II != Instructions.end() && BC.MIA->isReturn(II->second)) return false; } return true; @@ -4320,12 +4316,12 @@ BinaryFunction::getFallthroughsInTrace(uint64_t From, uint64_t To) const { return NoneType(); // Get iterators and validate trace start/end - auto FromIter = InstructionOffsets.find(From); - if (FromIter == InstructionOffsets.end()) + auto FromIter = Instructions.find(From); + if (FromIter == Instructions.end()) return NoneType(); - auto ToIter = InstructionOffsets.find(To); - if (ToIter == InstructionOffsets.end()) + auto ToIter = Instructions.find(To); + if (ToIter == Instructions.end()) return NoneType(); // Trace needs to go forward @@ -4333,22 +4329,20 @@ BinaryFunction::getFallthroughsInTrace(uint64_t From, uint64_t To) const { return NoneType(); // Trace needs to finish in a branch - auto &ToInst = Instructions[ToIter->second]; - if (!BC.MIA->isBranch(ToInst) && !BC.MIA->isCall(ToInst) && - !BC.MIA->isReturn(ToInst)) + if (!BC.MIA->isBranch(ToIter->second) && !BC.MIA->isCall(ToIter->second) && + !BC.MIA->isReturn(ToIter->second)) return NoneType(); // Analyze intermediate instructions for (; FromIter != ToIter; ++FromIter) { // This operates under an assumption that we collect all branches in LBR // No unconditional branches in the middle of the trace - auto &FromInst = Instructions[FromIter->second]; - if (BC.MIA->isUnconditionalBranch(FromInst) || - BC.MIA->isReturn(FromInst) || - BC.MIA->isCall(FromInst)) + if (BC.MIA->isUnconditionalBranch(FromIter->second) || + BC.MIA->isReturn(FromIter->second) || + BC.MIA->isCall(FromIter->second)) return NoneType(); - if (!BC.MIA->isConditionalBranch(FromInst)) + if (!BC.MIA->isConditionalBranch(FromIter->second)) continue; const uint64_t Src = FromIter->first; diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index c803f4c849e5..487f93cb7dba 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -456,10 +456,9 @@ private: LabelsMapType Labels; /// Temporary holder of instructions before CFG is constructed. - /// Map offset in the function to MCInst index. - using InstrMapType = std::map; - InstrMapType InstructionOffsets; - std::vector Instructions; + /// Map offset in the function to MCInst. + using InstrMapType = std::map; + InstrMapType Instructions; /// List of DWARF CFI instructions. Original CFI from the binary must be /// sorted w.r.t. offset that it appears. We rely on this to replay CFIs @@ -749,10 +748,7 @@ private: } void addInstruction(uint64_t Offset, MCInst &&Instruction) { - assert(InstructionOffsets.size() == Instructions.size() && - "There must be one instruction at every offset."); - Instructions.emplace_back(std::forward(Instruction)); - InstructionOffsets[Offset] = Instructions.size() - 1; + Instructions.emplace(Offset, std::forward(Instruction)); } /// Return instruction at a given offset in the function. Valid before @@ -760,9 +756,8 @@ private: MCInst *getInstructionAtOffset(uint64_t Offset) { assert(CurrentState == State::Disassembled && "can only call function in Disassembled state"); - auto II = InstructionOffsets.find(Offset); - return (II == InstructionOffsets.end()) - ? nullptr : &Instructions[II->second]; + auto II = Instructions.find(Offset); + return (II == Instructions.end()) ? nullptr : &II->second; } /// Analyze and process indirect branch \p Instruction before it is @@ -1486,23 +1481,22 @@ public: // harder for us to recover this information, since we can create empty BBs // with NOPs and then reorder it away. // We fix this by moving the CFI instruction just before any NOPs. - auto I = InstructionOffsets.lower_bound(Offset); + auto I = Instructions.lower_bound(Offset); if (Offset == getSize()) { - assert(I == InstructionOffsets.end() && "unexpected iterator value"); + assert(I == Instructions.end() && "unexpected iterator value"); // Sometimes compiler issues restore_state after all instructions // in the function (even after nop). --I; Offset = I->first; } assert(I->first == Offset && "CFI pointing to unknown instruction"); - if (I == InstructionOffsets.begin()) { + if (I == Instructions.begin()) { CIEFrameInstructions.emplace_back(std::forward(Inst)); return; } --I; - while (I != InstructionOffsets.begin() && - BC.MIA->isNoop(Instructions[I->second])) { + while (I != Instructions.begin() && BC.MIA->isNoop(I->second)) { Offset = I->first; --I; } diff --git a/bolt/Exceptions.cpp b/bolt/Exceptions.cpp index e5fa3f469b53..ac303bfd0207 100644 --- a/bolt/Exceptions.cpp +++ b/bolt/Exceptions.cpp @@ -219,7 +219,7 @@ void BinaryFunction::parseLSDA(ArrayRef LSDASectionData, // Create a handler entry if necessary. MCSymbol *LPSymbol{nullptr}; if (LandingPad) { - if (InstructionOffsets.find(LandingPad) == InstructionOffsets.end()) { + if (Instructions.find(LandingPad) == Instructions.end()) { if (opts::Verbosity >= 1) { errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad) << " not pointing to an instruction in function " @@ -237,11 +237,11 @@ void BinaryFunction::parseLSDA(ArrayRef LSDASectionData, } // Mark all call instructions in the range. - auto II = InstructionOffsets.find(Start); - auto IE = InstructionOffsets.end(); + auto II = Instructions.find(Start); + auto IE = Instructions.end(); assert(II != IE && "exception range not pointing to an instruction"); do { - auto &Instruction = Instructions[II->second]; + auto &Instruction = II->second; if (BC.MIA->isCall(Instruction)) { assert(!BC.MIA->isInvoke(Instruction) && "overlapping exception ranges detected"); diff --git a/bolt/Passes/IndirectCallPromotion.cpp b/bolt/Passes/IndirectCallPromotion.cpp index 25f1a678c30a..580795911b76 100644 --- a/bolt/Passes/IndirectCallPromotion.cpp +++ b/bolt/Passes/IndirectCallPromotion.cpp @@ -325,7 +325,8 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets( const MCExpr *DispExpr; MutableArrayRef Insts(&BB->front(), &CallInst); const auto Type = BC.MIA->analyzeIndirectBranch(CallInst, - Insts, + Insts.begin(), + Insts.end(), BC.AsmInfo->getPointerSize(), MemLocInstr, BaseReg, @@ -555,7 +556,8 @@ IndirectCallPromotion::maybeGetVtableAddrs( return MethodInfoType(); MutableArrayRef Insts(&BB->front(), &Inst + 1); - if (!BC.MIA->analyzeVirtualMethodCall(Insts, + if (!BC.MIA->analyzeVirtualMethodCall(Insts.begin(), + Insts.end(), MethodFetchInsns, VtableReg, MethodReg, diff --git a/bolt/Passes/Inliner.cpp b/bolt/Passes/Inliner.cpp index 65347403c1da..dc7e62e34b6c 100644 --- a/bolt/Passes/Inliner.cpp +++ b/bolt/Passes/Inliner.cpp @@ -246,7 +246,9 @@ InlineSmallFunctions::inlineCall( const MCSymbol *OldFTLabel = nullptr; MCInst *CondBranch = nullptr; MCInst *UncondBranch = nullptr; - const bool Result = BC.MIA->analyzeBranch(Instruction, OldTargetLabel, + const bool Result = BC.MIA->analyzeBranch(&Instruction, + &Instruction + 1, + OldTargetLabel, OldFTLabel, CondBranch, UncondBranch); (void)Result;