[RFC] [BOLT] Use iterators for MC branch/call analysis code.

Summary:
Here's an implementation of an abstract instruction iterator for the branch/call
analysis code in MCInstrAnalysis.  I'm posting it up to see what you guys think.
It's a bit sloppy with constness and probably needs more tidying up.

(cherry picked from FBD6244012)
This commit is contained in:
Bill Nell 2017-11-04 19:22:05 -07:00 committed by Maksim Panchenko
parent c4d7460ed6
commit b2f132c7c2
6 changed files with 66 additions and 69 deletions

View File

@ -313,7 +313,12 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB,
MCInst *&CondBranch, MCInst *&CondBranch,
MCInst *&UncondBranch) { MCInst *&UncondBranch) {
auto &MIA = Function->getBinaryContext().MIA; auto &MIA = Function->getBinaryContext().MIA;
return MIA->analyzeBranch(Instructions, TBB, FBB, CondBranch, UncondBranch); return MIA->analyzeBranch(Instructions.begin(),
Instructions.end(),
TBB,
FBB,
CondBranch,
UncondBranch);
} }
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) { MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {

View File

@ -438,9 +438,9 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
// Offset of the instruction in function. // Offset of the instruction in function.
uint64_t Offset{0}; uint64_t Offset{0};
if (BasicBlocks.empty() && !InstructionOffsets.empty()) { if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built. // Print before CFG was built.
for (const auto &II : InstructionOffsets) { for (const auto &II : Instructions) {
Offset = II.first; Offset = II.first;
// Print label if exists at this offset. // Print label if exists at this offset.
@ -448,7 +448,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (LI != Labels.end()) if (LI != Labels.end())
OS << LI->second->getName() << ":\n"; OS << LI->second->getName() << ":\n";
BC.printInstruction(OS, Instructions[II.second], Offset, this); BC.printInstruction(OS, II.second, Offset, this);
} }
} }
@ -634,7 +634,8 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
MCInst *PCRelBaseInstr; MCInst *PCRelBaseInstr;
uint64_t PCRelAddr = 0; uint64_t PCRelAddr = 0;
MutableArrayRef<MCInst> BB = Instructions; auto Begin = Instructions.begin();
auto End = Instructions.end();
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) { if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
PreserveNops = opts::Relocs; PreserveNops = opts::Relocs;
@ -642,16 +643,17 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
// This is a heuristic, since the full set of labels have yet to be // This is a heuristic, since the full set of labels have yet to be
// determined // determined
for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) { for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
auto II = InstructionOffsets.find(LI->first); auto II = Instructions.find(LI->first);
if (II != InstructionOffsets.end()) { if (II != Instructions.end()) {
BB = BB.slice(II->second); Begin = II;
break; break;
} }
} }
} }
auto Type = BC.MIA->analyzeIndirectBranch(Instruction, auto Type = BC.MIA->analyzeIndirectBranch(Instruction,
BB, Begin,
End,
PtrSize, PtrSize,
MemLocInstr, MemLocInstr,
BaseRegNum, BaseRegNum,
@ -681,9 +683,8 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
} }
} }
uint64_t InstrAddr = 0; uint64_t InstrAddr = 0;
for (auto II = InstructionOffsets.rbegin(); II != InstructionOffsets.rend(); for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
++II) { if (&II->second == PCRelBaseInstr) {
if (&Instructions[II->second] == PCRelBaseInstr) {
InstrAddr = II->first + getAddress(); InstrAddr = II->first + getAddress();
break; break;
} }
@ -1473,10 +1474,9 @@ bool BinaryFunction::buildCFG() {
} }
}; };
for (auto I = InstructionOffsets.begin(), for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
E = InstructionOffsets.end(); I != E; ++I) {
const auto Offset = I->first; const auto Offset = I->first;
const auto &Instr = Instructions[I->second]; const auto &Instr = I->second;
auto LI = Labels.find(Offset); auto LI = Labels.find(Offset);
if (LI != Labels.end()) { if (LI != Labels.end()) {
@ -1621,9 +1621,8 @@ bool BinaryFunction::buildCFG() {
// basic block. // basic block.
auto *ToBB = getBasicBlockAtOffset(Branch.second); auto *ToBB = getBasicBlockAtOffset(Branch.second);
if (ToBB == nullptr) { if (ToBB == nullptr) {
auto I = InstructionOffsets.find(Branch.second); auto I = Instructions.find(Branch.second), E = Instructions.end();
auto E = InstructionOffsets.end(); while (ToBB == nullptr && I != E && MIA->isNoop(I->second)) {
while (ToBB == nullptr && I != E && MIA->isNoop(Instructions[I->second])) {
++I; ++I;
if (I == E) if (I == E)
break; break;
@ -1781,7 +1780,6 @@ bool BinaryFunction::buildCFG() {
// //
// NB: don't clear Labels list as we may need them if we mark the function // NB: don't clear Labels list as we may need them if we mark the function
// as non-simple later in the process of discovering extra entry points. // as non-simple later in the process of discovering extra entry points.
clearList(InstructionOffsets);
clearList(Instructions); clearList(Instructions);
clearList(OffsetToCFI); clearList(OffsetToCFI);
clearList(TakenBranches); clearList(TakenBranches);
@ -2079,18 +2077,18 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
// Eliminate recursive calls and returns from recursive calls from the list // Eliminate recursive calls and returns from recursive calls from the list
// of branches that have no match. They are not considered local branches. // of branches that have no match. They are not considered local branches.
auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) { auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) {
auto SrcInstrI = InstructionOffsets.find(Branch.first); auto SrcInstrI = Instructions.find(Branch.first);
if (SrcInstrI == InstructionOffsets.end()) if (SrcInstrI == Instructions.end())
return false; return false;
// Check if it is a recursive call. // Check if it is a recursive call.
const auto &SrcInstr = Instructions[SrcInstrI->second]; const auto &SrcInstr = SrcInstrI->second;
if ((BC.MIA->isCall(SrcInstr) || BC.MIA->isIndirectBranch(SrcInstr)) && if ((BC.MIA->isCall(SrcInstr) || BC.MIA->isIndirectBranch(SrcInstr)) &&
Branch.second == 0) Branch.second == 0)
return true; return true;
auto DstInstrI = InstructionOffsets.find(Branch.second); auto DstInstrI = Instructions.find(Branch.second);
if (DstInstrI == InstructionOffsets.end()) if (DstInstrI == Instructions.end())
return false; return false;
// Check if it is a return from a recursive call. // Check if it is a return from a recursive call.
@ -2099,17 +2097,16 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstr)) { if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstr)) {
auto SrcInstrSuccessorI = SrcInstrI; auto SrcInstrSuccessorI = SrcInstrI;
++SrcInstrSuccessorI; ++SrcInstrSuccessorI;
assert(SrcInstrSuccessorI != InstructionOffsets.end() && assert(SrcInstrSuccessorI != Instructions.end() &&
"unexpected prefix instruction at the end of function"); "unexpected prefix instruction at the end of function");
IsSrcReturn = BC.MIA->isReturn(Instructions[SrcInstrSuccessorI->second]); IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second);
} }
if (IsSrcReturn && Branch.second != 0) { if (IsSrcReturn && Branch.second != 0) {
// Make sure the destination follows the call instruction. // Make sure the destination follows the call instruction.
auto DstInstrPredecessorI = DstInstrI; auto DstInstrPredecessorI = DstInstrI;
--DstInstrPredecessorI; --DstInstrPredecessorI;
assert(DstInstrPredecessorI != InstructionOffsets.end() && assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator");
"invalid iterator"); if (BC.MIA->isCall(DstInstrPredecessorI->second))
if (BC.MIA->isCall(Instructions[DstInstrPredecessorI->second]))
return true; return true;
} }
return false; return false;
@ -2124,10 +2121,10 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
ExternProfileBranches.end(), ExternProfileBranches.end(),
std::back_inserter(OrphanBranches), std::back_inserter(OrphanBranches),
[&](const std::pair<uint32_t, uint32_t> &Branch) { [&](const std::pair<uint32_t, uint32_t> &Branch) {
auto II = InstructionOffsets.find(Branch.first); auto II = Instructions.find(Branch.first);
if (II == InstructionOffsets.end()) if (II == Instructions.end())
return true; return true;
const auto &Instr = Instructions[II->second]; const auto &Instr = II->second;
// Check for calls, tail calls, rets and indirect branches. // Check for calls, tail calls, rets and indirect branches.
// When matching profiling info, we did not reach the stage // When matching profiling info, we did not reach the stage
// when we identify tail calls, so they are still represented // when we identify tail calls, so they are still represented
@ -2139,8 +2136,7 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
// Check for "rep ret" // Check for "rep ret"
if (BC.MIA->isPrefix(Instr)) { if (BC.MIA->isPrefix(Instr)) {
++II; ++II;
if (II != InstructionOffsets.end() && if (II != Instructions.end() && BC.MIA->isReturn(II->second))
BC.MIA->isReturn(Instructions[II->second]))
return false; return false;
} }
return true; return true;
@ -4320,12 +4316,12 @@ BinaryFunction::getFallthroughsInTrace(uint64_t From, uint64_t To) const {
return NoneType(); return NoneType();
// Get iterators and validate trace start/end // Get iterators and validate trace start/end
auto FromIter = InstructionOffsets.find(From); auto FromIter = Instructions.find(From);
if (FromIter == InstructionOffsets.end()) if (FromIter == Instructions.end())
return NoneType(); return NoneType();
auto ToIter = InstructionOffsets.find(To); auto ToIter = Instructions.find(To);
if (ToIter == InstructionOffsets.end()) if (ToIter == Instructions.end())
return NoneType(); return NoneType();
// Trace needs to go forward // Trace needs to go forward
@ -4333,22 +4329,20 @@ BinaryFunction::getFallthroughsInTrace(uint64_t From, uint64_t To) const {
return NoneType(); return NoneType();
// Trace needs to finish in a branch // Trace needs to finish in a branch
auto &ToInst = Instructions[ToIter->second]; if (!BC.MIA->isBranch(ToIter->second) && !BC.MIA->isCall(ToIter->second) &&
if (!BC.MIA->isBranch(ToInst) && !BC.MIA->isCall(ToInst) && !BC.MIA->isReturn(ToIter->second))
!BC.MIA->isReturn(ToInst))
return NoneType(); return NoneType();
// Analyze intermediate instructions // Analyze intermediate instructions
for (; FromIter != ToIter; ++FromIter) { for (; FromIter != ToIter; ++FromIter) {
// This operates under an assumption that we collect all branches in LBR // This operates under an assumption that we collect all branches in LBR
// No unconditional branches in the middle of the trace // No unconditional branches in the middle of the trace
auto &FromInst = Instructions[FromIter->second]; if (BC.MIA->isUnconditionalBranch(FromIter->second) ||
if (BC.MIA->isUnconditionalBranch(FromInst) || BC.MIA->isReturn(FromIter->second) ||
BC.MIA->isReturn(FromInst) || BC.MIA->isCall(FromIter->second))
BC.MIA->isCall(FromInst))
return NoneType(); return NoneType();
if (!BC.MIA->isConditionalBranch(FromInst)) if (!BC.MIA->isConditionalBranch(FromIter->second))
continue; continue;
const uint64_t Src = FromIter->first; const uint64_t Src = FromIter->first;

View File

@ -456,10 +456,9 @@ private:
LabelsMapType Labels; LabelsMapType Labels;
/// Temporary holder of instructions before CFG is constructed. /// Temporary holder of instructions before CFG is constructed.
/// Map offset in the function to MCInst index. /// Map offset in the function to MCInst.
using InstrMapType = std::map<uint32_t, size_t>; using InstrMapType = std::map<uint32_t, MCInst>;
InstrMapType InstructionOffsets; InstrMapType Instructions;
std::vector<MCInst> Instructions;
/// List of DWARF CFI instructions. Original CFI from the binary must be /// List of DWARF CFI instructions. Original CFI from the binary must be
/// sorted w.r.t. offset that it appears. We rely on this to replay CFIs /// sorted w.r.t. offset that it appears. We rely on this to replay CFIs
@ -749,10 +748,7 @@ private:
} }
void addInstruction(uint64_t Offset, MCInst &&Instruction) { void addInstruction(uint64_t Offset, MCInst &&Instruction) {
assert(InstructionOffsets.size() == Instructions.size() && Instructions.emplace(Offset, std::forward<MCInst>(Instruction));
"There must be one instruction at every offset.");
Instructions.emplace_back(std::forward<MCInst>(Instruction));
InstructionOffsets[Offset] = Instructions.size() - 1;
} }
/// Return instruction at a given offset in the function. Valid before /// Return instruction at a given offset in the function. Valid before
@ -760,9 +756,8 @@ private:
MCInst *getInstructionAtOffset(uint64_t Offset) { MCInst *getInstructionAtOffset(uint64_t Offset) {
assert(CurrentState == State::Disassembled && assert(CurrentState == State::Disassembled &&
"can only call function in Disassembled state"); "can only call function in Disassembled state");
auto II = InstructionOffsets.find(Offset); auto II = Instructions.find(Offset);
return (II == InstructionOffsets.end()) return (II == Instructions.end()) ? nullptr : &II->second;
? nullptr : &Instructions[II->second];
} }
/// Analyze and process indirect branch \p Instruction before it is /// Analyze and process indirect branch \p Instruction before it is
@ -1486,23 +1481,22 @@ public:
// harder for us to recover this information, since we can create empty BBs // harder for us to recover this information, since we can create empty BBs
// with NOPs and then reorder it away. // with NOPs and then reorder it away.
// We fix this by moving the CFI instruction just before any NOPs. // We fix this by moving the CFI instruction just before any NOPs.
auto I = InstructionOffsets.lower_bound(Offset); auto I = Instructions.lower_bound(Offset);
if (Offset == getSize()) { if (Offset == getSize()) {
assert(I == InstructionOffsets.end() && "unexpected iterator value"); assert(I == Instructions.end() && "unexpected iterator value");
// Sometimes compiler issues restore_state after all instructions // Sometimes compiler issues restore_state after all instructions
// in the function (even after nop). // in the function (even after nop).
--I; --I;
Offset = I->first; Offset = I->first;
} }
assert(I->first == Offset && "CFI pointing to unknown instruction"); assert(I->first == Offset && "CFI pointing to unknown instruction");
if (I == InstructionOffsets.begin()) { if (I == Instructions.begin()) {
CIEFrameInstructions.emplace_back(std::forward<MCCFIInstruction>(Inst)); CIEFrameInstructions.emplace_back(std::forward<MCCFIInstruction>(Inst));
return; return;
} }
--I; --I;
while (I != InstructionOffsets.begin() && while (I != Instructions.begin() && BC.MIA->isNoop(I->second)) {
BC.MIA->isNoop(Instructions[I->second])) {
Offset = I->first; Offset = I->first;
--I; --I;
} }

View File

@ -219,7 +219,7 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
// Create a handler entry if necessary. // Create a handler entry if necessary.
MCSymbol *LPSymbol{nullptr}; MCSymbol *LPSymbol{nullptr};
if (LandingPad) { if (LandingPad) {
if (InstructionOffsets.find(LandingPad) == InstructionOffsets.end()) { if (Instructions.find(LandingPad) == Instructions.end()) {
if (opts::Verbosity >= 1) { if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad) errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
<< " not pointing to an instruction in function " << " not pointing to an instruction in function "
@ -237,11 +237,11 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
} }
// Mark all call instructions in the range. // Mark all call instructions in the range.
auto II = InstructionOffsets.find(Start); auto II = Instructions.find(Start);
auto IE = InstructionOffsets.end(); auto IE = Instructions.end();
assert(II != IE && "exception range not pointing to an instruction"); assert(II != IE && "exception range not pointing to an instruction");
do { do {
auto &Instruction = Instructions[II->second]; auto &Instruction = II->second;
if (BC.MIA->isCall(Instruction)) { if (BC.MIA->isCall(Instruction)) {
assert(!BC.MIA->isInvoke(Instruction) && assert(!BC.MIA->isInvoke(Instruction) &&
"overlapping exception ranges detected"); "overlapping exception ranges detected");

View File

@ -325,7 +325,8 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(
const MCExpr *DispExpr; const MCExpr *DispExpr;
MutableArrayRef<MCInst> Insts(&BB->front(), &CallInst); MutableArrayRef<MCInst> Insts(&BB->front(), &CallInst);
const auto Type = BC.MIA->analyzeIndirectBranch(CallInst, const auto Type = BC.MIA->analyzeIndirectBranch(CallInst,
Insts, Insts.begin(),
Insts.end(),
BC.AsmInfo->getPointerSize(), BC.AsmInfo->getPointerSize(),
MemLocInstr, MemLocInstr,
BaseReg, BaseReg,
@ -555,7 +556,8 @@ IndirectCallPromotion::maybeGetVtableAddrs(
return MethodInfoType(); return MethodInfoType();
MutableArrayRef<MCInst> Insts(&BB->front(), &Inst + 1); MutableArrayRef<MCInst> Insts(&BB->front(), &Inst + 1);
if (!BC.MIA->analyzeVirtualMethodCall(Insts, if (!BC.MIA->analyzeVirtualMethodCall(Insts.begin(),
Insts.end(),
MethodFetchInsns, MethodFetchInsns,
VtableReg, VtableReg,
MethodReg, MethodReg,

View File

@ -246,7 +246,9 @@ InlineSmallFunctions::inlineCall(
const MCSymbol *OldFTLabel = nullptr; const MCSymbol *OldFTLabel = nullptr;
MCInst *CondBranch = nullptr; MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr; MCInst *UncondBranch = nullptr;
const bool Result = BC.MIA->analyzeBranch(Instruction, OldTargetLabel, const bool Result = BC.MIA->analyzeBranch(&Instruction,
&Instruction + 1,
OldTargetLabel,
OldFTLabel, CondBranch, OldFTLabel, CondBranch,
UncondBranch); UncondBranch);
(void)Result; (void)Result;