diff --git a/bolt/src/BinaryContext.cpp b/bolt/src/BinaryContext.cpp
index 111c77e396d2..3c56fd41542e 100644
--- a/bolt/src/BinaryContext.cpp
+++ b/bolt/src/BinaryContext.cpp
@@ -289,9 +289,7 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
     // better heuristic.
     if (opts::StrictMode &&
         MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
-      JumpTable *JT;
-      const MCSymbol *Symbol;
-      std::tie(JT, Symbol) =
+      const MCSymbol *Symbol =
           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
 
       return std::make_pair(Symbol, Addend);
@@ -506,17 +504,16 @@ BinaryFunction *BinaryContext::createBinaryFunction(
   return BF;
 }
 
-std::pair<JumpTable *, const MCSymbol *>
-BinaryContext::getOrCreateJumpTable(BinaryFunction &Function,
-                                    uint64_t Address,
-                                    JumpTable::JumpTableType Type) {
+const MCSymbol *
+BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
+                                    JumpTable::JumpTableType Type) {
   if (auto *JT = getJumpTableContainingAddress(Address)) {
     assert(JT->Type == Type && "jump table types have to match");
     assert(JT->Parent == &Function &&
            "cannot re-use jump table of a different function");
     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
 
-    return std::make_pair(JT, JT->getFirstLabel());
+    return JT->getFirstLabel();
   }
 
   const auto EntrySize =
@@ -551,7 +548,40 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function,
   // Duplicate the entry for the parent function for easy access.
   Function.JumpTables.emplace(Address, JT);
 
-  return std::make_pair(JT, JTLabel);
+  return JTLabel;
+}
+
+std::pair<uint64_t, const MCSymbol *>
+BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
+                                  const MCSymbol *OldLabel) {
+  unsigned Offset = 0;
+  bool Found = false;
+  for (auto Elmt : JT->Labels) {
+    if (Elmt.second != OldLabel)
+      continue;
+    Offset = Elmt.first;
+    Found = true;
+    break;
+  }
+  assert(Found && "Label not found");
+  auto *NewLabel = Ctx->createTempSymbol("duplicatedJT", true);
+  auto *NewJT = new JumpTable(NewLabel->getName(),
+                              JT->getAddress(),
+                              JT->EntrySize,
+                              JT->Type,
+                              {},
+                              JumpTable::LabelMapType{{Offset, NewLabel}},
+                              Function,
+                              *getSectionForAddress(JT->getAddress()));
+  NewJT->Entries = JT->Entries;
+  NewJT->Counts = JT->Counts;
+  uint64_t JumpTableID = ++DuplicatedJumpTables;
+  // Invert it to differentiate from regular jump tables whose IDs are their
+  // addresses in the input binary memory space.
+  JumpTableID = ~JumpTableID;
+  JumpTables.emplace(JumpTableID, NewJT);
+  Function.JumpTables.emplace(JumpTableID, NewJT);
+  return std::make_pair(JumpTableID, NewLabel);
 }
 
 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
diff --git a/bolt/src/BinaryContext.h b/bolt/src/BinaryContext.h
index c1c5b1b61b2a..dd1afafcdb17 100644
--- a/bolt/src/BinaryContext.h
+++ b/bolt/src/BinaryContext.h
@@ -165,6 +165,11 @@ class BinaryContext {
   /// Jump tables for all functions mapped by address.
   std::map<uint64_t, JumpTable *> JumpTables;
 
+  /// Used in duplicateJumpTable() to uniquely identify a JT clone.
+  /// Start our IDs with a high number so that address-plus-size checks in
+  /// getJumpTableContainingAddress() do not overflow.
+  uint32_t DuplicatedJumpTables{0x10000000};
+
 public:
   /// [name] -> [BinaryData*] map used for global symbol resolution.
   using SymbolMapType = std::map<std::string, BinaryData *>;
@@ -313,20 +318,26 @@ public:
   ///
   /// May create an embedded jump table and return its label as the second
   /// element of the pair.
-  std::pair<JumpTable *, const MCSymbol *>
-  getOrCreateJumpTable(BinaryFunction &Function,
-                       uint64_t Address,
-                       JumpTable::JumpTableType Type);
+  const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
+                                       uint64_t Address,
+                                       JumpTable::JumpTableType Type);
 
   /// After jump table locations are established, this function will populate
   /// their OffsetEntries based on memory contents.
   void populateJumpTables();
 
+  /// Returns a jump table ID and label pointing to the duplicated jump table.
+  /// Ordinarily, jump tables are identified by their address in the input
+  /// binary. We return an ID with the high bit set to differentiate it from
+  /// regular addresses, avoiding conflicts with standard jump tables.
+  std::pair<uint64_t, const MCSymbol *>
+  duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
+                     const MCSymbol *OldLabel);
+
   /// Generate a unique name for jump table at a given \p Address belonging
   /// to function \p BF.
   std::string generateJumpTableName(const BinaryFunction &BF,
                                     uint64_t Address);
 
-public:
   /// Regular page size.
   static constexpr unsigned RegularPageSize = 0x1000;
diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp
index e560758ecbe8..1f5c5ed31be8 100644
--- a/bolt/src/BinaryFunction.cpp
+++ b/bolt/src/BinaryFunction.cpp
@@ -807,9 +807,8 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
   }
 
   auto useJumpTableForInstruction = [&](JumpTable::JumpTableType JTType) {
-    JumpTable *JT;
-    const MCSymbol *JTLabel;
-    std::tie(JT, JTLabel) = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
+    const MCSymbol *JTLabel =
+        BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
 
     BC.MIB->replaceMemOperandDisp(const_cast<MCInst &>(*MemLocInstr), JTLabel,
                                   BC.Ctx.get());
@@ -3503,7 +3502,8 @@ void BinaryFunction::insertBasicBlocks(
     BinaryBasicBlock *Start,
     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
     const bool UpdateLayout,
-    const bool UpdateCFIState) {
+    const bool UpdateCFIState,
+    const bool RecomputeLandingPads) {
   const auto StartIndex = Start ? getIndex(Start) : -1;
   const auto NumNewBlocks = NewBBs.size();
 
@@ -3517,7 +3517,11 @@ void BinaryFunction::insertBasicBlocks(
     BasicBlocks[I++] = BB.release();
   }
 
-  recomputeLandingPads();
+  if (RecomputeLandingPads) {
+    recomputeLandingPads();
+  } else {
+    updateBBIndices(0);
+  }
 
   if (UpdateLayout) {
     updateLayout(Start, NumNewBlocks);
@@ -3532,7 +3536,8 @@ BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
     BinaryFunction::iterator StartBB,
     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
     const bool UpdateLayout,
-    const bool UpdateCFIState) {
+    const bool UpdateCFIState,
+    const bool RecomputeLandingPads) {
   const auto StartIndex = getIndex(&*StartBB);
   const auto NumNewBlocks = NewBBs.size();
 
@@ -3547,7 +3552,11 @@ BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
     BasicBlocks[I++] = BB.release();
  }
 
-  recomputeLandingPads();
+  if (RecomputeLandingPads) {
+    recomputeLandingPads();
+  } else {
+    updateBBIndices(0);
+  }
 
   if (UpdateLayout) {
     updateLayout(*std::prev(RetIter), NumNewBlocks);
@@ -3594,6 +3603,106 @@ void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
   updateLayoutIndices();
 }
 
+bool BinaryFunction::checkForAmbiguousJumpTables() {
+  SmallSet<uint64_t, 4> JumpTables;
+  for (auto &BB : BasicBlocks) {
+    for (auto &Inst : *BB) {
+      if (!BC.MIB->isIndirectBranch(Inst))
+        continue;
+      auto JTAddress = BC.MIB->getJumpTable(Inst);
+      if (!JTAddress)
+        continue;
+      // This address can be inside another jump table, but we only consider
+      // it ambiguous when the same start address is used, not the same JT
+      // object.
+      auto Iter = JumpTables.find(JTAddress);
+      if (Iter == JumpTables.end()) {
+        JumpTables.insert(JTAddress);
+        continue;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+void BinaryFunction::disambiguateJumpTables() {
+  assert((opts::JumpTables != JTS_BASIC && isSimple()) || BC.HasRelocations);
+  SmallPtrSet<JumpTable *, 4> JumpTables;
+  for (auto &BB : BasicBlocks) {
+    for (auto &Inst : *BB) {
+      if (!BC.MIB->isIndirectBranch(Inst))
+        continue;
+      auto *JT = getJumpTable(Inst);
+      if (!JT)
+        continue;
+      auto Iter = JumpTables.find(JT);
+      if (Iter == JumpTables.end()) {
+        JumpTables.insert(JT);
+        continue;
+      }
+      // This instruction is an indirect jump using a jump table, but it is
+      // using the same jump table as another jump. Try all our tricks to
+      // extract the jump table symbol and make it point to a new, duplicated JT.
+      uint64_t Scale;
+      const MCSymbol *Target;
+      MCInst *JTLoadInst = &Inst;
+      // Try a standard indirect jump matcher, scale 8.
+      auto IndJmpMatcher = BC.MIB->matchIndJmp(
+          BC.MIB->matchReg(), BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
+          /*Offset=*/BC.MIB->matchSymbol(Target));
+      if (!BC.MIB->hasPCRelOperand(Inst) ||
+          !IndJmpMatcher->match(
+              *BC.MRI, *BC.MIB,
+              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
+          Scale != 8) {
+        // Standard JT matching failed. Trying now:
+        //   PIC-style matcher, scale 4
+        //     addq    %rdx, %rsi
+        //     addq    %rdx, %rdi
+        //     leaq    DATAat0x402450(%rip), %r11
+        //     movslq  (%r11,%rdx,4), %rcx
+        //     addq    %r11, %rcx
+        //     jmpq    *%rcx # JUMPTABLE @0x402450
+        MCPhysReg BaseReg1;
+        MCPhysReg BaseReg2;
+        uint64_t Offset;
+        auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
+            BC.MIB->matchReg(BaseReg1),
+            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
+                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
+                              BC.MIB->matchImm(Offset))));
+        auto LEAMatcherOwner =
+            BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
+        auto LEAMatcher = LEAMatcherOwner.get();
+        auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
+            std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
+        if (!PICIndJmpMatcher->match(
+                *BC.MRI, *BC.MIB,
+                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
+            Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
+            !PICBaseAddrMatcher->match(
+                *BC.MRI, *BC.MIB,
+                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
+          llvm_unreachable("Failed to extract jump table base");
+          continue;
+        }
+        // Matched PIC
+        JTLoadInst = &*LEAMatcher->CurInst;
+      }
+
+      uint64_t NewJumpTableID{0};
+      const MCSymbol *NewJTLabel;
+      std::tie(NewJumpTableID, NewJTLabel) =
+          BC.duplicateJumpTable(*this, JT, Target);
+      BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
+      // We use a unique ID with the high bit set as address for this "injected"
+      // jump table (not originally in the input binary).
+      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0);
+    }
+  }
+}
+
 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                                              BinaryBasicBlock *OldDest,
                                              BinaryBasicBlock *NewDest) {
@@ -3641,7 +3750,8 @@ BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
   // Update CFI and BB layout with new intermediate BB
   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
   NewBBs.emplace_back(std::move(NewBB));
-  insertBasicBlocks(From, std::move(NewBBs), true, true);
+  insertBasicBlocks(From, std::move(NewBBs), true, true,
+                    /*RecomputeLandingPads=*/false);
   return NewBBPtr;
 }
 
diff --git a/bolt/src/BinaryFunction.h b/bolt/src/BinaryFunction.h
index 5fcc5c690284..5b6388dfd3ed 100644
--- a/bolt/src/BinaryFunction.h
+++ b/bolt/src/BinaryFunction.h
@@ -1441,13 +1441,15 @@ public:
       BinaryBasicBlock *Start,
       std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
       const bool UpdateLayout = true,
-      const bool UpdateCFIState = true);
+      const bool UpdateCFIState = true,
+      const bool RecomputeLandingPads = true);
 
   iterator insertBasicBlocks(
       iterator StartBB,
       std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
       const bool UpdateLayout = true,
-      const bool UpdateCFIState = true);
+      const bool UpdateCFIState = true,
+      const bool RecomputeLandingPads = true);
 
   /// Update the basic block layout for this function. The BBs from
   /// [Start->Index, Start->Index + NumNewBlocks) are inserted into the
@@ -1466,6 +1468,20 @@ public:
   /// new blocks into the CFG. This must be called after updateLayout.
   void updateCFIState(BinaryBasicBlock *Start, const unsigned NumNewBlocks);
 
+  /// Return true if we detected ambiguous jump tables in this function, which
+  /// happens when one JT is used by more than one indirect jump. This
+  /// precludes us from splitting edges for this JT unless we duplicate the JT
+  /// (see disambiguateJumpTables).
+  bool checkForAmbiguousJumpTables();
+
+  /// Detect when two distinct indirect jumps are using the same jump table and
+  /// duplicate it, allocating a separate JT for each indirect branch. This is
+  /// necessary for code transformations on the CFG that change an edge induced
+  /// by an indirect branch, e.g. instrumentation or shrink wrapping. However,
+  /// this is only possible if we are not updating jump tables in place, but
+  /// are writing them to a new location (moving them).
+  void disambiguateJumpTables();
+
   /// Change \p OrigDest to \p NewDest in the jump table used at the end of
   /// \p BB. Returns false if \p OrigDest couldn't be found as a valid target
   /// and no replacement took place.
diff --git a/bolt/src/JumpTable.cpp b/bolt/src/JumpTable.cpp
index b90e48105781..3ad09cf13566 100644
--- a/bolt/src/JumpTable.cpp
+++ b/bolt/src/JumpTable.cpp
@@ -47,6 +47,10 @@ JumpTable::JumpTable(StringRef Name,
 std::pair<size_t, size_t>
 JumpTable::getEntriesForAddress(const uint64_t Addr) const {
+  // Check if this is not an address, but a cloned JT id.
+  if ((int64_t)Addr < 0ll)
+    return std::make_pair(0, Entries.size());
+
   const uint64_t InstOffset = Addr - getAddress();
   size_t StartIndex = 0, EndIndex = 0;
   uint64_t Offset = 0;
@@ -73,13 +77,12 @@ JumpTable::getEntriesForAddress(const uint64_t Addr) const {
   return std::make_pair(StartIndex, EndIndex);
 }
 
-bool JumpTable::replaceDestination(uint64_t JTAddress,
-                                   const MCSymbol *OldDest,
+bool JumpTable::replaceDestination(uint64_t JTAddress, const MCSymbol *OldDest,
                                    MCSymbol *NewDest) {
   bool Patched{false};
   const auto Range = getEntriesForAddress(JTAddress);
-  for (auto I = &Entries[Range.first], E = &Entries[Range.second];
-       I != E; ++I) {
+  for (auto I = &Entries[Range.first], E = &Entries[Range.second]; I != E;
+       ++I) {
     auto &Entry = *I;
     if (Entry == OldDest) {
       Patched = true;
diff --git a/bolt/src/JumpTable.h b/bolt/src/JumpTable.h
index 7ab462acad76..84499a66b784 100644
--- a/bolt/src/JumpTable.h
+++ b/bolt/src/JumpTable.h
@@ -49,7 +49,6 @@ public:
     JTT_PIC,
   };
 
-public:
   /// Branch statistics for jump table entries.
   struct JumpInfo {
     uint64_t Mispreds{0};
diff --git a/bolt/src/MCPlusBuilder.cpp b/bolt/src/MCPlusBuilder.cpp
index 1c8162ec5c4f..59543582d227 100644
--- a/bolt/src/MCPlusBuilder.cpp
+++ b/bolt/src/MCPlusBuilder.cpp
@@ -171,9 +171,8 @@ bool MCPlusBuilder::setJumpTable(MCInst &Inst, uint64_t Value,
                                  uint16_t IndexReg) {
   if (!isIndirectBranch(Inst))
     return false;
-  assert(getJumpTable(Inst) == 0 && "jump table already set");
   setAnnotationOpValue(Inst, MCAnnotation::kJumpTable, Value);
-  addAnnotation<>(Inst, "JTIndexReg", IndexReg);
+  getOrCreateAnnotationAs<uint16_t>(Inst, "JTIndexReg") = IndexReg;
   return true;
 }
 
diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp
index e1e555d60e37..1bb4f884636b 100644
--- a/bolt/src/Passes/Instrumentation.cpp
+++ b/bolt/src/Passes/Instrumentation.cpp
@@ -28,6 +28,13 @@ cl::opt<std::string> InstrumentationFilename(
   cl::init("/tmp/prof.fdata"),
   cl::Optional,
   cl::cat(BoltCategory));
 
+cl::opt<bool> InstrumentHotOnly(
+  "instrument-hot-only",
+  cl::desc("only insert instrumentation on hot functions (need profile)"),
+  cl::init(false),
+  cl::Optional,
+  cl::cat(BoltCategory));
 }
 
 namespace llvm {
@@ -138,8 +145,10 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
   uint64_t InstrumentationSitesSavingFlags{0ULL};
 
   for (auto &BFI : BC.getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
-    if (!Function.isSimple() || !opts::shouldProcess(Function))
+    if (!Function.isSimple() || !opts::shouldProcess(Function)
+        || (opts::InstrumentHotOnly && !Function.getKnownExecutionCount()))
       continue;
+    Function.disambiguateJumpTables();
     SplitWorklist.clear();
     SplitInstrs.clear();
@@ -193,6 +202,7 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
     if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 &&
         BB.size() > 0) {
       auto *FTBB = BB.getFallthrough();
+      assert(FTBB && "expected valid fall-through basic block");
       auto I = BB.begin();
       auto LastInstr = BB.end();
       --LastInstr;
diff --git a/bolt/src/Passes/ShrinkWrapping.cpp b/bolt/src/Passes/ShrinkWrapping.cpp
index 5800b8285d9f..6e90b9695e06 100644
--- a/bolt/src/Passes/ShrinkWrapping.cpp
+++ b/bolt/src/Passes/ShrinkWrapping.cpp
@@ -1910,6 +1910,15 @@ bool ShrinkWrapping::perform() {
   PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
 
+  if (BF.checkForAmbiguousJumpTables()) {
+    DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
+                 << ".\n");
+    // We could call disambiguateJumpTables here, but it is probably not worth
+    // the cost of duplicating potentially large jump tables that could regress
+    // dcache misses. Moreover, ambiguous JTs are rare and typically come from
+    // code written in assembly language. Just bail.
+    return false;
+  }
   SLM.initialize();
   CSA.compute();
   classifyCSRUses();
diff --git a/bolt/src/Target/X86/X86MCPlusBuilder.cpp b/bolt/src/Target/X86/X86MCPlusBuilder.cpp
index eacaddb608c0..2549e1710baa 100644
--- a/bolt/src/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/src/Target/X86/X86MCPlusBuilder.cpp
@@ -2736,7 +2736,7 @@ public:
   bool createIncMemory(MCInst &Inst, const MCSymbol *Target,
                        MCContext *Ctx) const override {
 
-    Inst.setOpcode(X86::INC64m);
+    Inst.setOpcode(X86::LOCK_INC64m);
     Inst.clear();
    Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
    Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
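
Note (not part of the patch): a minimal, self-contained C++ sketch of the cloned-jump-table ID scheme the changes above introduce. BinaryContext::duplicateJumpTable() bumps a counter that starts at 0x10000000 and bitwise-inverts it, so the resulting 64-bit ID has its high bits set and cannot collide with a real jump table address, and JumpTable::getEntriesForAddress() recognizes a clone by checking whether the "address" is negative as a signed 64-bit value. The helper names below are illustrative only, not BOLT APIs.

#include <cassert>
#include <cstdint>
#include <iostream>

// Counter mirroring BinaryContext::DuplicatedJumpTables: starts high so the
// inverted IDs stay far away from any plausible jump table address.
static uint32_t DuplicatedJumpTables = 0x10000000;

// Mirrors the ID allocation in BinaryContext::duplicateJumpTable(): bump the
// counter, then bitwise-invert it so the resulting 64-bit value has its high
// bits set and cannot collide with an address in the input binary.
static uint64_t allocateClonedJumpTableID() {
  uint64_t ID = ++DuplicatedJumpTables;
  return ~ID;
}

// Mirrors the check added to JumpTable::getEntriesForAddress(): a cloned
// jump table is recognized because its "address" is negative when viewed as
// a signed 64-bit integer, so the whole entry range is returned for it.
static bool isClonedJumpTableID(uint64_t AddrOrID) {
  return static_cast<int64_t>(AddrOrID) < 0;
}

int main() {
  const uint64_t RealJTAddress = 0x402450; // e.g. the PIC jump table above
  const uint64_t CloneID = allocateClonedJumpTableID();

  assert(!isClonedJumpTableID(RealJTAddress));
  assert(isClonedJumpTableID(CloneID));
  std::cout << std::hex << "cloned jump table id: 0x" << CloneID << "\n";
  return 0;
}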