[BOLT] Create OffsetTranslationTable for basic blocks

Summary:
Use BinaryBasicBlock::OffsetTranslationTable for BAT (BOLT address
translation). This removes the dependency on instructions after code emission.

(cherry picked from FBD18283965)
This commit is contained in:
Maksim Panchenko 2019-11-01 16:19:45 -07:00
parent a1388308f0
commit 98e63610b1
5 changed files with 63 additions and 65 deletions

View File

@ -81,11 +81,23 @@ private:
MCSymbol *Label{nullptr};
/// [Begin, End) address range for this block in the output binary.
std::pair<uint64_t, uint64_t> OutputAddressRange{0, 0};
std::pair<uint32_t, uint32_t> OutputAddressRange{0, 0};
/// Original offset range of the basic block in the function.
std::pair<uint32_t, uint32_t> InputRange{INVALID_OFFSET, INVALID_OFFSET};
/// Map input offset of an instruction to an output symbol. Enables writing
/// bolt address translation tables, used for mapping control transfer in the
/// output binary back to the original binary.
using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
std::unique_ptr<LocSymsTy> LocSyms;
/// Map input offsets in the basic block to output offsets.
///
/// NOTE: maps only instructions of interest, such as calls.
using OffsetTranslationTableTy = std::vector<std::pair<uint16_t, uint16_t>>;
std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;
/// Alignment requirements for the block.
uint32_t Alignment{1};
@ -816,6 +828,28 @@ public:
return OutputAddressRange;
}
/// Return the table that pairs input offsets with symbols in the output.
/// The table is allocated on first access, so the returned reference always
/// refers to a live (possibly empty) container.
LocSymsTy &getLocSyms() {
  if (!LocSyms)
    LocSyms = std::make_unique<LocSymsTy>();
  return *LocSyms;
}
/// Read-only access to the table that pairs input offsets with symbols in the
/// output. Forwards to the non-const overload, casting away constness on
/// `this` to do so.
const LocSymsTy &getLocSyms() const {
  auto *MutableThis = const_cast<BinaryBasicBlock *>(this);
  return MutableThis->getLocSyms();
}
/// Return the offset translation table for this basic block.
/// The table is allocated on first access, so the returned reference always
/// refers to a live (possibly empty) container.
OffsetTranslationTableTy &getOffsetTranslationTable() {
  if (!OffsetTranslationTable)
    OffsetTranslationTable = std::make_unique<OffsetTranslationTableTy>();
  return *OffsetTranslationTable;
}
/// Read-only access to the offset translation table for this basic block.
/// Forwards to the non-const overload, casting away constness on `this` to
/// do so.
const OffsetTranslationTableTy &getOffsetTranslationTable() const {
  auto *MutableThis = const_cast<BinaryBasicBlock *>(this);
  return MutableThis->getOffsetTranslationTable();
}
/// Return size of the basic block in the output binary.
uint64_t getOutputSize() const {
return OutputAddressRange.second - OutputAddressRange.first;
@ -937,6 +971,9 @@ private:
}
};
/// Keep the size of the BinaryBasicBlock within a reasonable size class.
static_assert(sizeof(BinaryBasicBlock) <= 256, "");
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
} // namespace bolt

View File

@ -2743,12 +2743,10 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,
// the location of calls/returns for BOLT address translation maps
if (!EmitCodeOnly && LabelsForOffsets &&
BC.MIB->hasAnnotation(Instr, "Offset")) {
const auto Offset = BC.MIB->getAnnotationAs<uint32_t>(Instr, "Offset");
MCSymbol *LocSym = BC.Ctx->createTempSymbol(/*CanBeUnnamed=*/true);
Streamer.EmitLabel(LocSym);
BC.MIB->addAnnotation(Instr, "LocSym",
static_cast<uint32_t>(LocSyms.size()));
LocSyms.push_back(LocSym);
BB->getLocSyms().emplace_back(std::make_pair(Offset, LocSym));
}
// Emit SDT labels

View File

@ -542,12 +542,6 @@ private:
/// Count the number of functions created.
static uint64_t Count;
/// LocSym annotation records an index to this vector. This holds a label
/// for each instruction whose input/output offsets need to be known after
/// emission. Enables writing bolt address translation tables, used for
/// mapping control transfer in the output binary back to the original binary.
std::vector<const MCSymbol *> LocSyms;
/// Register alternative function name.
void addAlternativeName(std::string NewName) {
Names.emplace_back(NewName);
@ -772,13 +766,6 @@ public:
return *this;
}
/// Return a symbol for an instruction location. \p Idx is recorded as an
/// annotation in the instruction.
const MCSymbol *getLocSym(size_t Idx) const {
assert(Idx < LocSyms.size() && "Invalid index");
return LocSyms[Idx];
}
/// Update layout of basic blocks used for output.
void updateBasicBlockLayout(BasicBlockOrderType &NewLayout) {
BasicBlocksPreviousLayout = BasicBlocksLayout;

View File

@ -23,50 +23,31 @@ const char* BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncAddress) {
const uint64_t Key = BB.getOutputAddressRange().first - FuncAddress;
const uint64_t Val = BB.getInputOffset();
const auto BBOutputOffset = BB.getOutputAddressRange().first - FuncAddress;
const auto BBInputOffset = BB.getInputOffset();
assert(Val != BinaryBasicBlock::INVALID_OFFSET &&
assert(BBInputOffset != BinaryBasicBlock::INVALID_OFFSET &&
"Every output BB must track back to an input BB for profile "
"collection in bolted binaries");
DEBUG(dbgs() << "BB " << BB.getName() <<"\n");
DEBUG(dbgs() << " Key: " << Twine::utohexstr(Key)
<< " Val: " << Twine::utohexstr(Val) << "\n");
DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
// NOPs or a jump to the next block (successor). Either way, the successor
// and this deleted block will both share the same output address (the same key),
// and we need to map back. We choose here to privilege the successor by
// and this deleted block will both share the same output address (the same
// key), and we need to map back. We choose here to privilege the successor by
// allowing it to overwrite the previously inserted key in the map.
Map[Key] = Val;
Map[BBOutputOffset] = BBInputOffset;
// Look for special instructions we are interested in mapping offsets. These
// are key instructions for the profile identified by
// BC.keepOffsetForInstruction(Inst) and are instructions that cause control
// flow change. We also record offsets for the last instruction in the BB in
// some cases. These are harmless for BAT writing purposes, besides increasing
// the size of the table unnecessarily.
for (const auto &Inst : BB) {
if (!BC.MIB->hasAnnotation(Inst, "LocSym"))
continue;
const auto OutputOffset =
BC.MIB->getAnnotationAs<uint32_t>(Inst, "LocSym") - FuncAddress;
for (const auto &IOPair : BB.getOffsetTranslationTable()) {
const auto InputOffset = IOPair.first + BBInputOffset;
const auto OutputOffset = IOPair.second + BBOutputOffset;
auto InputOffsetOrErr = BC.MIB->tryGetAnnotationAs<uint32_t>(Inst, "Offset");
DEBUG(if (!InputOffsetOrErr) {
auto *Function = BB.getFunction();
dbgs() << "Function: " << Function->getPrintName()
<< " BB: " << BB.getName() << " lacking annotation for: ";
BC.printInstruction(dbgs(), Inst);
dbgs() << "\n";
});
assert(InputOffsetOrErr && "Expected annotation with input offset");
const auto InputOffset = *InputOffsetOrErr;
// Is this the first instruction in the BB? No need to duplicate the entry
if (Key == OutputOffset)
// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
continue;
DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset)

View File

@ -3608,7 +3608,7 @@ void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
}
// Update basic block output ranges for the debug info, if we have
// secondary entry points in the symbol table to update or if writing BAT
// secondary entry points in the symbol table to update or if writing BAT.
if (!opts::UpdateDebugSections && !Function.isMultiEntry() &&
!opts::EnableBAT)
return;
@ -3634,7 +3634,8 @@ void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
assert(BBBaseAddress == Function.getOutputAddress());
}
}
auto BBAddress = BBBaseAddress + Layout.getSymbolOffset(*BB->getLabel());
const auto BBOffset = Layout.getSymbolOffset(*BB->getLabel());
const auto BBAddress = BBBaseAddress + BBOffset;
BB->setOutputStartAddress(BBAddress);
if (PrevBB) {
@ -3647,19 +3648,13 @@ void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
}
PrevBB = BB;
if (!opts::EnableBAT)
continue;
// Record location of special instrs that require an offset for profile
// assignment when writing BOLT address translation table
for (auto &Inst : *BB) {
if (!BC->MIB->hasAnnotation(Inst, "LocSym"))
continue;
uint32_t &SymIdx = BC->MIB->getAnnotationAs<uint32_t>(Inst, "LocSym");
const MCSymbol *LocSym = Function.getLocSym(SymIdx);
const auto CallOffset =
BBBaseAddress + Layout.getSymbolOffset(*LocSym);
SymIdx = CallOffset;
for (const auto &LocSymKV : BB->getLocSyms()) {
const uint16_t InputOffset =
static_cast<uint16_t>(LocSymKV.first - BB->getInputOffset());
const uint16_t OutputOffset = static_cast<uint16_t>(
Layout.getSymbolOffset(*LocSymKV.second) - BBOffset);
BB->getOffsetTranslationTable().emplace_back(
std::make_pair(InputOffset, OutputOffset));
}
}
PrevBB->setOutputEndAddress(PrevBB->isCold() ?