[BOLT] Delay populating jump tables

Summary:
During the initial disassembly pass, only identify jump tables
without populating the contents. Later, after all functions have been
disassembled, we have a better idea of jump table boundaries and can do
a better job of populating their entries.

As a result, we no longer have embedded jump tables (i.e. a jump table
that is part of another jump table). If we ever need to keep
sequential jump tables inseparable during the output, we can always
add such functionality later.

Fixes facebookincubator/BOLT#56.

(cherry picked from FBD15800427)
This commit is contained in:
Maksim Panchenko 2019-06-12 18:21:02 -07:00
parent 66cf16208f
commit 9e2ad3f593
5 changed files with 232 additions and 133 deletions

View File

@ -286,6 +286,139 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF) {
return std::make_pair(TargetSymbol, Addend);
}
/// Inspect the memory at \p Address and classify it as a possible PIC jump
/// table, a possible non-PIC jump table, or unknown contents, judging by
/// whether the values stored there point into function \p BF.
///
/// Always returns MemoryContentsType::UNKNOWN on non-x86 targets, when no
/// section contains \p Address, or when the containing section is virtual.
MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) {
  if (!isX86())
    return MemoryContentsType::UNKNOWN;

  auto Section = getSectionForAddress(Address);
  if (!Section) {
    // No section - possibly an absolute address. There is nothing to read,
    // so the contents cannot be classified.
    if (opts::Verbosity > 1) {
      errs() << "BOLT-WARNING: no section for address 0x"
             << Twine::utohexstr(Address) << " referenced from function "
             << BF << '\n';
    }
    return MemoryContentsType::UNKNOWN;
  }

  if (Section->isVirtual()) {
    // The contents are filled at runtime.
    return MemoryContentsType::UNKNOWN;
  }

  // Return true if the memory starting at JTAddress looks like a jump table
  // of the given Type for BF: the first entry that is not a
  // __builtin_unreachable() marker must point inside BF, past its start.
  auto couldBeJumpTable = [&](const uint64_t JTAddress,
                              JumpTable::JumpTableType Type) {
    const auto EntrySize =
        Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
    auto ValueAddress = JTAddress;

    // Scan up to the end of the section, or up to the end of the data object
    // registered at JTAddress when that object has a known size.
    auto UpperBound = Section->getEndAddress();
    const auto *JumpTableBD = getBinaryDataAtAddress(JTAddress);
    if (JumpTableBD && JumpTableBD->getSize()) {
      UpperBound = JumpTableBD->getEndAddress();
      assert(UpperBound <= Section->getEndAddress() &&
             "data object cannot cross a section boundary");
    }

    while (ValueAddress <= UpperBound - EntrySize) {
      DEBUG(dbgs() << "BOLT-DEBUG: analyzing memory at 0x"
                   << Twine::utohexstr(ValueAddress));
      uint64_t Value;
      if (Type == JumpTable::JTT_PIC) {
        // PIC entries are signed offsets relative to the table start.
        auto Offset = getSignedValueAtAddress(ValueAddress, EntrySize);
        if (!Offset) {
          // The read failed: terminate the debug line and reject the
          // candidate instead of dereferencing an error value.
          DEBUG(dbgs() << '\n');
          return false;
        }
        Value = JTAddress + *Offset;
      } else {
        auto Pointer = getPointerAtAddress(ValueAddress);
        if (!Pointer) {
          DEBUG(dbgs() << '\n');
          return false;
        }
        Value = *Pointer;
      }
      DEBUG(dbgs() << ", which contains value 0x"
                   << Twine::utohexstr(Value) << '\n');
      ValueAddress += EntrySize;

      // We assume that a jump table cannot have function start as an entry.
      if (BF.containsAddress(Value) && Value != BF.getAddress())
        return true;

      // Potentially a jump table can contain __builtin_unreachable() entry
      // pointing just right after the function. In this case we have to check
      // another entry. Otherwise the entry is outside of this function scope
      // and it's not a jump table.
      if (Value == BF.getAddress() + BF.getSize())
        continue;

      return false;
    }

    return false;
  };

  // Start with checking for PIC jump table. We expect non-PIC jump tables
  // to have high 32 bits set to 0.
  if (couldBeJumpTable(Address, JumpTable::JTT_PIC))
    return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;

  if (couldBeJumpTable(Address, JumpTable::JTT_NORMAL))
    return MemoryContentsType::POSSIBLE_JUMP_TABLE;

  return MemoryContentsType::UNKNOWN;
}
/// Fill in OffsetEntries for every registered jump table by reading the
/// table contents from memory.
///
/// Entries are read until the upper bound is reached, a read fails, or a
/// value is found that is neither inside the parent function (past its
/// start) nor the __builtin_unreachable() marker just past its end.
void BinaryContext::populateJumpTables() {
  // NOTE(review): the "next jump table" bound below relies on JumpTables
  // iterating in increasing address order - confirm against its declaration.
  for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
       ++JTI) {
    auto *JT = JTI->second;
    auto &BF = *JT->Parent;

    DEBUG(dbgs() << "BOLT-DEBUG: populating jump table "
                 << JT->getName() << '\n');

    // The upper bound is defined by containing object, section limits, and
    // the next jump table in memory.
    auto UpperBound = JT->getSection().getEndAddress();
    const auto *JumpTableBD = getBinaryDataAtAddress(JT->getAddress());
    if (JumpTableBD && JumpTableBD->getSize()) {
      assert(JumpTableBD->getEndAddress() <= UpperBound &&
             "data object cannot cross a section boundary");
      UpperBound = JumpTableBD->getEndAddress();
    }
    auto NextJTI = std::next(JTI);
    if (NextJTI != JTE) {
      assert (UpperBound != JT->getAddress());
      UpperBound = std::min(NextJTI->second->getAddress(), UpperBound);
    }

    for (auto EntryAddress = JT->getAddress();
         EntryAddress <= UpperBound - JT->EntrySize;
         EntryAddress += JT->EntrySize) {
      uint64_t Value;
      if (JT->Type == JumpTable::JTT_PIC) {
        // PIC entries are signed offsets relative to the table start.
        auto Offset = getSignedValueAtAddress(EntryAddress, JT->EntrySize);
        if (!Offset)
          break; // Unreadable memory ends the table.
        Value = JT->getAddress() + *Offset;
      } else {
        auto Pointer = getPointerAtAddress(EntryAddress);
        if (!Pointer)
          break; // Unreadable memory ends the table.
        Value = *Pointer;
      }

      // __builtin_unreachable() case.
      if (Value == BF.getAddress() + BF.getSize()) {
        JT->OffsetEntries.emplace_back(Value - BF.getAddress());
        BF.IgnoredBranches.emplace_back(Value - BF.getAddress(), BF.getSize());
        continue;
      }

      // We assume that a jump table cannot have function start as an entry.
      if (BF.containsAddress(Value) && Value != BF.getAddress()) {
        JT->OffsetEntries.emplace_back(Value - BF.getAddress());
        continue;
      }

      // Any other value lies outside the function and ends the table.
      break;
    }

    assert(JT->OffsetEntries.size() > 1 &&
           "expected more than one jump table entry");
  }
}
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix,
uint64_t Size,
@ -316,10 +449,9 @@ BinaryFunction *BinaryContext::createBinaryFunction(
}
std::pair<JumpTable *, const MCSymbol *>
BinaryContext::createJumpTable(BinaryFunction &Function,
uint64_t Address,
JumpTable::JumpTableType Type,
JumpTable::OffsetEntriesType &&OffsetEntries) {
BinaryContext::getOrCreateJumpTable(BinaryFunction &Function,
uint64_t Address,
JumpTable::JumpTableType Type) {
const auto JumpTableName = generateJumpTableName(Function, Address);
if (auto *JT = getJumpTableContainingAddress(Address)) {
assert(JT->Type == Type && "jump table types have to match");
@ -349,14 +481,13 @@ BinaryContext::createJumpTable(BinaryFunction &Function,
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
<< JTStartLabel->getName()
<< " in function " << Function << " with "
<< OffsetEntries.size() << " entries\n");
<< " in function " << Function << '\n');
auto *JT = new JumpTable(JumpTableName,
Address,
EntrySize,
Type,
std::move(OffsetEntries),
{},
JumpTable::LabelMapType{{0, JTStartLabel}},
Function,
*getSectionForAddress(Address));

View File

@ -60,6 +60,12 @@ class BinaryFunction;
class BinaryBasicBlock;
class DataReader;
/// Classification of the contents of a memory location, as returned by
/// BinaryContext::analyzeMemoryAt().
enum class MemoryContentsType : char {
UNKNOWN = 0, ///< Unknown contents.
POSSIBLE_JUMP_TABLE, ///< Possibly a non-PIC jump table.
POSSIBLE_PIC_JUMP_TABLE, ///< Possibly a PIC jump table.
};
/// Helper function to truncate a \p Value to given size in \p Bytes.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
@ -209,9 +215,10 @@ public:
if (JTI == JumpTables.begin())
return nullptr;
--JTI;
if (JTI->first + JTI->second->getSize() > Address) {
if (JTI->first + JTI->second->getSize() > Address)
return JTI->second;
if (JTI->second->getSize() == 0 && JTI->first == Address)
return JTI->second;
}
return nullptr;
}
@ -292,10 +299,13 @@ public:
/// May create an embedded jump table and return its label as the second
/// element of the pair.
std::pair<JumpTable *, const MCSymbol *>
createJumpTable(BinaryFunction &Function,
uint64_t Address,
JumpTable::JumpTableType Type,
JumpTable::OffsetEntriesType &&OffsetEntries);
getOrCreateJumpTable(BinaryFunction &Function,
uint64_t Address,
JumpTable::JumpTableType Type);
/// After jump table locations are established, this function will populate
/// their OffsetEntries based on memory contents.
void populateJumpTables();
/// Generate a unique name for jump table at a given \p Address belonging
/// to function \p BF.
@ -486,6 +496,10 @@ public:
std::pair<MCSymbol *, uint64_t> handleAddressRef(uint64_t Address,
BinaryFunction &BF);
/// Analyze memory contents at the given \p Address and return the type of
/// memory contents (such as a possible jump table).
MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
/// Return a value of the global \p Symbol or an error if the value
/// was not set.
ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {

View File

@ -675,8 +675,6 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
uint64_t PCRelAddr = 0;
auto Begin = Instructions.begin();
auto End = Instructions.end();
if (BC.isAArch64()) {
PreserveNops = BC.HasRelocations;
// Start at the last label as an approximation of the current basic block.
@ -693,7 +691,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
auto Type = BC.MIB->analyzeIndirectBranch(Instruction,
Begin,
End,
Instructions.end(),
PtrSize,
MemLocInstr,
BaseRegNum,
@ -765,50 +763,6 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');
// List of possible jump targets.
std::vector<uint64_t> JTOffsetCandidates;
auto useJumpTableForInstruction = [&](JumpTable::JumpTableType JTType) {
JumpTable *JT;
const MCSymbol *JTLabel;
std::tie(JT, JTLabel) = BC.createJumpTable(*this,
ArrayStart,
JTType,
std::move(JTOffsetCandidates));
BC.MIB->replaceMemOperandDisp(const_cast<MCInst &>(*MemLocInstr),
JTLabel, BC.Ctx.get());
BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
JTSites.emplace_back(Offset, ArrayStart);
};
// Check if there's already a jump table registered at this address.
if (auto *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
const auto JTOffset = ArrayStart - JT->getAddress();
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE && JTOffset != 0) {
// Adjust the size of this jump table and create a new one if necessary.
// We cannot re-use the entries since the offsets are relative to the
// table start.
DEBUG(dbgs() << "BOLT-DEBUG: adjusting size of jump table at 0x"
<< Twine::utohexstr(JT->getAddress()) << '\n');
JT->OffsetEntries.resize(JTOffset / JT->EntrySize);
} else if (Type != IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
// Re-use the existing jump table or parts of it.
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JT->Type == JumpTable::JTT_NORMAL &&
"normal jump table expected");
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
} else {
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
}
useJumpTableForInstruction(JT->Type);
return Type;
}
}
auto Section = BC.getSectionForAddress(ArrayStart);
if (!Section) {
// No section - possibly an absolute address. Since we don't allow
@ -826,78 +780,67 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}
// Extract the value at the start of the array.
StringRef SectionContents = Section->getContents();
const auto EntrySize =
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE ? 4 : PtrSize;
DataExtractor DE(SectionContents, BC.AsmInfo->isLittleEndian(), EntrySize);
auto ValueOffset = static_cast<uint32_t>(ArrayStart - Section->getAddress());
uint64_t Value = 0;
auto UpperBound = Section->getSize();
const auto *JumpTableBD = BC.getBinaryDataAtAddress(ArrayStart);
if (JumpTableBD && JumpTableBD->getSize()) {
UpperBound = ValueOffset + JumpTableBD->getSize();
assert(UpperBound <= Section->getSize() &&
"data object cannot cross a section boundary");
}
while (ValueOffset <= UpperBound - EntrySize) {
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
<< Twine::utohexstr(getAddress() + Offset)
<< " is referencing address 0x"
<< Twine::utohexstr(Section->getAddress() + ValueOffset));
// Extract the value and increment the offset.
if (BC.isAArch64()) {
Value = PCRelAddr + DE.getSigned(&ValueOffset, EntrySize);
} else if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
Value = ArrayStart + DE.getSigned(&ValueOffset, 4);
} else {
Value = DE.getAddress(&ValueOffset);
}
DEBUG(dbgs() << ", which contains value "
<< Twine::utohexstr(Value) << '\n');
if (Type == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
if (Section->isReadOnly()) {
outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " the destination value is 0x" << Twine::utohexstr(Value)
<< '\n';
TargetAddress = Value;
return Type;
}
if (Type == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
auto Value = BC.getPointerAtAddress(ArrayStart);
if (!Value)
return IndirectBranchType::UNKNOWN;
}
if (containsAddress(Value) && Value != getAddress()) {
// Is it possible to have a jump table with function start as an entry?
JTOffsetCandidates.push_back(Value - getAddress());
if (Type == IndirectBranchType::UNKNOWN)
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
continue;
}
// Potentially a switch table can contain __builtin_unreachable() entry
// pointing just right after the function. In this case we have to check
// another entry. Otherwise the entry is outside of this function scope
// and it's not a switch table.
if (Value == getAddress() + getSize()) {
JTOffsetCandidates.push_back(getSize());
IgnoredBranches.emplace_back(Offset, getSize());
} else {
break;
}
}
if (Type == IndirectBranchType::POSSIBLE_JUMP_TABLE ||
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JTOffsetCandidates.size() > 1 &&
"expected more than one jump table entry");
const auto JumpTableType = Type == IndirectBranchType::POSSIBLE_JUMP_TABLE
? JumpTable::JTT_NORMAL
: JumpTable::JTT_PIC;
useJumpTableForInstruction(JumpTableType);
if (!BC.getSectionForAddress(ArrayStart)->isReadOnly())
return IndirectBranchType::UNKNOWN;
outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " referencing data at 0x" << Twine::utohexstr(ArrayStart)
<< " the destination value is 0x" << Twine::utohexstr(*Value)
<< '\n';
TargetAddress = *Value;
return Type;
}
auto useJumpTableForInstruction = [&](JumpTable::JumpTableType JTType) {
JumpTable *JT;
const MCSymbol *JTLabel;
std::tie(JT, JTLabel) = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
BC.MIB->replaceMemOperandDisp(const_cast<MCInst &>(*MemLocInstr),
JTLabel, BC.Ctx.get());
BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
JTSites.emplace_back(Offset, ArrayStart);
};
// Check if there's already a jump table registered at this address.
// At this point, all jump tables are empty.
if (auto *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
// Make sure the type of the table matches the code.
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
} else {
assert(JT->Type == JumpTable::JTT_NORMAL && "normal jump table expected");
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
}
useJumpTableForInstruction(JT->Type);
return Type;
}
const auto MemType = BC.analyzeMemoryAt(ArrayStart, *this);
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE &&
"PIC jump table heuristic failure");
useJumpTableForInstruction(JumpTable::JTT_PIC);
return Type;
}
if (MemType == MemoryContentsType::POSSIBLE_JUMP_TABLE) {
assert(Type == IndirectBranchType::UNKNOWN &&
"non-PIC jump table heuristic failure");
useJumpTableForInstruction(JumpTable::JTT_NORMAL);
return IndirectBranchType::POSSIBLE_JUMP_TABLE;
}
// We have a possible tail call, so let's add the value read from the possible
// memory location as a reference. Only do that if the address we read is sane
// enough (is inside an allocatable section). It is possible that we read
@ -905,8 +848,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
// than the one where the indirect jump is. However, later,
// postProcessIndirectBranches() is going to mark the function as non-simple
// in this case.
if (Value && BC.getSectionForAddress(Value))
BC.InterproceduralReferences.insert(std::make_pair(this, Value));
auto Value = BC.getPointerAtAddress(ArrayStart);
if (Value && BC.getSectionForAddress(*Value))
BC.InterproceduralReferences.insert(std::make_pair(this, *Value));
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}
@ -1307,7 +1251,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
break;
};
}
// Indirect call. We only need to fix it if the operand is RIP-relative
// Indirect call. We only need to fix it if the operand is RIP-relative.
if (IsSimple && MIB->hasPCRelOperand(Instruction)) {
if (!handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size)) {
errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
@ -1371,8 +1315,6 @@ add_instruction:
updateState(State::Disassembled);
postProcessEntryPoints();
postProcessJumpTables();
}
void BinaryFunction::postProcessEntryPoints() {

View File

@ -895,9 +895,10 @@ public:
if (JTI == JumpTables.begin())
return nullptr;
--JTI;
if (JTI->first + JTI->second->getSize() > Address) {
if (JTI->first + JTI->second->getSize() > Address)
return JTI->second;
if (JTI->second->getSize() == 0 && JTI->first == Address)
return JTI->second;
}
return nullptr;
}

View File

@ -2644,6 +2644,17 @@ void RewriteInstance::disassembleFunctions() {
BC->processInterproceduralReferences();
}
BC->populateJumpTables();
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!shouldDisassemble(Function))
continue;
Function.postProcessJumpTables();
}
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;