[BOLT] Add code padding verification

Summary:
In non-relocation mode, we allow data objects to be embedded in the
code. Such objects could be unmarked, and could occupy an area between
functions that is otherwise considered to be code padding.

When we disassemble code, we detect references into the padding area
and shrink that area, so that the referenced object is not overwritten
during the code emission. We assume the reference to be pointing to the
beginning of the object.

However, assembly-written functions may reference the middle of an
object and use negative offsets to reference data fields. Thus,
conservatively, we reduce the possibly-overwritten padding area to
a minimum if the object reference was detected.

Since we also allow functions with unknown code in non-relocation mode,
it is possible that we miss references to some objects in code.
To cover such cases, we need to verify the padding area before we
allow it to be overwritten.

(cherry picked from FBD16477787)
This commit is contained in:
Maksim Panchenko 2019-07-23 20:48:41 -07:00
parent 6722875047
commit a9b9aa1e02
6 changed files with 133 additions and 5 deletions

View File

@ -26,6 +26,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include <functional>
#include <iterator>
using namespace llvm;
@ -603,6 +604,111 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
(Offset ? ("." + std::to_string(Offset)) : ""));
}
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
// FIXME: aarch64 support is missing.
if (!isX86())
return true;
if (BF.getSize() == BF.getMaxSize())
return true;
auto FunctionData = getFunctionData(BF);
assert(FunctionData && "cannot get function as data");
uint64_t Offset = BF.getSize();
MCInst Instr;
uint64_t InstrSize{0};
uint64_t InstrAddress = BF.getAddress() + Offset;
using std::placeholders::_1;
// Skip instructions that satisfy the predicate condition.
auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
const auto StartOffset = Offset;
for (; Offset < BF.getMaxSize();
Offset += InstrSize, InstrAddress += InstrSize) {
if (!DisAsm->getInstruction(Instr,
InstrSize,
FunctionData->slice(Offset),
InstrAddress,
nulls(),
nulls()))
break;
if (!Predicate(Instr))
break;
}
return Offset - StartOffset;
};
// Skip a sequence of zero bytes.
auto skipZeros = [&]() {
const auto StartOffset = Offset;
for (; Offset < BF.getMaxSize(); ++Offset)
if ((*FunctionData)[Offset] != 0)
break;
return Offset - StartOffset;
};
// Accept the whole padding area filled with breakpoints.
auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
return true;
auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
// Some functions have a jump to the next function or to the padding area
// inserted after the body.
auto isSkipJump = [&](const MCInst &Instr) {
uint64_t TargetAddress{0};
if (MIB->isUnconditionalBranch(Instr) &&
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
if (TargetAddress >= InstrAddress + InstrSize &&
TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
return true;
}
}
return false;
};
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
while (skipInstructions(isNoop) ||
skipInstructions(isSkipJump) ||
skipZeros())
;
if (Offset == BF.getMaxSize())
return true;
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: bad padding at address 0x"
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
<< " starting at offset "
<< (Offset - BF.getSize()) << " in function "
<< BF << '\n';
for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) {
errs() << format("%.2x ", (*FunctionData)[I]);
}
errs() << '\n';
}
return false;
}
/// Walk over all registered functions and, for every simple function whose
/// padding area does not pass hasValidCodePadding(), reduce the max size of
/// the function to its size. Only meaningful in non-relocation mode.
void BinaryContext::adjustCodePadding() {
  assert(!HasRelocations && "cannot adjust padding in relocation mode");

  for (auto &AddressAndFunction : BinaryFunctions) {
    auto &Function = AddressAndFunction.second;

    // Non-simple functions are left untouched; the padding check is only
    // evaluated for simple ones.
    if (Function.isSimple() && !hasValidCodePadding(Function))
      Function.setMaxSize(Function.getSize());
  }
}
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
uint64_t Address,
uint64_t Size,
@ -856,8 +962,7 @@ void BinaryContext::processInterproceduralReferences() {
errs() << "BOLT-WARNING: function " << *ContainingFunction
<< " has an object detected in a padding region at address 0x"
<< Twine::utohexstr(Addr) << '\n';
ContainingFunction->setMaxSize(Addr -
ContainingFunction->getAddress());
ContainingFunction->setMaxSize(ContainingFunction->getSize());
}
}
}
@ -1422,7 +1527,7 @@ void BinaryContext::printInstruction(raw_ostream &OS,
ErrorOr<ArrayRef<uint8_t>>
BinaryContext::getFunctionData(const BinaryFunction &Function) const {
auto &Section = Function.getSection();
assert(Section.containsRange(Function.getAddress(), Function.getSize()) &&
assert(Section.containsRange(Function.getAddress(), Function.getMaxSize()) &&
"wrong section for function");
if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
@ -1437,7 +1542,7 @@ BinaryContext::getFunctionData(const BinaryFunction &Function) const {
// Function offset from the section start.
auto FunctionOffset = Function.getAddress() - Section.getAddress();
auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
return ArrayRef<uint8_t>(Bytes + FunctionOffset, Function.getSize());
return ArrayRef<uint8_t>(Bytes + FunctionOffset, Function.getMaxSize());
}
ErrorOr<BinarySection&> BinaryContext::getSectionForAddress(uint64_t Address) {

View File

@ -338,6 +338,13 @@ public:
/// to function \p BF.
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
/// Return true if the bytes following the body of \p BF, i.e. the area
/// between its size and its max size, represent a valid code padding.
/// Always returns true for non-x86 targets and for functions that have no
/// padding area.
bool hasValidCodePadding(const BinaryFunction &BF);
/// Verify padding area between functions, and adjust max function size
/// accordingly: a simple function whose padding fails verification gets its
/// max size reduced to its size. Must not be called in relocation mode.
void adjustCodePadding();
/// Regular page size.
static constexpr unsigned RegularPageSize = 0x1000;

View File

@ -898,7 +898,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
"Build Binary Functions", opts::TimeBuild);
assert(FunctionData.size() == getSize() &&
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");
auto &Ctx = BC.Ctx;

View File

@ -463,6 +463,11 @@ public:
return false;
}
/// Return true if \p Inst is a breakpoint instruction. Target-specific
/// builders are expected to override this; the default implementation is
/// marked unreachable.
virtual bool isBreakpoint(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}
virtual bool isPrefix(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;

View File

@ -2731,6 +2731,9 @@ void RewriteInstance::disassembleFunctions() {
Function.postProcessJumpTables();
}
if (!BC->HasRelocations)
BC->adjustCodePadding();
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;

View File

@ -474,12 +474,20 @@ public:
switch (Inst.getOpcode()) {
case X86::NOOP:
case X86::NOOPL:
case X86::NOOPLr:
case X86::NOOPQ:
case X86::NOOPQr:
case X86::NOOPW:
case X86::NOOPWr:
return true;
}
return false;
}
/// Return true if \p Inst is the INT3 instruction.
bool isBreakpoint(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case X86::INT3:
    return true;
  }
  return false;
}
bool isPrefix(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case X86::LOCK_PREFIX: