forked from OSchip/llvm-project
[BOLT] Add code padding verification
Summary: In non-relocation mode, we allow data objects to be embedded in the code. Such objects could be unmarked, and could occupy an area between functions, an area that is otherwise considered to be code padding. When we disassemble code, we detect references into the padding area and adjust it, so that it is not overwritten during the code emission. We assume the reference to be pointing to the beginning of the object. However, assembly-written functions may reference the middle of an object and use negative offsets to reference data fields. Thus, conservatively, we reduce the possibly-overwritten padding area to a minimum if an object reference was detected. Since we also allow functions with unknown code in non-relocation mode, it is possible that we miss references to some objects in code. To cover such cases, we need to verify the padding area before we allow it to be overwritten. (cherry picked from FBD16477787)
This commit is contained in:
parent
6722875047
commit
a9b9aa1e02
|
@ -26,6 +26,7 @@
|
|||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -603,6 +604,111 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
|
|||
(Offset ? ("." + std::to_string(Offset)) : ""));
|
||||
}
|
||||
|
||||
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
|
||||
// FIXME: aarch64 support is missing.
|
||||
if (!isX86())
|
||||
return true;
|
||||
|
||||
if (BF.getSize() == BF.getMaxSize())
|
||||
return true;
|
||||
|
||||
auto FunctionData = getFunctionData(BF);
|
||||
assert(FunctionData && "cannot get function as data");
|
||||
|
||||
uint64_t Offset = BF.getSize();
|
||||
MCInst Instr;
|
||||
uint64_t InstrSize{0};
|
||||
uint64_t InstrAddress = BF.getAddress() + Offset;
|
||||
using std::placeholders::_1;
|
||||
|
||||
// Skip instructions that satisfy the predicate condition.
|
||||
auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
|
||||
const auto StartOffset = Offset;
|
||||
for (; Offset < BF.getMaxSize();
|
||||
Offset += InstrSize, InstrAddress += InstrSize) {
|
||||
if (!DisAsm->getInstruction(Instr,
|
||||
InstrSize,
|
||||
FunctionData->slice(Offset),
|
||||
InstrAddress,
|
||||
nulls(),
|
||||
nulls()))
|
||||
break;
|
||||
if (!Predicate(Instr))
|
||||
break;
|
||||
}
|
||||
|
||||
return Offset - StartOffset;
|
||||
};
|
||||
|
||||
// Skip a sequence of zero bytes.
|
||||
auto skipZeros = [&]() {
|
||||
const auto StartOffset = Offset;
|
||||
for (; Offset < BF.getMaxSize(); ++Offset)
|
||||
if ((*FunctionData)[Offset] != 0)
|
||||
break;
|
||||
|
||||
return Offset - StartOffset;
|
||||
};
|
||||
|
||||
// Accept the whole padding area filled with breakpoints.
|
||||
auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
|
||||
if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
|
||||
return true;
|
||||
|
||||
auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
|
||||
|
||||
// Some functions have a jump to the next function or to the padding area
|
||||
// inserted after the body.
|
||||
auto isSkipJump = [&](const MCInst &Instr) {
|
||||
uint64_t TargetAddress{0};
|
||||
if (MIB->isUnconditionalBranch(Instr) &&
|
||||
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
|
||||
if (TargetAddress >= InstrAddress + InstrSize &&
|
||||
TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
|
||||
while (skipInstructions(isNoop) ||
|
||||
skipInstructions(isSkipJump) ||
|
||||
skipZeros())
|
||||
;
|
||||
|
||||
if (Offset == BF.getMaxSize())
|
||||
return true;
|
||||
|
||||
if (opts::Verbosity >= 1) {
|
||||
errs() << "BOLT-WARNING: bad padding at address 0x"
|
||||
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
|
||||
<< " starting at offset "
|
||||
<< (Offset - BF.getSize()) << " in function "
|
||||
<< BF << '\n';
|
||||
for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) {
|
||||
errs() << format("%.2x ", (*FunctionData)[I]);
|
||||
}
|
||||
errs() << '\n';
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void BinaryContext::adjustCodePadding() {
|
||||
assert(!HasRelocations && "cannot adjust padding in relocation mode");
|
||||
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &BF = BFI.second;
|
||||
if (!BF.isSimple())
|
||||
continue;
|
||||
|
||||
if (!hasValidCodePadding(BF)) {
|
||||
BF.setMaxSize(BF.getSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
|
||||
uint64_t Address,
|
||||
uint64_t Size,
|
||||
|
@ -856,8 +962,7 @@ void BinaryContext::processInterproceduralReferences() {
|
|||
errs() << "BOLT-WARNING: function " << *ContainingFunction
|
||||
<< " has an object detected in a padding region at address 0x"
|
||||
<< Twine::utohexstr(Addr) << '\n';
|
||||
ContainingFunction->setMaxSize(Addr -
|
||||
ContainingFunction->getAddress());
|
||||
ContainingFunction->setMaxSize(ContainingFunction->getSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1422,7 +1527,7 @@ void BinaryContext::printInstruction(raw_ostream &OS,
|
|||
ErrorOr<ArrayRef<uint8_t>>
|
||||
BinaryContext::getFunctionData(const BinaryFunction &Function) const {
|
||||
auto &Section = Function.getSection();
|
||||
assert(Section.containsRange(Function.getAddress(), Function.getSize()) &&
|
||||
assert(Section.containsRange(Function.getAddress(), Function.getMaxSize()) &&
|
||||
"wrong section for function");
|
||||
|
||||
if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
|
||||
|
@ -1437,7 +1542,7 @@ BinaryContext::getFunctionData(const BinaryFunction &Function) const {
|
|||
// Function offset from the section start.
|
||||
auto FunctionOffset = Function.getAddress() - Section.getAddress();
|
||||
auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
|
||||
return ArrayRef<uint8_t>(Bytes + FunctionOffset, Function.getSize());
|
||||
return ArrayRef<uint8_t>(Bytes + FunctionOffset, Function.getMaxSize());
|
||||
}
|
||||
|
||||
ErrorOr<BinarySection&> BinaryContext::getSectionForAddress(uint64_t Address) {
|
||||
|
|
|
@ -338,6 +338,13 @@ public:
|
|||
/// to function \p BF.
|
||||
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
|
||||
|
||||
/// Return true if the padding area that follows the body of function \p BF
/// (up to its maximum size) contains only valid code padding.
|
||||
bool hasValidCodePadding(const BinaryFunction &BF);
|
||||
|
||||
/// Verify padding area between functions, and adjust max function size
|
||||
/// accordingly.
|
||||
void adjustCodePadding();
|
||||
|
||||
/// Regular page size.
|
||||
static constexpr unsigned RegularPageSize = 0x1000;
|
||||
|
||||
|
|
|
@ -898,7 +898,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
|
||||
"Build Binary Functions", opts::TimeBuild);
|
||||
|
||||
assert(FunctionData.size() == getSize() &&
|
||||
assert(FunctionData.size() == getMaxSize() &&
|
||||
"function size does not match raw data size");
|
||||
|
||||
auto &Ctx = BC.Ctx;
|
||||
|
|
|
@ -463,6 +463,11 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Return true if \p Inst is a breakpoint instruction. This default is a
/// placeholder that is not expected to be reached; target-specific builders
/// provide the real implementation.
virtual bool isBreakpoint(const MCInst &Inst) const {
  llvm_unreachable("not implemented");
  return false;
}
|
||||
|
||||
virtual bool isPrefix(const MCInst &Inst) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
|
|
|
@ -2731,6 +2731,9 @@ void RewriteInstance::disassembleFunctions() {
|
|||
Function.postProcessJumpTables();
|
||||
}
|
||||
|
||||
if (!BC->HasRelocations)
|
||||
BC->adjustCodePadding();
|
||||
|
||||
for (auto &BFI : BC->getBinaryFunctions()) {
|
||||
BinaryFunction &Function = BFI.second;
|
||||
|
||||
|
|
|
@ -474,12 +474,20 @@ public:
|
|||
switch (Inst.getOpcode()) {
|
||||
case X86::NOOP:
|
||||
case X86::NOOPL:
|
||||
case X86::NOOPLr:
|
||||
case X86::NOOPQ:
|
||||
case X86::NOOPQr:
|
||||
case X86::NOOPW:
|
||||
case X86::NOOPWr:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isBreakpoint(const MCInst &Inst) const override {
|
||||
return Inst.getOpcode() == X86::INT3;
|
||||
}
|
||||
|
||||
bool isPrefix(const MCInst &Inst) const override {
|
||||
switch (Inst.getOpcode()) {
|
||||
case X86::LOCK_PREFIX:
|
||||
|
|
Loading…
Reference in New Issue