forked from OSchip/llvm-project
New function discovery and support for multiple entries.
Summary: Modified function discovery process to tolerate more functions and symbols coming from assembly. The processing order now matches the memory order of the functions (input symbol table is unsorted). Added basic support for functions with multiple entries. When a function references its internal address other than with a branch instruction, that address could potentially escape. We mark such addresses as entry points and make sure they are treated as roots by unreachable code elimination. Without relocations we have to mark multiple-entry functions as non-simple. (cherry picked from FBD3950243)
This commit is contained in:
parent
9cf5d74ffb
commit
e241e9c156
|
@ -84,6 +84,10 @@ private:
|
|||
/// Number of pseudo instructions in this block.
|
||||
uint32_t NumPseudos{0};
|
||||
|
||||
/// True if this basic block is (potentially) an external entry point into
|
||||
/// the function.
|
||||
bool IsEntryPoint{false};
|
||||
|
||||
/// In cases where the parent function has been split, IsCold == true means
|
||||
/// this BB will be allocated outside its parent function.
|
||||
bool IsCold{false};
|
||||
|
@ -469,6 +473,14 @@ public:
|
|||
ExecutionCount = Count;
|
||||
}
|
||||
|
||||
bool isEntryPoint() const {
|
||||
return IsEntryPoint;
|
||||
}
|
||||
|
||||
/// Set the entry-point flag of this basic block to \p Value. Calling it
/// without an argument marks the block as an entry point.
void setEntryPoint(bool Value = true) { IsEntryPoint = Value; }
|
||||
|
||||
bool isValid() const {
|
||||
return IsValid;
|
||||
}
|
||||
|
@ -476,7 +488,7 @@ public:
|
|||
/// Overwrite the validity flag of this basic block with \p Valid.
void markValid(const bool Valid) { IsValid = Valid; }
|
||||
|
||||
|
||||
bool isCold() const {
|
||||
return IsCold;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,17 @@ MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
|
|||
return Symbol;
|
||||
}
|
||||
|
||||
/// Look up \p Address in the address-to-name reverse map and return the
/// MCSymbol that the MCContext holds for the name found there. Returns
/// nullptr when no name is registered at \p Address.
MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
  const auto AddressIt = GlobalAddresses.find(Address);
  if (AddressIt != GlobalAddresses.end()) {
    // A name recorded in the reverse map is expected to be known to the
    // MCContext as well, hence the assertion rather than a null return.
    auto *Registered = Ctx->lookupSymbol(AddressIt->second);
    assert(Registered && "symbol cannot be NULL at this point");
    return Registered;
  }

  return nullptr;
}
|
||||
|
||||
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
|
||||
for (auto &entry : GlobalSymbols) {
|
||||
OS << "(" << entry.first << " -> " << entry.second << ")\n";
|
||||
|
|
|
@ -164,6 +164,10 @@ public:
|
|||
/// return the first one.
|
||||
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix);
|
||||
|
||||
/// Return MCSymbol registered at a given \p Address or nullptr if no
|
||||
/// global symbol was registered at the location.
|
||||
MCSymbol *getGlobalSymbolAtAddress(uint64_t Address) const;
|
||||
|
||||
/// Print the global symbol table.
|
||||
void printGlobalSymbols(raw_ostream& OS) const;
|
||||
|
||||
|
@ -177,6 +181,9 @@ public:
|
|||
|
||||
// Add to the reverse map. There could be multiple names at the same address.
|
||||
GlobalAddresses.emplace(std::make_pair(Address, Name));
|
||||
|
||||
// Register the name with MCContext.
|
||||
Ctx->getOrCreateSymbol(Name);
|
||||
}
|
||||
|
||||
const BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol) const {
|
||||
|
|
|
@ -191,18 +191,14 @@ BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const {
|
|||
|
||||
void BinaryFunction::markUnreachable() {
|
||||
std::stack<BinaryBasicBlock *> Stack;
|
||||
BinaryBasicBlock *Entry = *layout_begin();
|
||||
|
||||
for (auto *BB : layout()) {
|
||||
BB->markValid(false);
|
||||
}
|
||||
|
||||
Stack.push(Entry);
|
||||
Entry->markValid(true);
|
||||
|
||||
// Treat landing pads as roots.
|
||||
// Add all entries and landing pads as roots.
|
||||
for (auto *BB : BasicBlocks) {
|
||||
if (BB->isLandingPad()) {
|
||||
if (BB->isEntryPoint() || BB->isLandingPad()) {
|
||||
Stack.push(BB);
|
||||
BB->markValid(true);
|
||||
}
|
||||
|
@ -227,15 +223,14 @@ std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
|
|||
BasicBlockOrderType NewLayout;
|
||||
unsigned Count = 0;
|
||||
uint64_t Bytes = 0;
|
||||
assert(BasicBlocksLayout.front()->isValid() &&
|
||||
"unable to remove an entry basic block");
|
||||
for (auto I = BasicBlocksLayout.begin(), E = BasicBlocksLayout.end(); I != E;
|
||||
++I) {
|
||||
if ((*I)->isValid()) {
|
||||
NewLayout.push_back(*I);
|
||||
for (auto *BB : layout()) {
|
||||
assert((!BB->isEntryPoint() || BB->isValid()) &&
|
||||
"all entry blocks must be valid");
|
||||
if (BB->isValid()) {
|
||||
NewLayout.push_back(BB);
|
||||
} else {
|
||||
++Count;
|
||||
Bytes += BC.computeCodeSize((*I)->begin(), (*I)->end());
|
||||
Bytes += BC.computeCodeSize(BB->begin(), BB->end());
|
||||
}
|
||||
}
|
||||
BasicBlocksLayout = std::move(NewLayout);
|
||||
|
@ -351,9 +346,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
|||
<< BB->size() << " instructions, align : "
|
||||
<< BB->getAlignment() << ")\n";
|
||||
|
||||
if (BB->isLandingPad()) {
|
||||
if (BB->isEntryPoint())
|
||||
OS << " Entry Point\n";
|
||||
|
||||
if (BB->isLandingPad())
|
||||
OS << " Landing Pad\n";
|
||||
}
|
||||
|
||||
uint64_t BBExecCount = BB->getExecutionCount();
|
||||
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
|
||||
|
@ -498,8 +495,8 @@ BinaryFunction::analyzeIndirectBranch(MCInst &Instruction,
|
|||
//
|
||||
// (with any irrelevant instructions in-between)
|
||||
//
|
||||
// When we call this helper we've already determined %r1 and %r2, and reverse
|
||||
// instruction iterator \p II is pointing to the ADD instruction.
|
||||
// When we call this helper we've already determined %r1 and %r2, and
|
||||
// reverse instruction iterator \p II is pointing to the ADD instruction.
|
||||
//
|
||||
// PIC jump table looks like following:
|
||||
//
|
||||
|
@ -722,7 +719,8 @@ BinaryFunction::analyzeIndirectBranch(MCInst &Instruction,
|
|||
} else {
|
||||
// Re-use an existing jump table. Perhaps parts of it.
|
||||
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
|
||||
assert(JT->Type == JumpTable::JTT_NORMAL && "normal jump table expected");
|
||||
assert(JT->Type == JumpTable::JTT_NORMAL &&
|
||||
"normal jump table expected");
|
||||
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
|
||||
} else {
|
||||
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
|
||||
|
@ -840,7 +838,15 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
|
|||
if ((Offset == getSize()) && CreatePastEnd)
|
||||
return getFunctionEndLabel();
|
||||
|
||||
assert(Offset < getSize() && "address outside of function bounds");
|
||||
// Check if there's a global symbol registered at given address.
|
||||
// If so - reuse it since we want to keep the symbol value updated.
|
||||
if (Offset != 0) {
|
||||
if (auto *Symbol = BC.getGlobalSymbolAtAddress(Address)) {
|
||||
Labels[Offset] = Symbol;
|
||||
return Symbol;
|
||||
}
|
||||
}
|
||||
|
||||
auto LI = Labels.find(Offset);
|
||||
if (LI == Labels.end()) {
|
||||
Result = BC.Ctx->createTempSymbol();
|
||||
|
@ -863,6 +869,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
// Insert a label at the beginning of the function. This will be our first
|
||||
// basic block.
|
||||
Labels[0] = Ctx->createTempSymbol("BB0", false);
|
||||
addEntryPointAtOffset(0);
|
||||
|
||||
auto handleRIPOperand =
|
||||
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
|
||||
|
@ -890,7 +897,14 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
auto Section = BC.getSectionForAddress(TargetAddress);
|
||||
if (Section && Section->isText()) {
|
||||
if (containsAddress(TargetAddress)) {
|
||||
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
|
||||
if (TargetAddress != getAddress()) {
|
||||
// The address could potentially escape. Mark it as another entry
|
||||
// point into the function.
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: potentially escaped address 0x"
|
||||
<< Twine::utohexstr(TargetAddress) << " in function "
|
||||
<< *this << '\n');
|
||||
TargetSymbol = addEntryPointAtOffset(TargetAddress - getAddress());
|
||||
}
|
||||
} else {
|
||||
BC.InterproceduralReferences.insert(TargetAddress);
|
||||
}
|
||||
|
@ -1338,6 +1352,8 @@ bool BinaryFunction::buildCFG() {
|
|||
PrevBB = InsertBB;
|
||||
InsertBB = addBasicBlock(LI->first, LI->second,
|
||||
/* DeriveAlignment = */ IsLastInstrNop);
|
||||
if (hasEntryPointAtOffset(Offset))
|
||||
InsertBB->setEntryPoint();
|
||||
}
|
||||
// Ignore nops. We use nops to derive alignment of the next basic block.
|
||||
// It will not always work, as some blocks are naturally aligned, but
|
||||
|
@ -1618,6 +1634,7 @@ bool BinaryFunction::buildCFG() {
|
|||
clearList(TakenBranches);
|
||||
clearList(FTBranches);
|
||||
clearList(LPToBBIndex);
|
||||
clearList(EntryOffsets);
|
||||
|
||||
// Update the state.
|
||||
CurrentState = State::CFG;
|
||||
|
@ -2974,6 +2991,25 @@ void BinaryFunction::updateLayout(LayoutType Type,
|
|||
modifyLayout(Type, MinBranchClusters, Split);
|
||||
}
|
||||
|
||||
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
||||
uint64_t SymbolSize) const {
|
||||
// Some symbols are tolerated inside function bodies, others are not.
|
||||
// The real function boundaries may not be known at this point.
|
||||
|
||||
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
|
||||
// function.
|
||||
if (SymbolSize == 0 && containsAddress(*Symbol.getAddress()))
|
||||
return true;
|
||||
|
||||
if (Symbol.getType() != SymbolRef::ST_Unknown)
|
||||
return false;
|
||||
|
||||
if (Symbol.getFlags() & SymbolRef::SF_Global)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BinaryFunction::~BinaryFunction() {
|
||||
for (auto BB : BasicBlocks) {
|
||||
delete BB;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm::object;
|
||||
|
@ -363,6 +364,9 @@ private:
|
|||
void recomputeLandingPads(const unsigned StartIndex,
|
||||
const unsigned NumBlocks);
|
||||
|
||||
/// Temporary holder of offsets that are potentially entry points.
|
||||
std::unordered_set<uint64_t> EntryOffsets;
|
||||
|
||||
using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>;
|
||||
BranchListType TakenBranches; /// All local taken branches.
|
||||
BranchListType FTBranches; /// All fall-through branches.
|
||||
|
@ -887,6 +891,20 @@ public:
|
|||
Names.emplace_back(NewName);
|
||||
}
|
||||
|
||||
/// Register an entry point at a given \p Offset into the function.
|
||||
/// Return symbol associated with the entry.
|
||||
MCSymbol *addEntryPointAtOffset(uint64_t Offset) {
|
||||
EntryOffsets.emplace(Offset);
|
||||
return getOrCreateLocalLabel(getAddress() + Offset);
|
||||
}
|
||||
|
||||
/// Return true if there is a registered entry point at a given offset
|
||||
/// into the function.
|
||||
bool hasEntryPointAtOffset(uint64_t Offset) {
|
||||
assert(!EntryOffsets.empty() && "entry points uninitialized or destroyed");
|
||||
return EntryOffsets.count(Offset);
|
||||
}
|
||||
|
||||
/// Create a basic block at a given \p Offset in the
|
||||
/// function.
|
||||
/// If \p DeriveAlignment is true, set the alignment of the block based
|
||||
|
@ -1216,6 +1234,10 @@ public:
|
|||
return LSDASymbol;
|
||||
}
|
||||
|
||||
/// Return true iff the symbol could be seen inside this function. Otherwise,
|
||||
/// it is probably another function.
|
||||
bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
|
||||
|
||||
/// Disassemble function from raw data \p FunctionData.
|
||||
/// If successful, this function will populate the list of instructions
|
||||
/// for this function together with offsets from the function start
|
||||
|
|
|
@ -789,9 +789,11 @@ void EliminateUnreachableBlocks::runOnFunction(BinaryFunction& Function) {
|
|||
DeletedBytes += Bytes;
|
||||
if (Count) {
|
||||
Modified.insert(&Function);
|
||||
DEBUG(dbgs() << "BOLT-INFO: Removed " << Count
|
||||
<< " dead basic block(s) accounting for " << Bytes
|
||||
<< " bytes in function " << Function << '\n');
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-INFO: Removed " << Count
|
||||
<< " dead basic block(s) accounting for " << Bytes
|
||||
<< " bytes in function " << Function << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1034,7 +1036,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
|
|||
|
||||
if (BB.getNumNonPseudos() != 1 || BB.isLandingPad())
|
||||
continue;
|
||||
|
||||
|
||||
auto *Inst = BB.getFirstNonPseudo();
|
||||
const bool IsTailCall = BC.MIA->isTailCall(*Inst);
|
||||
|
||||
|
@ -1463,7 +1465,7 @@ void PrintSortedBy::runOnFunctions(
|
|||
}
|
||||
outs() << ")";
|
||||
}
|
||||
|
||||
|
||||
outs() << " are:\n";
|
||||
auto SFI = Functions.begin();
|
||||
for (unsigned i = 0; i < 100 && SFI != Functions.end(); ++SFI, ++i) {
|
||||
|
|
|
@ -394,7 +394,7 @@ void BinaryFunction::updateEHRanges() {
|
|||
}
|
||||
}
|
||||
|
||||
// The code is based on EHStreamer::emitExceptionTable().
|
||||
// The code is based on EHStreamer::emitExceptionTable().
|
||||
void BinaryFunction::emitLSDA(MCStreamer *Streamer) {
|
||||
if (CallSites.empty()) {
|
||||
return;
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Object/SymbolicFile.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
|
@ -641,29 +642,61 @@ void RewriteInstance::run() {
|
|||
}
|
||||
|
||||
void RewriteInstance::discoverFileObjects() {
|
||||
std::string FileSymbolName;
|
||||
bool SeenFileName = false;
|
||||
|
||||
FileSymRefs.clear();
|
||||
BinaryFunctions.clear();
|
||||
BC->GlobalAddresses.clear();
|
||||
|
||||
// For local symbols we want to keep track of associated FILE symbol for
|
||||
// disambiguation by name.
|
||||
for (const SymbolRef &Symbol : InputFile->symbols()) {
|
||||
// Keep undefined symbols for pretty printing?
|
||||
// For local symbols we want to keep track of associated FILE symbol name for
|
||||
// disambiguation by combined name.
|
||||
StringRef FileSymbolName;
|
||||
bool SeenFileName = false;
|
||||
struct SymbolRefHash {
|
||||
std::size_t operator()(SymbolRef const &S) const {
|
||||
return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
|
||||
}
|
||||
};
|
||||
std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
|
||||
for (const auto &Symbol : InputFile->symbols()) {
|
||||
if (Symbol.getFlags() & SymbolRef::SF_Undefined)
|
||||
continue;
|
||||
|
||||
ErrorOr<StringRef> NameOrError = Symbol.getName();
|
||||
check_error(NameOrError.getError(), "cannot get symbol name");
|
||||
|
||||
if (Symbol.getType() == SymbolRef::ST_File) {
|
||||
// Could be used for local symbol disambiguation.
|
||||
ErrorOr<StringRef> NameOrError = Symbol.getName();
|
||||
check_error(NameOrError.getError(), "cannot get symbol name for file");
|
||||
FileSymbolName = *NameOrError;
|
||||
SeenFileName = true;
|
||||
continue;
|
||||
}
|
||||
if (!FileSymbolName.empty() &&
|
||||
!(Symbol.getFlags() & SymbolRef::SF_Global)) {
|
||||
SymbolToFileName[Symbol] = FileSymbolName;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort symbols in the file by value.
|
||||
std::vector<SymbolRef> SortedFileSymbols(InputFile->symbol_begin(),
|
||||
InputFile->symbol_end());
|
||||
std::stable_sort(SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||
[](const SymbolRef &A, const SymbolRef &B) {
|
||||
// NOTYPE symbols have lower precedence.
|
||||
if (*(A.getAddress()) == *(B.getAddress())) {
|
||||
return A.getType() != SymbolRef::ST_Unknown &&
|
||||
B.getType() == SymbolRef::ST_Unknown;
|
||||
}
|
||||
return *(A.getAddress()) < *(B.getAddress());
|
||||
});
|
||||
|
||||
const BinaryFunction *PreviousFunction = nullptr;
|
||||
for (const auto &Symbol : SortedFileSymbols) {
|
||||
// Keep undefined symbols for pretty printing?
|
||||
if (Symbol.getFlags() & SymbolRef::SF_Undefined)
|
||||
continue;
|
||||
|
||||
if (Symbol.getType() == SymbolRef::ST_File)
|
||||
continue;
|
||||
|
||||
ErrorOr<StringRef> NameOrError = Symbol.getName();
|
||||
check_error(NameOrError.getError(), "cannot get symbol name");
|
||||
|
||||
ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
|
||||
check_error(AddressOrErr.getError(), "cannot get symbol address");
|
||||
|
@ -715,8 +748,10 @@ void RewriteInstance::discoverFileObjects() {
|
|||
// (e.g. from different directories).
|
||||
std::string Prefix = Name + "/";
|
||||
std::string AltPrefix;
|
||||
if (!FileSymbolName.empty())
|
||||
AltPrefix = Prefix + FileSymbolName + "/";
|
||||
auto SFI = SymbolToFileName.find(Symbol);
|
||||
if (SFI != SymbolToFileName.end()) {
|
||||
AltPrefix = Prefix + std::string(SFI->second) + "/";
|
||||
}
|
||||
|
||||
auto uniquifyName = [&] (std::string NamePrefix) {
|
||||
unsigned LocalID = 1;
|
||||
|
@ -734,15 +769,6 @@ void RewriteInstance::discoverFileObjects() {
|
|||
if (!AlternativeName.empty())
|
||||
BC->registerNameAtAddress(AlternativeName, Address);
|
||||
|
||||
// Only consider ST_Function symbols for functions. Although this
|
||||
// assumption could be broken by assembly functions for which the type
|
||||
// could be wrong, we skip such entries till the support for
|
||||
// assembly is implemented.
|
||||
if (Symbol.getType() != SymbolRef::ST_Function)
|
||||
continue;
|
||||
|
||||
// TODO: populate address map with PLT entries for better readability.
|
||||
|
||||
ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
|
||||
check_error(SectionOrErr.getError(), "cannot get symbol section");
|
||||
section_iterator Section = *SectionOrErr;
|
||||
|
@ -751,8 +777,69 @@ void RewriteInstance::discoverFileObjects() {
|
|||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
|
||||
<< " for function\n");
|
||||
|
||||
if (!Section->isText()) {
|
||||
assert(Symbol.getType() != SymbolRef::ST_Function &&
|
||||
"unexpected function inside non-code section");
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
auto SymbolSize = ELFSymbolRef(Symbol).getSize();
|
||||
|
||||
// Assembly functions could be ST_NONE with 0 size. Check that the
|
||||
// corresponding section is a code section and they are not inside any
|
||||
// other known function to consider them.
|
||||
//
|
||||
// Sometimes assembly functions are not marked as functions and neither are
|
||||
// their local labels. The only way to tell them apart is to look at
|
||||
// symbol scope - global vs local.
|
||||
if (Symbol.getType() != SymbolRef::ST_Function) {
|
||||
if (Symbol.getType() != SymbolRef::ST_Unknown) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is an object\n");
|
||||
continue;
|
||||
}
|
||||
if (PreviousFunction) {
|
||||
if (PreviousFunction->getSize() == 0) {
|
||||
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
|
||||
continue;
|
||||
}
|
||||
} else if (PreviousFunction->containsAddress(Address)) {
|
||||
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
|
||||
continue;
|
||||
} else {
|
||||
if (Address == PreviousFunction->getAddress() && SymbolSize == 0) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
|
||||
continue;
|
||||
}
|
||||
if (opts::Verbosity > 1) {
|
||||
errs() << "BOLT-WARNING: symbol " << UniqueName
|
||||
<< " seen in the middle of function "
|
||||
<< *PreviousFunction << ". Could be a new entry.\n";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (PreviousFunction &&
|
||||
PreviousFunction->containsAddress(Address) &&
|
||||
PreviousFunction->getAddress() != Address &&
|
||||
SymbolSize == 0) {
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-DEBUG: possibly another entry for function "
|
||||
<< *PreviousFunction << " : " << UniqueName << '\n';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: populate address map with PLT entries for better readability.
|
||||
|
||||
// Check for conflicts with function data from FDEs.
|
||||
bool IsSimple = true;
|
||||
auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
|
||||
|
@ -821,6 +908,8 @@ void RewriteInstance::discoverFileObjects() {
|
|||
}
|
||||
if (!AlternativeName.empty())
|
||||
BF->addAlternativeName(AlternativeName);
|
||||
|
||||
PreviousFunction = BF;
|
||||
}
|
||||
|
||||
if (!SeenFileName && BC->DR.hasLocalsWithFileName() && !opts::AllowStripped) {
|
||||
|
@ -831,6 +920,78 @@ void RewriteInstance::discoverFileObjects() {
|
|||
"wish to proceed, use -allow-stripped option.\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Now that all the functions were created - adjust their boundaries.
|
||||
adjustFunctionBoundaries();
|
||||
}
|
||||
|
||||
void RewriteInstance::adjustFunctionBoundaries() {
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
|
||||
// Check if there's a symbol with a larger address in the same section.
|
||||
// If there is - it determines the maximum size for the current function,
|
||||
// otherwise, it is the size of containing section the defines it.
|
||||
//
|
||||
// NOTE: ignore some symbols that could be tolerated inside the body
|
||||
// of a function.
|
||||
auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
|
||||
while (NextSymRefI != FileSymRefs.end()) {
|
||||
auto &Symbol = NextSymRefI->second;
|
||||
auto SymbolSize = ELFSymbolRef(Symbol).getSize();
|
||||
|
||||
if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
|
||||
break;
|
||||
|
||||
// This is potentially another entry point into the function.
|
||||
auto EntryOffset = NextSymRefI->first - Function.getAddress();
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
|
||||
<< Function << " at offset 0x"
|
||||
<< Twine::utohexstr(EntryOffset) << '\n');
|
||||
Function.addEntryPointAtOffset(EntryOffset);
|
||||
Function.setSimple(false);
|
||||
|
||||
++NextSymRefI;
|
||||
}
|
||||
auto NextSymRefSectionI = (NextSymRefI == FileSymRefs.end())
|
||||
? InputFile->section_end()
|
||||
: NextSymRefI->second.getSection();
|
||||
|
||||
uint64_t MaxSize;
|
||||
if (NextSymRefI != FileSymRefs.end() &&
|
||||
NextSymRefI->second.getSection() &&
|
||||
*NextSymRefI->second.getSection() != InputFile->section_end() &&
|
||||
**NextSymRefI->second.getSection() == Function.getSection()) {
|
||||
MaxSize = NextSymRefI->first - Function.getAddress();
|
||||
} else {
|
||||
// Function runs till the end of the containing section.
|
||||
uint64_t SectionEnd = Function.getSection().getAddress() +
|
||||
Function.getSection().getSize();
|
||||
assert((NextSymRefI == FileSymRefs.end() ||
|
||||
NextSymRefI->first >= SectionEnd) &&
|
||||
"different sections should not overlap");
|
||||
MaxSize = SectionEnd - Function.getAddress();
|
||||
}
|
||||
|
||||
if (MaxSize < Function.getSize()) {
|
||||
if (opts::Verbosity > 1) {
|
||||
errs() << "BOLT-WARNING: symbol seen in the middle of the function "
|
||||
<< Function << ". Skipping.\n";
|
||||
}
|
||||
Function.setSimple(false);
|
||||
continue;
|
||||
}
|
||||
Function.setMaxSize(MaxSize);
|
||||
if (!Function.getSize()) {
|
||||
// Some assembly functions have their size set to 0, use the max
|
||||
// size as their real size.
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-INFO: setting size of function " << Function
|
||||
<< " to " << Function.getMaxSize() << " (was 0)\n";
|
||||
}
|
||||
Function.setSize(Function.getMaxSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BinaryFunction *RewriteInstance::createBinaryFunction(
|
||||
|
@ -931,51 +1092,6 @@ void RewriteInstance::disassembleFunctions() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Set the proper maximum size value after the whole symbol table
|
||||
// has been processed.
|
||||
auto SymRefI = FileSymRefs.upper_bound(Function.getAddress());
|
||||
if (SymRefI != FileSymRefs.end()) {
|
||||
uint64_t MaxSize;
|
||||
auto SectionIter = *SymRefI->second.getSection();
|
||||
if (SectionIter != InputFile->section_end() &&
|
||||
*SectionIter == Function.getSection()) {
|
||||
MaxSize = SymRefI->first - Function.getAddress();
|
||||
} else {
|
||||
// Function runs till the end of the containing section assuming
|
||||
// the section does not run over the next symbol.
|
||||
uint64_t SectionEnd = Function.getSection().getAddress() +
|
||||
Function.getSection().getSize();
|
||||
if (SectionEnd > SymRefI->first) {
|
||||
if (opts::Verbosity >= 1) {
|
||||
errs() << "BOLT-WARNING: symbol after " << Function
|
||||
<< " should not be in the same section.\n";
|
||||
}
|
||||
MaxSize = 0;
|
||||
} else {
|
||||
MaxSize = SectionEnd - Function.getAddress();
|
||||
}
|
||||
}
|
||||
|
||||
if (MaxSize < Function.getSize()) {
|
||||
if (opts::Verbosity >= 1) {
|
||||
errs() << "BOLT-WARNING: symbol seen in the middle of the function "
|
||||
<< Function << ". Skipping.\n";
|
||||
}
|
||||
Function.setSimple(false);
|
||||
continue;
|
||||
}
|
||||
Function.setMaxSize(MaxSize);
|
||||
if (!Function.getSize() && Function.getMaxSize()) {
|
||||
// Some assembly functions have their size set to 0, use the max
|
||||
// size as their real size.
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-INFO: setting size of function " << Function
|
||||
<< " to " << Function.getMaxSize() << " (was 0)\n";
|
||||
}
|
||||
Function.setSize(Function.getMaxSize());
|
||||
}
|
||||
}
|
||||
|
||||
// Treat zero-sized functions as non-simple ones.
|
||||
if (Function.getSize() == 0) {
|
||||
Function.setSimple(false);
|
||||
|
|
|
@ -183,6 +183,10 @@ private:
|
|||
/// new sections.
|
||||
void discoverStorage();
|
||||
|
||||
/// Adjust function sizes and set proper maximum size values after the whole
|
||||
/// symbol table has been processed.
|
||||
void adjustFunctionBoundaries();
|
||||
|
||||
/// Rewrite non-allocatable sections with modifications.
|
||||
void rewriteNoteSections();
|
||||
|
||||
|
|
Loading…
Reference in New Issue