forked from OSchip/llvm-project
[lld-macho] Implement branch-range-extension thunks
Extend the range of calls beyond an architecture's limited branch range by first calling a thunk, which loads the far address into a scratch register (x16 on ARM64) and branches through it. Other ports (COFF, ELF) use multiple passes with successively-refined guesses regarding the expansion of text-space imposed by thunk-space overhead. This MachO algorithm places thunks during MergedOutputSection::finalize() in a single pass using exact thunk-space overheads. Thunks are kept in a separate vector to avoid the overhead of inserting into the `inputs` vector of `MergedOutputSection`. FIXME: * arm64-stubs.s test is broken * add thunk tests * Handle thunks to DylibSymbol in MergedOutputSection::finalize() Differential Revision: https://reviews.llvm.org/D100818
This commit is contained in:
parent
9934571eab
commit
93c8559baf
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/Support/Endian.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::MachO;
|
||||
using namespace llvm::support::endian;
|
||||
using namespace lld;
|
||||
|
@ -33,6 +34,7 @@ struct ARM64 : ARM64Common {
|
|||
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
|
||||
uint64_t entryAddr) const override;
|
||||
const RelocAttrs &getRelocAttrs(uint8_t type) const override;
|
||||
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -103,11 +105,36 @@ void ARM64::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym,
|
|||
::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
|
||||
}
|
||||
|
||||
// A thunk is the relaxed variation of stubCode. We don't need the
|
||||
// extra indirection through a lazy pointer because the target address
|
||||
// is known at link time.
|
||||
// Machine-code template for one thunk: three 32-bit ARM64 instructions that
// form the target address in x16 and branch through it. The address
// immediates are zero in the template; populateThunk() attaches relocations
// at byte offsets 0 and 4 that name the real function.
static constexpr uint32_t thunkCode[] = {
|
||||
0x90000010, // 00: adrp x16, <thunk.ptr>@page
|
||||
0x91000210, // 04: add x16, x16, <thunk.ptr>@pageoff
|
||||
0xd61f0200, // 08: br x16
|
||||
};
|
||||
|
||||
// Fills in a newly created thunk InputSection: sets its alignment to 4,
// points its data at the shared thunkCode template, and attaches two
// relocations that both refer to funcSym -- PAGEOFF12 for the `add` at
// offset 4 and PAGE21 for the `adrp` at offset 0.
// The relocs are appended highest offset first (4, then 0).
// NOTE(review): presumably this matches the descending-offset ordering that
// MergedOutputSection::finalize() asserts for input-section relocs -- confirm.
void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
|
||||
thunk->align = 4;
|
||||
thunk->data = {reinterpret_cast<const uint8_t *>(thunkCode),
|
||||
sizeof(thunkCode)};
|
||||
thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGEOFF12,
|
||||
/*pcrel=*/false, /*length=*/2,
|
||||
/*offset=*/4, /*addend=*/0,
|
||||
/*referent=*/funcSym});
|
||||
thunk->relocs.push_back({/*type=*/ARM64_RELOC_PAGE21,
|
||||
/*pcrel=*/true, /*length=*/2,
|
||||
/*offset=*/0, /*addend=*/0,
|
||||
/*referent=*/funcSym});
|
||||
}
|
||||
|
||||
// Initializes the ARM64 target description: CPU type/subtype and the sizes
// of the stub, thunk, and stub-helper code templates defined in this file.
ARM64::ARM64() : ARM64Common(LP64()) {
|
||||
cpuType = CPU_TYPE_ARM64;
|
||||
cpuSubtype = CPU_SUBTYPE_ARM64_ALL;
|
||||
|
||||
stubSize = sizeof(stubCode);
|
||||
// A non-zero thunkSize is what makes TargetInfo::usesThunks() return true.
thunkSize = sizeof(thunkCode);
|
||||
// Usable branch distance: the largest signed 28-bit value less one thunk.
// NOTE(review): presumably the subtraction leaves headroom so a thunk placed
// at the far edge of the range is still reachable -- confirm.
branchRange = maxIntN(28) - thunkSize;
|
||||
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
|
||||
stubHelperEntrySize = sizeof(stubHelperEntryCode);
|
||||
}
|
||||
|
|
|
@ -901,6 +901,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
|
|||
"too many errors emitted, stopping now "
|
||||
"(use --error-limit=0 to see all errors)";
|
||||
errorHandler().errorLimit = args::getInteger(args, OPT_error_limit_eq, 20);
|
||||
errorHandler().verbose = args.hasArg(OPT_verbose);
|
||||
|
||||
if (args.hasArg(OPT_help_hidden)) {
|
||||
parser.printHelp(argsArr[0], /*showHidden=*/true);
|
||||
|
|
|
@ -34,20 +34,15 @@ uint64_t InputSection::getFileSize() const {
|
|||
|
||||
uint64_t InputSection::getVA() const { return parent->addr + outSecOff; }
|
||||
|
||||
// Picks the address a relocation of the given type should resolve to for a
// symbol: BRANCH relocs use the branch target, GOT relocs the GOT slot, TLV
// relocs the TLV-pointer slot, and everything else the symbol's own VA.
// NOTE(review): this view appears to interleave two versions of the function
// -- one taking (loc, const Symbol &, type) that computes the section
// addresses inline, and one taking (const Symbol *, type) that delegates to
// Symbol::resolveBranchVA()/resolveGotVA()/resolveTlvVA(). Confirm which
// lines are live before editing.
static uint64_t resolveSymbolVA(uint8_t *loc, const Symbol &sym, uint8_t type) {
|
||||
static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
|
||||
const RelocAttrs &relocAttrs = target->getRelocAttrs(type);
|
||||
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
|
||||
if (sym.isInStubs())
|
||||
return in.stubs->addr + sym.stubsIndex * target->stubSize;
|
||||
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
|
||||
if (sym.isInGot())
|
||||
return in.got->addr + sym.gotIndex * target->wordSize;
|
||||
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
|
||||
if (sym.isInGot())
|
||||
return in.tlvPointers->addr + sym.gotIndex * target->wordSize;
|
||||
assert(isa<Defined>(&sym));
|
||||
}
|
||||
return sym.getVA();
|
||||
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH))
|
||||
return sym->resolveBranchVA();
|
||||
else if (relocAttrs.hasAttr(RelocAttrBits::GOT))
|
||||
return sym->resolveGotVA();
|
||||
else if (relocAttrs.hasAttr(RelocAttrBits::TLV))
|
||||
return sym->resolveTlvVA();
|
||||
return sym->getVA();
|
||||
}
|
||||
|
||||
void InputSection::writeTo(uint8_t *buf) {
|
||||
|
@ -78,7 +73,7 @@ void InputSection::writeTo(uint8_t *buf) {
|
|||
if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&
|
||||
!referentSym->isInGot())
|
||||
target->relaxGotLoad(loc, r.type);
|
||||
referentVA = resolveSymbolVA(loc, *referentSym, r.type);
|
||||
referentVA = resolveSymbolVA(referentSym, r.type);
|
||||
|
||||
if (isThreadLocalVariables(flags)) {
|
||||
// References from thread-local variable sections are treated as offsets
|
||||
|
|
|
@ -42,6 +42,8 @@ public:
|
|||
|
||||
uint32_t align = 1;
|
||||
uint32_t flags = 0;
|
||||
uint32_t callSiteCount = 0;
|
||||
bool isFinal = false; // is address assigned?
|
||||
|
||||
// How many symbols refer to this InputSection.
|
||||
uint32_t numRefs = 0;
|
||||
|
|
|
@ -7,12 +7,19 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MergedOutputSection.h"
|
||||
#include "Config.h"
|
||||
#include "OutputSegment.h"
|
||||
#include "SymbolTable.h"
|
||||
#include "Symbols.h"
|
||||
#include "SyntheticSections.h"
|
||||
#include "Target.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
#include "llvm/BinaryFormat/MachO.h"
|
||||
#include "llvm/Support/ScopedPrinter.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::MachO;
|
||||
using namespace lld;
|
||||
|
@ -26,29 +33,305 @@ void MergedOutputSection::mergeInput(InputSection *input) {
|
|||
align = std::max(align, input->align);
|
||||
mergeFlags(input);
|
||||
}
|
||||
|
||||
inputs.push_back(input);
|
||||
input->parent = this;
|
||||
}
|
||||
|
||||
// Branch-range extension can be implemented in two ways, either through ...
|
||||
//
|
||||
// (1) Branch islands: Single branch instructions (also of limited range),
|
||||
// that might be chained in multiple hops to reach the desired
|
||||
// destination. On ARM64, as many as 16 branch islands are needed to hop between
|
||||
// opposite ends of a 2 GiB program. LD64 uses branch islands exclusively,
|
||||
// even when it needs excessive hops.
|
||||
//
|
||||
// (2) Thunks: Instruction(s) to load the destination address into a scratch
|
||||
// register, followed by a register-indirect branch. Thunks are
|
||||
// constructed to reach any arbitrary address, so need not be
|
||||
// chained. Although thunks need not be chained, a program might need
|
||||
// multiple thunks to the same destination distributed throughout a large
|
||||
// program so that all call sites can have one within range.
|
||||
//
|
||||
// The optimal approach is to mix islands for destinations within two hops,
|
||||
// and use thunks for destinations at greater distance. For now, we only
|
||||
// implement thunks. TODO: Adding support for branch islands!
|
||||
//
|
||||
// Internally -- as expressed in LLD's data structures -- a
|
||||
// branch-range-extension thunk comprises ...
|
||||
//
|
||||
// (1) new Defined privateExtern symbol for the thunk named
|
||||
// <FUNCTION>.thunk.<SEQUENCE>, which references ...
|
||||
// (2) new InputSection, which contains ...
|
||||
// (3.1) new data for the instructions to load & branch to the far address +
|
||||
// (3.2) new Relocs on instructions to load the far address, which reference ...
|
||||
// (4.1) existing Defined extern symbol for the real function in __text, or
|
||||
// (4.2) existing DylibSymbol for the real function in a dylib
|
||||
//
|
||||
// Nearly-optimal thunk-placement algorithm features:
|
||||
//
|
||||
// * Single pass: O(n) on the number of call sites.
|
||||
//
|
||||
// * Accounts for the exact space overhead of thunks - no heuristics
|
||||
//
|
||||
// * Exploits the full range of call instructions - forward & backward
|
||||
//
|
||||
// Data:
|
||||
//
|
||||
// * DenseMap<Symbol *, ThunkInfo> thunkMap: Maps the function symbol
|
||||
// to its thunk bookkeeper.
|
||||
//
|
||||
// * struct ThunkInfo (bookkeeper): Call instructions have limited range, and
|
||||
// distant call sites might be unable to reach the same thunk, so multiple
|
||||
// thunks are necessary to serve all call sites in a very large program. A
|
||||
// thunkInfo stores state for all thunks associated with a particular
|
||||
// function: (a) thunk symbol, (b) input section containing stub code, and
|
||||
// (c) sequence number for the active thunk incarnation. When an old thunk
|
||||
// goes out of range, we increment the sequence number and create a new
|
||||
// thunk named <FUNCTION>.thunk.<SEQUENCE>.
|
||||
//
|
||||
// * A thunk incarnation comprises (a) private-extern Defined symbol pointing
|
||||
// to (b) an InputSection holding machine instructions (similar to a MachO
|
||||
// stub), and (c) Reloc(s) that reference the real function for fixing-up
|
||||
// the stub code.
|
||||
//
|
||||
// * std::vector<InputSection *> MergedOutputSection::thunks: A vector parallel
|
||||
// to the inputs vector. We store new thunks via cheap vector append, rather
|
||||
// than costly insertion into the inputs vector.
|
||||
//
|
||||
// Control Flow:
|
||||
//
|
||||
// * During address assignment, MergedOutputSection::finalize() examines call
|
||||
// sites by ascending address and creates thunks. When a function is beyond
|
||||
// the range of a call site, we need a thunk. Place it at the largest
|
||||
// available forward address from the call site. Call sites increase
|
||||
// monotonically and thunks are always placed as far forward as possible;
|
||||
// thus, we place thunks at monotonically increasing addresses. Once a thunk
|
||||
// is placed, it and all previous input-section addresses are final.
|
||||
//
|
||||
// * MergedOutputSection::finalize() and MergedOutputSection::writeTo() merge
|
||||
// the inputs and thunks vectors (both ordered by ascending address), which
|
||||
// is simple and cheap.
|
||||
|
||||
DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap;
|
||||
|
||||
// Determine whether we need thunks, which depends on the target arch -- RISC
|
||||
// (i.e., ARM) generally does because it has limited-range branch/call
|
||||
// instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
|
||||
// thunks for programs so large that branch source & destination addresses
|
||||
// might differ more than the range of branch instruction(s).
|
||||
// Returns true when the target uses thunks and the laid-out size of all
// inputs plus the __stubs section exceeds the target's branch range.
// Side effect when returning true (despite the const qualifier): inserts an
// entry into the global thunkMap for every BRANCH-reloc referent and bumps
// the per-ThunkInfo and per-InputSection callSiteCount counters.
bool MergedOutputSection::needsThunks() const {
|
||||
if (!target->usesThunks())
|
||||
return false;
|
||||
// Dry-run layout: accumulate aligned sizes to find where the inputs would end.
uint64_t isecAddr = addr;
|
||||
for (InputSection *isec : inputs)
|
||||
isecAddr = alignTo(isecAddr, isec->align) + isec->getSize();
|
||||
if (isecAddr - addr + in.stubs->getSize() <= target->branchRange)
|
||||
return false;
|
||||
// Yes, this program is large enough to need thunks.
|
||||
for (InputSection *isec : inputs) {
|
||||
for (Reloc &r : isec->relocs) {
|
||||
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
|
||||
continue;
|
||||
auto *sym = r.referent.get<Symbol *>();
|
||||
// Pre-populate the thunkMap and memoize call site counts for every
|
||||
// InputSection and ThunkInfo. We do this for the benefit of
|
||||
// MergedOutputSection::estimateStubsInRangeVA()
|
||||
ThunkInfo &thunkInfo = thunkMap[sym];
|
||||
// Knowing ThunkInfo call site count will help us know whether or not we
|
||||
// might need to create more for this referent at the time we are
|
||||
// estimating distance to __stubs in estimateStubsInRangeVA().
|
||||
++thunkInfo.callSiteCount;
|
||||
// Knowing InputSection call site count will help us avoid work on those
|
||||
// that have no BRANCH relocs.
|
||||
++isec->callSiteCount;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Since __stubs is placed after __text, we must estimate the address
|
||||
// beyond which stubs are within range of a simple forward branch.
|
||||
// Estimates the lowest call-site address from which every stub can be
// reached by a direct forward branch.
//   callIdx: index into `inputs` of the section currently being processed.
// Returns: (end of remaining inputs) + (worst-case bytes of thunks still to
// be placed) + (size of __stubs) - branchRange. Also logs the intermediate
// values.
uint64_t MergedOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
|
||||
uint64_t branchRange = target->branchRange;
|
||||
size_t endIdx = inputs.size();
|
||||
InputSection *isec = inputs[callIdx];
|
||||
uint64_t isecVA = isec->getVA();
|
||||
// Tally the non-stub functions which still have call sites
|
||||
// remaining to process, which yields the maximum number
|
||||
// of thunks we might yet place.
|
||||
size_t maxPotentialThunks = 0;
|
||||
for (auto &tp : thunkMap) {
|
||||
ThunkInfo &ti = tp.second;
|
||||
// The boolean result is added as 0 or 1: at most one more thunk per function.
maxPotentialThunks +=
|
||||
!tp.first->isInStubs() && ti.callSitesUsed < ti.callSiteCount;
|
||||
}
|
||||
// Tally the total size of input sections remaining to process.
|
||||
uint64_t isecEnd = isec->getVA();
|
||||
for (size_t i = callIdx; i < endIdx; i++) {
|
||||
// Note: this `isec` shadows the outer one declared above.
InputSection *isec = inputs[i];
|
||||
isecEnd = alignTo(isecEnd, isec->align) + isec->getSize();
|
||||
}
|
||||
// Estimate the address after which call sites can safely call stubs
|
||||
// directly rather than through intermediary thunks.
|
||||
uint64_t stubsInRangeVA = isecEnd + maxPotentialThunks * target->thunkSize +
|
||||
in.stubs->getSize() - branchRange;
|
||||
log("thunks = " + std::to_string(thunkMap.size()) +
|
||||
", potential = " + std::to_string(maxPotentialThunks) +
|
||||
", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +
|
||||
to_hexString(isecVA) + ", threshold = " + to_hexString(stubsInRangeVA) +
|
||||
", isecEnd = " + to_hexString(isecEnd) +
|
||||
", tail = " + to_hexString(isecEnd - isecVA) +
|
||||
", slop = " + to_hexString(branchRange - (isecEnd - isecVA)));
|
||||
return stubsInRangeVA;
|
||||
}
|
||||
|
||||
// Assigns an offset (outSecOff / outSecFileOff) to every input section and
// sets this section's size and fileSize. When needsThunks() is false the
// inputs are simply laid out in order. Otherwise call sites are walked by
// ascending address, and any branch whose referent lies outside
// (callVA - branchRange, callVA + branchRange) is redirected either to an
// existing thunk that is within range or to a newly created one, which is
// given the next address and appended to `thunks`.
// NOTE(review): the `for (InputSection *isec : inputs) {` line just before
// the finalizeOne lambda has no matching close in this view -- it looks like
// a stale line; confirm.
void MergedOutputSection::finalize() {
|
||||
uint64_t isecAddr = addr;
|
||||
uint64_t isecFileOff = fileOff;
|
||||
for (InputSection *isec : inputs) {
|
||||
// Places one section at the next aligned address/file offset, marks it
// final, and advances both cursors past it.
auto finalizeOne = [&](InputSection *isec) {
|
||||
isecAddr = alignTo(isecAddr, isec->align);
|
||||
isecFileOff = alignTo(isecFileOff, isec->align);
|
||||
isec->outSecOff = isecAddr - addr;
|
||||
isec->outSecFileOff = isecFileOff - fileOff;
|
||||
isec->isFinal = true;
|
||||
isecAddr += isec->getSize();
|
||||
isecFileOff += isec->getFileSize();
|
||||
};
|
||||
|
||||
if (!needsThunks()) {
|
||||
for (InputSection *isec : inputs)
|
||||
finalizeOne(isec);
|
||||
size = isecAddr - addr;
|
||||
fileSize = isecFileOff - fileOff;
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t branchRange = target->branchRange;
|
||||
// Sentinel meaning "threshold not yet estimated"; see the check below.
uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA;
|
||||
size_t thunkSize = target->thunkSize;
|
||||
// The following four counters are only reported in the log() call at the end.
size_t relocCount = 0;
|
||||
size_t callSiteCount = 0;
|
||||
size_t thunkCallCount = 0;
|
||||
size_t thunkCount = 0;
|
||||
|
||||
// inputs[finalIdx] is for finalization (address-assignment)
|
||||
size_t finalIdx = 0;
|
||||
// Kick-off by ensuring that the first input section has an address
|
||||
for (size_t callIdx = 0, endIdx = inputs.size(); callIdx < endIdx;
|
||||
++callIdx) {
|
||||
if (finalIdx == callIdx)
|
||||
finalizeOne(inputs[finalIdx++]);
|
||||
InputSection *isec = inputs[callIdx];
|
||||
assert(isec->isFinal);
|
||||
uint64_t isecVA = isec->getVA();
|
||||
// Assign addresses up-to the forward branch-range limit
|
||||
while (finalIdx < endIdx &&
|
||||
isecAddr + inputs[finalIdx]->getSize() < isecVA + branchRange)
|
||||
finalizeOne(inputs[finalIdx++]);
|
||||
if (isec->callSiteCount == 0)
|
||||
continue;
|
||||
if (finalIdx == endIdx && stubsInRangeVA == TargetInfo::outOfRangeVA) {
|
||||
// When we have finalized all input sections, __stubs (destined
|
||||
// to follow __text) comes within range of forward branches and
|
||||
// we can estimate the threshold address after which we can
|
||||
// reach any stub with a forward branch. Note that although it
|
||||
// sits in the middle of a loop, this code executes only once.
|
||||
// It is in the loop because we need to call it at the proper
|
||||
// time: the earliest call site from which the end of __text
|
||||
// (and start of __stubs) comes within range of a forward branch.
|
||||
stubsInRangeVA = estimateStubsInRangeVA(callIdx);
|
||||
}
|
||||
// Process relocs by ascending address, i.e., ascending offset within isec
|
||||
std::vector<Reloc> &relocs = isec->relocs;
|
||||
// Relocs are expected in descending-offset order, hence reverse() below.
assert(is_sorted(relocs,
|
||||
[](Reloc &a, Reloc &b) { return a.offset > b.offset; }));
|
||||
for (Reloc &r : reverse(relocs)) {
|
||||
++relocCount;
|
||||
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
|
||||
continue;
|
||||
++callSiteCount;
|
||||
// Calculate branch reachability boundaries
|
||||
uint64_t callVA = isecVA + r.offset;
|
||||
// Clamp at 0 so the unsigned subtraction cannot wrap.
uint64_t lowVA = branchRange < callVA ? callVA - branchRange : 0;
|
||||
uint64_t highVA = callVA + branchRange;
|
||||
// Calculate our call referent address
|
||||
auto *funcSym = r.referent.get<Symbol *>();
|
||||
ThunkInfo &thunkInfo = thunkMap[funcSym];
|
||||
// The referent is not reachable, so we need to use a thunk ...
|
||||
if (funcSym->isInStubs() && callVA >= stubsInRangeVA) {
|
||||
// ... Oh, wait! We are close enough to the end that __stubs
|
||||
// are now within range of a simple forward branch.
|
||||
continue;
|
||||
}
|
||||
uint64_t funcVA = funcSym->resolveBranchVA();
|
||||
++thunkInfo.callSitesUsed;
|
||||
// Both bounds are exclusive.
if (lowVA < funcVA && funcVA < highVA) {
|
||||
// The referent is reachable with a simple call instruction.
|
||||
continue;
|
||||
}
|
||||
++thunkInfo.thunkCallCount;
|
||||
++thunkCallCount;
|
||||
// If an existing thunk is reachable, use it ...
|
||||
if (thunkInfo.sym) {
|
||||
uint64_t thunkVA = thunkInfo.isec->getVA();
|
||||
if (lowVA < thunkVA && thunkVA < highVA) {
|
||||
r.referent = thunkInfo.sym;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// ... otherwise, create a new thunk
|
||||
if (isecAddr > highVA) {
|
||||
// When there is small-to-no margin between highVA and
|
||||
// isecAddr and the distance between subsequent call sites is
|
||||
// smaller than thunkSize, then a new thunk can go out of
|
||||
// range. Fix by unfinalizing inputs[finalIdx] to reduce the
|
||||
// distance between callVA and highVA, then shift some thunks
|
||||
// to occupy address-space formerly occupied by the
|
||||
// unfinalized inputs[finalIdx].
|
||||
fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun");
|
||||
}
|
||||
// The new thunk replaces the previous active thunk recorded in thunkInfo.
thunkInfo.isec = make<InputSection>();
|
||||
thunkInfo.isec->name = isec->name;
|
||||
thunkInfo.isec->segname = isec->segname;
|
||||
thunkInfo.isec->parent = this;
|
||||
StringRef thunkName = saver.save(funcSym->getName() + ".thunk." +
|
||||
std::to_string(thunkInfo.sequence++));
|
||||
r.referent = thunkInfo.sym = symtab->addDefined(
|
||||
thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
|
||||
/*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
|
||||
/*isThumb=*/false);
|
||||
target->populateThunk(thunkInfo.isec, funcSym);
|
||||
// The thunk takes the next free address, i.e. after everything finalized so far.
finalizeOne(thunkInfo.isec);
|
||||
thunks.push_back(thunkInfo.isec);
|
||||
++thunkCount;
|
||||
}
|
||||
}
|
||||
size = isecAddr - addr;
|
||||
fileSize = isecFileOff - fileOff;
|
||||
|
||||
log("thunks for " + parent->name + "," + name +
|
||||
": funcs = " + std::to_string(thunkMap.size()) +
|
||||
", relocs = " + std::to_string(relocCount) +
|
||||
", all calls = " + std::to_string(callSiteCount) +
|
||||
", thunk calls = " + std::to_string(thunkCallCount) +
|
||||
", thunks = " + std::to_string(thunkCount));
|
||||
}
|
||||
|
||||
// Writes every input section and every thunk into buf, each at its own
// outSecFileOff. The merge loop walks `inputs` and `thunks` together in
// ascending outSecOff order; zero-size inputs are emitted ahead of the
// current thunk regardless of offset.
// NOTE(review): the plain loop over `inputs` at the top already writes every
// input section, and the merge loop below writes them again. It looks like a
// leftover from the pre-thunk implementation -- confirm.
void MergedOutputSection::writeTo(uint8_t *buf) const {
|
||||
for (InputSection *isec : inputs)
|
||||
isec->writeTo(buf + isec->outSecFileOff);
|
||||
// Merge input sections from thunk & ordinary vectors
|
||||
size_t i = 0, ie = inputs.size();
|
||||
size_t t = 0, te = thunks.size();
|
||||
while (i < ie || t < te) {
|
||||
// Drain inputs that precede the next thunk (or all of them once thunks run out).
while (i < ie && (t == te || inputs[i]->getSize() == 0 ||
|
||||
inputs[i]->outSecOff < thunks[t]->outSecOff)) {
|
||||
inputs[i]->writeTo(buf + inputs[i]->outSecFileOff);
|
||||
++i;
|
||||
}
|
||||
// Drain thunks that precede the next input (or all of them once inputs run out).
while (t < te && (i == ie || thunks[t]->outSecOff < inputs[i]->outSecOff)) {
|
||||
thunks[t]->writeTo(buf + thunks[t]->outSecFileOff);
|
||||
++t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is most likely wrong; reconsider how section flags
|
||||
|
|
|
@ -12,11 +12,14 @@
|
|||
#include "InputSection.h"
|
||||
#include "OutputSection.h"
|
||||
#include "lld/Common/LLVM.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
|
||||
namespace lld {
|
||||
namespace macho {
|
||||
|
||||
class Defined;
|
||||
|
||||
// Linking multiple files will inevitably mean resolving sections in different
|
||||
// files that are labeled with the same segment and section name. This class
|
||||
// contains all such sections and writes the data from each section sequentially
|
||||
|
@ -34,10 +37,13 @@ public:
|
|||
|
||||
void mergeInput(InputSection *input);
|
||||
void finalize() override;
|
||||
bool needsThunks() const;
|
||||
uint64_t estimateStubsInRangeVA(size_t callIdx) const;
|
||||
|
||||
void writeTo(uint8_t *buf) const override;
|
||||
|
||||
std::vector<InputSection *> inputs;
|
||||
std::vector<InputSection *> thunks;
|
||||
|
||||
static bool classof(const OutputSection *sec) {
|
||||
return sec->kind() == MergedKind;
|
||||
|
@ -50,6 +56,30 @@ private:
|
|||
uint64_t fileSize = 0;
|
||||
};
|
||||
|
||||
// We maintain one ThunkInfo per real function.
|
||||
//
|
||||
// The "active thunk" is represented by the sym/isec pair that
|
||||
// turns-over during finalize(): as the call-site address advances,
|
||||
// the active thunk goes out of branch-range, and we create a new
|
||||
// thunk to take its place.
|
||||
//
|
||||
// The remaining members -- bools and counters -- apply to the
|
||||
// collection of thunks associated with the real function.
|
||||
|
||||
// Per-function thunk bookkeeping; instances are the mapped values of thunkMap.
struct ThunkInfo {
|
||||
// These denote the active thunk:
|
||||
Defined *sym = nullptr; // private-extern symbol for active thunk
|
||||
InputSection *isec = nullptr; // input section for active thunk
|
||||
|
||||
// The following values are cumulative across all thunks on this function
|
||||
uint32_t callSiteCount = 0; // how many calls to the real function?
|
||||
uint32_t callSitesUsed = 0; // how many call sites processed so-far?
|
||||
uint32_t thunkCallCount = 0; // how many call sites went to thunk?
|
||||
// NOTE(review): sequence feeds the <FUNCTION>.thunk.<SEQUENCE> name; as a
// uint8_t it wraps after 255 -- confirm that bound can never be reached.
uint8_t sequence = 0; // how many thunks created so-far?
|
||||
};
|
||||
|
||||
extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;
|
||||
|
||||
} // namespace macho
|
||||
} // namespace lld
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@ def help : Flag<["-", "--"], "help">,
|
|||
def help_hidden : Flag<["--"], "help-hidden">,
|
||||
HelpText<"Display help for hidden options">,
|
||||
Group<grp_lld>;
|
||||
def verbose : Flag<["--"], "verbose">,
|
||||
Group<grp_lld>;
|
||||
def error_limit_eq : Joined<["--"], "error-limit=">,
|
||||
HelpText<"Maximum number of errors to print before exiting (default: 20)">,
|
||||
Group<grp_lld>;
|
||||
|
|
|
@ -27,9 +27,25 @@ std::string lld::toMachOString(const object::Archive::Symbol &b) {
|
|||
return demangle(b.getName());
|
||||
}
|
||||
|
||||
// Address of this symbol's entry in the stubs, GOT, and TLV-pointer
// sections, respectively. Each forwards the symbol's index to the matching
// section's getVA(); note that the TLV variant reuses gotIndex.
uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
|
||||
uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
|
||||
uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
|
||||
|
||||
// Returns the symbol's virtual address: `value` itself for absolute symbols,
// otherwise the containing input section's VA plus `value`. If the section
// has not been finalized yet, returns TargetInfo::outOfRangeVA instead.
uint64_t Defined::getVA() const {
|
||||
if (isAbsolute())
|
||||
return value;
|
||||
|
||||
if (!isec->isFinal) {
|
||||
// A target arch that does not use thunks ought never ask for
|
||||
// the address of a function that has not yet been finalized.
|
||||
assert(target->usesThunks());
|
||||
|
||||
// MergedOutputSection::finalize() can seek the address of a
|
||||
// function before its address is assigned. The thunking algorithm
|
||||
// knows that unfinalized functions will be out of range, so it is
|
||||
// expedient to return a contrived out-of-range address.
|
||||
return TargetInfo::outOfRangeVA;
|
||||
}
|
||||
return isec->getVA() + value;
|
||||
}
|
||||
|
||||
|
@ -42,4 +58,8 @@ uint64_t Defined::getFileOffset() const {
|
|||
return isec->getFileOffset() + value;
|
||||
}
|
||||
|
||||
// A dylib symbol that has a stub resolves to that stub's address; otherwise
// this defers to the base-class Symbol::getVA().
uint64_t DylibSymbol::getVA() const {
|
||||
return isInStubs() ? getStubVA() : Symbol::getVA();
|
||||
}
|
||||
|
||||
void LazySymbol::fetchArchiveMember() { getFile()->fetch(sym); }
|
||||
|
|
|
@ -72,6 +72,16 @@ public:
|
|||
// Whether this symbol is in the StubsSection.
|
||||
bool isInStubs() const { return stubsIndex != UINT32_MAX; }
|
||||
|
||||
uint64_t getStubVA() const;
|
||||
uint64_t getGotVA() const;
|
||||
uint64_t getTlvVA() const;
|
||||
// Address a branch to this symbol should target: its stub if it has one,
// otherwise the symbol's own VA. Only Defined and DylibSymbol are valid
// branch targets (asserted).
uint64_t resolveBranchVA() const {
|
||||
assert(isa<Defined>(this) || isa<DylibSymbol>(this));
|
||||
return isInStubs() ? getStubVA() : getVA();
|
||||
}
|
||||
// Address a GOT / TLV relocation should use: the symbol's slot in the
// corresponding section when isInGot() is true, otherwise the symbol's VA.
uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
|
||||
uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
|
||||
|
||||
// The index of this symbol in the GOT or the TLVPointer section, depending
|
||||
// on whether it is a thread-local. A given symbol cannot be referenced by
|
||||
// both these sections at once.
|
||||
|
@ -207,6 +217,7 @@ public:
|
|||
: Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
|
||||
tlv(isTlv) {}
|
||||
|
||||
uint64_t getVA() const override;
|
||||
bool isWeakDef() const override { return weakDef; }
|
||||
bool isWeakRef() const override { return refState == RefState::Weak; }
|
||||
bool isReferenced() const { return refState != RefState::Unreferenced; }
|
||||
|
|
|
@ -64,6 +64,7 @@ MachHeaderSection::MachHeaderSection()
|
|||
// Setting the index to 1 to pretend that this section is the text
|
||||
// section.
|
||||
index = 1;
|
||||
isec->isFinal = true;
|
||||
}
|
||||
|
||||
void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
|
||||
|
@ -425,6 +426,8 @@ void StubsSection::writeTo(uint8_t *buf) const {
|
|||
}
|
||||
}
|
||||
|
||||
// Marks the section as final; StubsSection::getVA() returns real addresses
// only once isFinal is set.
void StubsSection::finalize() { isFinal = true; }
|
||||
|
||||
bool StubsSection::addEntry(Symbol *sym) {
|
||||
bool inserted = entries.insert(sym);
|
||||
if (inserted)
|
||||
|
@ -1101,12 +1104,12 @@ void macho::createSyntheticSymbols() {
|
|||
// __TEXT, __text)
|
||||
// Otherwise, it's an absolute symbol.
|
||||
if (config->isPic)
|
||||
symtab->addSynthetic("__mh_execute_header", in.header->isec, 0,
|
||||
symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0,
|
||||
/*privateExtern=*/false,
|
||||
/*includeInSymtab=*/true);
|
||||
else
|
||||
symtab->addSynthetic("__mh_execute_header",
|
||||
/*isec*/ nullptr, 0,
|
||||
/*isec*/ nullptr, /*value=*/0,
|
||||
/*privateExtern=*/false,
|
||||
/*includeInSymtab=*/true);
|
||||
break;
|
||||
|
|
|
@ -123,6 +123,10 @@ public:
|
|||
|
||||
void addEntry(Symbol *sym);
|
||||
|
||||
// Address of the entry at gotIndex: section base plus one target word per
// preceding entry.
uint64_t getVA(uint32_t gotIndex) const {
|
||||
return addr + gotIndex * target->wordSize;
|
||||
}
|
||||
|
||||
private:
|
||||
llvm::SetVector<const Symbol *> entries;
|
||||
};
|
||||
|
@ -285,11 +289,21 @@ public:
|
|||
StubsSection();
|
||||
uint64_t getSize() const override;
|
||||
bool isNeeded() const override { return !entries.empty(); }
|
||||
void finalize() override;
|
||||
void writeTo(uint8_t *buf) const override;
|
||||
const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
|
||||
// Returns whether the symbol was added. Note that every stubs entry will
|
||||
// have a corresponding entry in the LazyPointerSection.
|
||||
bool addEntry(Symbol *);
|
||||
// Address of the stub at stubsIndex (section base plus stubSize per entry),
// or TargetInfo::outOfRangeVA while the section is not yet final.
uint64_t getVA(uint32_t stubsIndex) const {
|
||||
// MergedOutputSection::finalize() can seek the address of a
|
||||
// stub before its address is assigned. Before __stubs is
|
||||
// finalized, return a contrived out-of-range address.
|
||||
return isFinal ? addr + stubsIndex * target->stubSize
|
||||
: TargetInfo::outOfRangeVA;
|
||||
}
|
||||
|
||||
bool isFinal = false; // is address assigned?
|
||||
|
||||
private:
|
||||
llvm::SetVector<Symbol *> entries;
|
||||
|
|
|
@ -24,6 +24,7 @@ namespace macho {
|
|||
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
|
||||
|
||||
class Symbol;
|
||||
class Defined;
|
||||
class DylibSymbol;
|
||||
class InputSection;
|
||||
|
||||
|
@ -65,10 +66,16 @@ public:
|
|||
|
||||
virtual uint64_t getPageSize() const = 0;
|
||||
|
||||
// Hook for targets that use thunks to fill in a thunk's code and relocs for
// funcSym. The default is unreachable: targets that leave thunkSize at 0 are
// never expected to be asked for a thunk.
virtual void populateThunk(InputSection *thunk, Symbol *funcSym) {
|
||||
llvm_unreachable("target does not use thunks");
|
||||
}
|
||||
|
||||
bool hasAttr(uint8_t type, RelocAttrBits bit) const {
|
||||
return getRelocAttrs(type).hasAttr(bit);
|
||||
}
|
||||
|
||||
// A target opts in to thunks by setting a non-zero thunkSize.
bool usesThunks() const { return thunkSize > 0; }
|
||||
|
||||
uint32_t magic;
|
||||
llvm::MachO::CPUType cpuType;
|
||||
uint32_t cpuSubtype;
|
||||
|
@ -79,6 +86,15 @@ public:
|
|||
size_t stubHelperHeaderSize;
|
||||
size_t stubHelperEntrySize;
|
||||
size_t wordSize;
|
||||
|
||||
size_t thunkSize = 0;
|
||||
uint64_t branchRange = 0;
|
||||
|
||||
// We contrive this value as sufficiently far from any valid address that it
|
||||
// will always be out-of-range for any architecture. UINT64_MAX is not a
|
||||
// good choice because it is (a) only 1 away from wrapping to 0, and (b) the
|
||||
// tombstone value for DenseMap<> and caused weird assertions for me.
|
||||
static constexpr uint64_t outOfRangeVA = 0xfull << 60;
|
||||
};
|
||||
|
||||
TargetInfo *createX86_64TargetInfo();
|
||||
|
|
|
@ -511,7 +511,7 @@ public:
|
|||
|
||||
} // namespace
|
||||
|
||||
// Adds stubs and bindings where necessary (e.g. if the symbol is a
|
||||
// Add stubs and bindings where necessary (e.g. if the symbol is a
|
||||
// DylibSymbol.)
|
||||
static void prepareBranchTarget(Symbol *sym) {
|
||||
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
||||
|
@ -535,7 +535,7 @@ static void prepareBranchTarget(Symbol *sym) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
assert(false && "invalid symbol type for branch");
|
||||
llvm_unreachable("invalid branch target symbol type");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -958,8 +958,6 @@ void Writer::finalizeAddresses() {
|
|||
seg->vmSize = addr - seg->firstSection()->addr;
|
||||
seg->fileSize = fileOff - seg->fileOff;
|
||||
}
|
||||
|
||||
// FIXME(gkm): create branch-extension thunks here, then adjust addresses
|
||||
}
|
||||
|
||||
void Writer::finalizeLinkEditSegment() {
|
||||
|
@ -1062,7 +1060,11 @@ template <class LP> void Writer::run() {
|
|||
in.stubHelper->setup();
|
||||
scanSymbols();
|
||||
createOutputSections<LP>();
|
||||
// No more sections nor segments are created beyond this point.
|
||||
// After this point, we create no new segments; HOWEVER, we might
|
||||
// yet create branch-range extension thunks for architectures whose
|
||||
// hardware call instructions have limited range, e.g., ARM(64).
|
||||
// The thunks are created as InputSections interspersed among
|
||||
// the ordinary __TEXT,__text InputSections.
|
||||
sortSegmentsAndSections();
|
||||
createLoadCommands<LP>();
|
||||
finalizeAddresses();
|
||||
|
|
|
@ -0,0 +1,300 @@
|
|||
# REQUIRES: aarch64
|
||||
|
||||
## Check for the following:
|
||||
## (1) address match between thunk definitions and call destinations
|
||||
## (2) address match between thunk page+offset computations and function definitions
|
||||
## (3) a second thunk is created when the first one goes out of range
|
||||
## (4) early calls to a dylib stub use a thunk, and later calls the stub directly
|
||||
## Notes:
|
||||
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
|
||||
|
||||
# RUN: rm -rf %t; mkdir %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
|
||||
# RUN: %lld -arch arm64 -lSystem -o %t/thunk %t/input.o
|
||||
# RUN: llvm-objdump -d --no-show-raw-insn %t/thunk | FileCheck %s
|
||||
|
||||
# CHECK: Disassembly of section __TEXT,__text:
|
||||
|
||||
# CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>:
|
||||
# CHECK: bl 0x[[#%x, A:]] <_a>
|
||||
# CHECK: bl 0x[[#%x, B:]] <_b>
|
||||
# CHECK: bl 0x[[#%x, C:]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D_THUNK_0:]] <_d.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, E_THUNK_0:]] <_e.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, F_THUNK_0:]] <_f.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, G_THUNK_0:]] <_g.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0:]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0:]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%.13x, B_PAGE:]][[#%.3x, B_OFFSET:]] <_b>:
|
||||
# CHECK: bl 0x[[#%x, A]] <_a>
|
||||
# CHECK: bl 0x[[#%x, B]] <_b>
|
||||
# CHECK: bl 0x[[#%x, C]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D_THUNK_0]] <_d.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, E_THUNK_0]] <_e.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%.13x, C_PAGE:]][[#%.3x, C_OFFSET:]] <_c>:
|
||||
# CHECK: bl 0x[[#%x, A]] <_a>
|
||||
# CHECK: bl 0x[[#%x, B]] <_b>
|
||||
# CHECK: bl 0x[[#%x, C]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D:]] <_d>
|
||||
# CHECK: bl 0x[[#%x, E:]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%x, D_THUNK_0]] <_d.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, D_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#D_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, E_THUNK_0]] <_e.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, E_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#E_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, F_THUNK_0]] <_f.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, F_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#F_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, G_THUNK_0]] <_g.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, G_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#G_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, H_THUNK_0]] <_h.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, H_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#H_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, NAN_THUNK_0]] <___nan.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, NAN_PAGE:]]
|
||||
# CHECK: add x16, x16, #[[#NAN_OFFSET:]]
|
||||
|
||||
# CHECK: [[#%x, D_PAGE + D_OFFSET]] <_d>:
|
||||
# CHECK: bl 0x[[#%x, A]] <_a>
|
||||
# CHECK: bl 0x[[#%x, B]] <_b>
|
||||
# CHECK: bl 0x[[#%x, C]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D]] <_d>
|
||||
# CHECK: bl 0x[[#%x, E]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%x, E_PAGE + E_OFFSET]] <_e>:
|
||||
# CHECK: bl 0x[[#%x, A_THUNK_0:]] <_a.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, B_THUNK_0:]] <_b.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, C]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D]] <_d>
|
||||
# CHECK: bl 0x[[#%x, E]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F:]] <_f>
|
||||
# CHECK: bl 0x[[#%x, G:]] <_g>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%x, F_PAGE + F_OFFSET]] <_f>:
|
||||
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, C]] <_c>
|
||||
# CHECK: bl 0x[[#%x, D]] <_d>
|
||||
# CHECK: bl 0x[[#%x, E]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F]] <_f>
|
||||
# CHECK: bl 0x[[#%x, G]] <_g>
|
||||
# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0>
|
||||
|
||||
# CHECK: [[#%x, G_PAGE + G_OFFSET]] <_g>:
|
||||
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, C_THUNK_0:]] <_c.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, D_THUNK_1:]] <_d.thunk.1>
|
||||
# CHECK: bl 0x[[#%x, E]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F]] <_f>
|
||||
# CHECK: bl 0x[[#%x, G]] <_g>
|
||||
# CHECK: bl 0x[[#%x, H:]] <_h>
|
||||
# CHECK: bl 0x[[#%x, STUBS:]]
|
||||
|
||||
# CHECK: [[#%x, A_THUNK_0]] <_a.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, A_PAGE]]000
|
||||
# CHECK: add x16, x16, #[[#%d, A_OFFSET]]
|
||||
|
||||
# CHECK: [[#%x, B_THUNK_0]] <_b.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, B_PAGE]]000
|
||||
# CHECK: add x16, x16, #[[#%d, B_OFFSET]]
|
||||
|
||||
# CHECK: [[#%x, H_PAGE + H_OFFSET]] <_h>:
|
||||
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, C_THUNK_0]] <_c.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, D_THUNK_1]] <_d.thunk.1>
|
||||
# CHECK: bl 0x[[#%x, E]] <_e>
|
||||
# CHECK: bl 0x[[#%x, F]] <_f>
|
||||
# CHECK: bl 0x[[#%x, G]] <_g>
|
||||
# CHECK: bl 0x[[#%x, H]] <_h>
|
||||
# CHECK: bl 0x[[#%x, STUBS]]
|
||||
|
||||
# CHECK: <_main>:
|
||||
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, C_THUNK_0]] <_c.thunk.0>
|
||||
# CHECK: bl 0x[[#%x, D_THUNK_1]] <_d.thunk.1>
|
||||
# CHECK: bl 0x[[#%x, E_THUNK_1:]] <_e.thunk.1>
|
||||
# CHECK: bl 0x[[#%x, F_THUNK_1:]] <_f.thunk.1>
|
||||
# CHECK: bl 0x[[#%x, G]] <_g>
|
||||
# CHECK: bl 0x[[#%x, H]] <_h>
|
||||
# CHECK: bl 0x[[#%x, STUBS]]
|
||||
|
||||
# CHECK: [[#%x, C_THUNK_0]] <_c.thunk.0>:
|
||||
# CHECK: adrp x16, 0x[[#%x, C_PAGE]]000
|
||||
# CHECK: add x16, x16, #[[#%d, C_OFFSET]]
|
||||
|
||||
# CHECK: [[#%x, D_THUNK_1]] <_d.thunk.1>:
|
||||
# CHECK: adrp x16, 0x[[#%x, D_PAGE]]
|
||||
# CHECK: add x16, x16, #[[#D_OFFSET]]
|
||||
|
||||
# CHECK: [[#%x, E_THUNK_1]] <_e.thunk.1>:
|
||||
# CHECK: adrp x16, 0x[[#%x, E_PAGE]]
|
||||
# CHECK: add x16, x16, #[[#E_OFFSET]]
|
||||
|
||||
# CHECK: [[#%x, F_THUNK_1]] <_f.thunk.1>:
|
||||
# CHECK: adrp x16, 0x[[#%x, F_PAGE]]
|
||||
# CHECK: add x16, x16, #[[#F_OFFSET]]
|
||||
|
||||
# CHECK: Disassembly of section __TEXT,__stubs:
|
||||
|
||||
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
|
||||
|
||||
.subsections_via_symbols
|
||||
.text
|
||||
|
||||
.globl _a
|
||||
.p2align 2
|
||||
_a:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
ret
|
||||
|
||||
.globl _b
|
||||
.p2align 2
|
||||
_b:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
.space 0x4000000-0x3c
|
||||
ret
|
||||
|
||||
.globl _c
|
||||
.p2align 2
|
||||
_c:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
ret
|
||||
|
||||
.globl _d
|
||||
.p2align 2
|
||||
_d:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
.space 0x4000000-0x38
|
||||
ret
|
||||
|
||||
.globl _e
|
||||
.p2align 2
|
||||
_e:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
ret
|
||||
|
||||
.globl _f
|
||||
.p2align 2
|
||||
_f:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
.space 0x4000000-0x34
|
||||
ret
|
||||
|
||||
.globl _g
|
||||
.p2align 2
|
||||
_g:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
ret
|
||||
|
||||
.globl _h
|
||||
.p2align 2
|
||||
_h:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
.space 0x4000000-0x30
|
||||
ret
|
||||
|
||||
.globl _main
|
||||
.p2align 2
|
||||
_main:
|
||||
bl _a
|
||||
bl _b
|
||||
bl _c
|
||||
bl _d
|
||||
bl _e
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl ___nan
|
||||
ret
|
|
@ -0,0 +1,429 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""Generate many skeletal functions with a thick call graph spanning a
|
||||
large address space to induce lld to create branch-islands for arm64.
|
||||
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import random
|
||||
import argparse
|
||||
import string
|
||||
from pprint import pprint
|
||||
from math import factorial
|
||||
from itertools import permutations
|
||||
|
||||
# This list comes from libSystem.tbd and contains a sizeable subset
|
||||
# of dylib calls available for all MacOS target archs.
|
||||
libSystem_calls = (
|
||||
"__CurrentRuneLocale", "__DefaultRuneLocale", "__Exit", "__NSGetArgc",
|
||||
"__NSGetArgv", "__NSGetEnviron", "__NSGetMachExecuteHeader",
|
||||
"__NSGetProgname", "__PathLocale", "__Read_RuneMagi", "___Balloc_D2A",
|
||||
"___Bfree_D2A", "___ULtod_D2A", "____mb_cur_max", "____mb_cur_max_l",
|
||||
"____runetype", "____runetype_l", "____tolower", "____tolower_l",
|
||||
"____toupper", "____toupper_l", "___add_ovflpage", "___addel",
|
||||
"___any_on_D2A", "___assert_rtn", "___b2d_D2A", "___big_delete",
|
||||
"___big_insert", "___big_keydata", "___big_return", "___big_split",
|
||||
"___bigtens_D2A", "___bt_close", "___bt_cmp", "___bt_defcmp",
|
||||
"___bt_defpfx", "___bt_delete", "___bt_dleaf", "___bt_fd",
|
||||
"___bt_free", "___bt_get", "___bt_new", "___bt_open", "___bt_pgin",
|
||||
"___bt_pgout", "___bt_put", "___bt_ret", "___bt_search", "___bt_seq",
|
||||
"___bt_setcur", "___bt_split", "___bt_sync", "___buf_free",
|
||||
"___call_hash", "___cleanup", "___cmp_D2A", "___collate_equiv_match",
|
||||
"___collate_load_error", "___collate_lookup", "___collate_lookup_l",
|
||||
"___copybits_D2A", "___cxa_atexit", "___cxa_finalize",
|
||||
"___cxa_finalize_ranges", "___cxa_thread_atexit", "___d2b_D2A",
|
||||
"___dbpanic", "___decrement_D2A", "___default_hash", "___default_utx",
|
||||
"___delpair", "___diff_D2A", "___dtoa", "___expand_table",
|
||||
"___fflush", "___fgetwc", "___find_bigpair", "___find_last_page",
|
||||
"___fix_locale_grouping_str", "___fread", "___free_ovflpage",
|
||||
"___freedtoa", "___gdtoa", "___gdtoa_locks", "___get_buf",
|
||||
"___get_page", "___gethex_D2A", "___getonlyClocaleconv",
|
||||
"___hash_open", "___hdtoa", "___hexdig_D2A", "___hexdig_init_D2A",
|
||||
"___hexnan_D2A", "___hi0bits_D2A", "___hldtoa", "___i2b_D2A",
|
||||
"___ibitmap", "___increment_D2A", "___isctype", "___istype",
|
||||
"___istype_l", "___ldtoa", "___libc_init", "___lo0bits_D2A",
|
||||
"___log2", "___lshift_D2A", "___maskrune", "___maskrune_l",
|
||||
"___match_D2A", "___mb_cur_max", "___mb_sb_limit", "___memccpy_chk",
|
||||
"___memcpy_chk", "___memmove_chk", "___memset_chk", "___mult_D2A",
|
||||
"___multadd_D2A", "___nrv_alloc_D2A", "___opendir2", "___ovfl_delete",
|
||||
"___ovfl_get", "___ovfl_put", "___pow5mult_D2A", "___put_page",
|
||||
"___quorem_D2A", "___ratio_D2A", "___rec_close", "___rec_delete",
|
||||
"___rec_dleaf", "___rec_fd", "___rec_fmap", "___rec_fpipe",
|
||||
"___rec_get", "___rec_iput", "___rec_open", "___rec_put",
|
||||
"___rec_ret", "___rec_search", "___rec_seq", "___rec_sync",
|
||||
"___rec_vmap", "___rec_vpipe", "___reclaim_buf", "___rshift_D2A",
|
||||
"___rv_alloc_D2A", "___s2b_D2A", "___sF", "___sclose", "___sdidinit",
|
||||
"___set_ones_D2A", "___setonlyClocaleconv", "___sflags", "___sflush",
|
||||
"___sfp", "___sfvwrite", "___sglue", "___sinit", "___slbexpand",
|
||||
"___smakebuf", "___snprintf_chk", "___snprintf_object_size_chk",
|
||||
"___split_page", "___sprintf_chk", "___sprintf_object_size_chk",
|
||||
"___sread", "___srefill", "___srget", "___sseek", "___stack_chk_fail",
|
||||
"___stack_chk_guard", "___stderrp", "___stdinp", "___stdoutp",
|
||||
"___stpcpy_chk", "___stpncpy_chk", "___strcat_chk", "___strcp_D2A",
|
||||
"___strcpy_chk", "___strlcat_chk", "___strlcpy_chk", "___strncat_chk",
|
||||
"___strncpy_chk", "___strtodg", "___strtopdd", "___sum_D2A",
|
||||
"___svfscanf", "___swbuf", "___swhatbuf", "___swrite", "___swsetup",
|
||||
"___tens_D2A", "___tinytens_D2A", "___tolower", "___tolower_l",
|
||||
"___toupper", "___toupper_l", "___trailz_D2A", "___ulp_D2A",
|
||||
"___ungetc", "___ungetwc", "___vsnprintf_chk", "___vsprintf_chk",
|
||||
"___wcwidth", "___wcwidth_l", "__allocenvstate", "__atexit_receipt",
|
||||
"__c_locale", "__cleanup", "__closeutx", "__copyenv",
|
||||
"__cthread_init_routine", "__deallocenvstate", "__endutxent",
|
||||
"__flockfile_debug_stub", "__fseeko", "__ftello", "__fwalk",
|
||||
"__getenvp", "__getutxent", "__getutxid", "__getutxline",
|
||||
"__inet_aton_check", "__init_clock_port", "__int_to_time",
|
||||
"__libc_fork_child", "__libc_initializer", "__long_to_time",
|
||||
"__mkpath_np", "__mktemp", "__openutx", "__os_assert_log",
|
||||
"__os_assert_log_ctx", "__os_assumes_log", "__os_assumes_log_ctx",
|
||||
"__os_avoid_tail_call", "__os_crash", "__os_crash_callback",
|
||||
"__os_crash_fmt", "__os_debug_log", "__os_debug_log_error_str",
|
||||
"__putenvp", "__pututxline", "__rand48_add", "__rand48_mult",
|
||||
"__rand48_seed", "__readdir_unlocked", "__reclaim_telldir",
|
||||
"__seekdir", "__setenvp", "__setutxent", "__sigaction_nobind",
|
||||
"__sigintr", "__signal_nobind", "__sigvec_nobind", "__sread",
|
||||
"__sseek", "__subsystem_init", "__swrite", "__time32_to_time",
|
||||
"__time64_to_time", "__time_to_int", "__time_to_long",
|
||||
"__time_to_time32", "__time_to_time64", "__unsetenvp", "__utmpxname",
|
||||
"_a64l", "_abort", "_abort_report_np", "_abs", "_acl_add_flag_np",
|
||||
"_acl_add_perm", "_acl_calc_mask", "_acl_clear_flags_np",
|
||||
"_acl_clear_perms", "_acl_copy_entry", "_acl_copy_ext",
|
||||
"_acl_copy_ext_native", "_acl_copy_int", "_acl_copy_int_native",
|
||||
"_acl_create_entry", "_acl_create_entry_np", "_acl_delete_def_file",
|
||||
"_acl_delete_entry", "_acl_delete_fd_np", "_acl_delete_file_np",
|
||||
"_acl_delete_flag_np", "_acl_delete_link_np", "_acl_delete_perm",
|
||||
"_acl_dup", "_acl_free", "_acl_from_text", "_acl_get_entry",
|
||||
"_acl_get_fd", "_acl_get_fd_np", "_acl_get_file", "_acl_get_flag_np",
|
||||
"_acl_get_flagset_np", "_acl_get_link_np", "_acl_get_perm_np",
|
||||
"_acl_get_permset", "_acl_get_permset_mask_np", "_acl_get_qualifier",
|
||||
"_acl_get_tag_type", "_acl_init", "_acl_maximal_permset_mask_np",
|
||||
"_acl_set_fd", "_acl_set_fd_np", "_acl_set_file", "_acl_set_flagset_np",
|
||||
"_acl_set_link_np", "_acl_set_permset", "_acl_set_permset_mask_np",
|
||||
"_acl_set_qualifier", "_acl_set_tag_type", "_acl_size", "_acl_to_text",
|
||||
"_acl_valid", "_acl_valid_fd_np", "_acl_valid_file_np",
|
||||
"_acl_valid_link", "_addr2ascii", "_alarm", "_alphasort",
|
||||
"_arc4random", "_arc4random_addrandom", "_arc4random_buf",
|
||||
"_arc4random_stir", "_arc4random_uniform", "_ascii2addr", "_asctime",
|
||||
"_asctime_r", "_asprintf", "_asprintf_l", "_asxprintf",
|
||||
"_asxprintf_exec", "_atexit", "_atexit_b", "_atof", "_atof_l",
|
||||
"_atoi", "_atoi_l", "_atol", "_atol_l", "_atoll", "_atoll_l",
|
||||
"_backtrace", "_backtrace_from_fp", "_backtrace_image_offsets",
|
||||
"_backtrace_symbols", "_backtrace_symbols_fd", "_basename",
|
||||
"_basename_r", "_bcopy", "_brk", "_bsd_signal", "_bsearch",
|
||||
"_bsearch_b", "_btowc", "_btowc_l", "_catclose", "_catgets",
|
||||
"_catopen", "_cfgetispeed", "_cfgetospeed", "_cfmakeraw",
|
||||
"_cfsetispeed", "_cfsetospeed", "_cfsetspeed", "_cgetcap",
|
||||
"_cgetclose", "_cgetent", "_cgetfirst", "_cgetmatch", "_cgetnext",
|
||||
"_cgetnum", "_cgetset", "_cgetstr", "_cgetustr", "_chmodx_np",
|
||||
"_clearerr", "_clearerr_unlocked", "_clock", "_clock_getres",
|
||||
"_clock_gettime", "_clock_gettime_nsec_np", "_clock_port",
|
||||
"_clock_sem", "_clock_settime", "_closedir", "_compat_mode",
|
||||
"_confstr", "_copy_printf_domain", "_creat", "_crypt", "_ctermid",
|
||||
"_ctermid_r", "_ctime", "_ctime_r", "_daemon", "_daylight",
|
||||
"_dbm_clearerr", "_dbm_close", "_dbm_delete", "_dbm_dirfno",
|
||||
"_dbm_error", "_dbm_fetch", "_dbm_firstkey", "_dbm_nextkey",
|
||||
"_dbm_open", "_dbm_store", "_dbopen", "_devname", "_devname_r",
|
||||
"_difftime", "_digittoint", "_digittoint_l", "_dirfd", "_dirname",
|
||||
"_dirname_r", "_div", "_dprintf", "_dprintf_l", "_drand48",
|
||||
"_duplocale", "_dxprintf", "_dxprintf_exec", "_ecvt", "_encrypt",
|
||||
"_endttyent", "_endusershell", "_endutxent", "_endutxent_wtmp",
|
||||
"_erand48", "_err", "_err_set_exit", "_err_set_exit_b",
|
||||
"_err_set_file", "_errc", "_errx", "_execl", "_execle", "_execlp",
|
||||
"_execv", "_execvP", "_execvp", "_exit", "_f_prealloc", "_fchmodx_np",
|
||||
"_fclose", "_fcvt", "_fdopen", "_fdopendir", "_feof", "_feof_unlocked",
|
||||
"_ferror", "_ferror_unlocked", "_fflagstostr", "_fflush", "_fgetc",
|
||||
"_fgetln", "_fgetpos", "_fgetrune", "_fgets", "_fgetwc", "_fgetwc_l",
|
||||
"_fgetwln", "_fgetwln_l", "_fgetws", "_fgetws_l", "_fileno",
|
||||
"_fileno_unlocked", "_filesec_dup", "_filesec_free",
|
||||
"_filesec_get_property", "_filesec_init", "_filesec_query_property",
|
||||
"_filesec_set_property", "_filesec_unset_property", "_flockfile",
|
||||
"_fmemopen", "_fmtcheck", "_fmtmsg", "_fnmatch", "_fopen", "_fork",
|
||||
"_forkpty", "_fparseln", "_fprintf", "_fprintf_l", "_fpurge",
|
||||
"_fputc", "_fputrune", "_fputs", "_fputwc", "_fputwc_l", "_fputws",
|
||||
"_fputws_l", "_fread", "_free_printf_comp", "_free_printf_domain",
|
||||
"_freelocale", "_freopen", "_fscanf", "_fscanf_l", "_fseek",
|
||||
"_fseeko", "_fsetpos", "_fstatvfs", "_fstatx_np", "_fsync_volume_np",
|
||||
"_ftell", "_ftello", "_ftime", "_ftok", "_ftrylockfile",
|
||||
"_fts_children", "_fts_close", "_fts_open", "_fts_open_b",
|
||||
"_fts_read", "_fts_set", "_ftw", "_fungetrune", "_funlockfile",
|
||||
"_funopen", "_fwide", "_fwprintf", "_fwprintf_l", "_fwrite",
|
||||
"_fwscanf", "_fwscanf_l", "_fxprintf", "_fxprintf_exec", "_gcvt",
|
||||
"_getbsize", "_getc", "_getc_unlocked", "_getchar", "_getchar_unlocked",
|
||||
"_getcwd", "_getdate", "_getdate_err", "_getdelim", "_getdiskbyname",
|
||||
"_getenv", "_gethostid", "_gethostname", "_getipv4sourcefilter",
|
||||
"_getlastlogx", "_getlastlogxbyname", "_getline", "_getloadavg",
|
||||
"_getlogin", "_getlogin_r", "_getmntinfo", "_getmntinfo_r_np",
|
||||
"_getmode", "_getopt", "_getopt_long", "_getopt_long_only",
|
||||
"_getpagesize", "_getpass", "_getpeereid", "_getprogname", "_gets",
|
||||
"_getsourcefilter", "_getsubopt", "_gettimeofday", "_getttyent",
|
||||
"_getttynam", "_getusershell", "_getutmp", "_getutmpx", "_getutxent",
|
||||
"_getutxent_wtmp", "_getutxid", "_getutxline", "_getvfsbyname",
|
||||
"_getw", "_getwc", "_getwc_l", "_getwchar", "_getwchar_l", "_getwd",
|
||||
"_glob", "_glob_b", "_globfree", "_gmtime", "_gmtime_r", "_grantpt",
|
||||
"_hash_create", "_hash_destroy", "_hash_purge", "_hash_search",
|
||||
"_hash_stats", "_hash_traverse", "_hcreate", "_hdestroy",
|
||||
"_heapsort", "_heapsort_b", "_hsearch", "_imaxabs", "_imaxdiv",
|
||||
"_inet_addr", "_inet_aton", "_inet_lnaof", "_inet_makeaddr",
|
||||
"_inet_net_ntop", "_inet_net_pton", "_inet_neta", "_inet_netof",
|
||||
"_inet_network", "_inet_nsap_addr", "_inet_nsap_ntoa", "_inet_ntoa",
|
||||
"_inet_ntop", "_inet_ntop4", "_inet_ntop6", "_inet_pton",
|
||||
"_initstate", "_insque", "_isalnum", "_isalnum_l", "_isalpha",
|
||||
"_isalpha_l", "_isascii", "_isatty", "_isblank", "_isblank_l",
|
||||
"_iscntrl", "_iscntrl_l", "_isdigit", "_isdigit_l", "_isgraph",
|
||||
"_isgraph_l", "_ishexnumber", "_ishexnumber_l", "_isideogram",
|
||||
"_isideogram_l", "_islower", "_islower_l", "_isnumber", "_isnumber_l",
|
||||
"_isphonogram", "_isphonogram_l", "_isprint", "_isprint_l",
|
||||
"_ispunct", "_ispunct_l", "_isrune", "_isrune_l", "_isspace",
|
||||
"_isspace_l", "_isspecial", "_isspecial_l", "_isupper", "_isupper_l",
|
||||
"_iswalnum", "_iswalnum_l", "_iswalpha", "_iswalpha_l", "_iswascii",
|
||||
"_iswblank", "_iswblank_l", "_iswcntrl", "_iswcntrl_l", "_iswctype",
|
||||
"_iswctype_l", "_iswdigit", "_iswdigit_l", "_iswgraph", "_iswgraph_l",
|
||||
"_iswhexnumber", "_iswhexnumber_l", "_iswideogram", "_iswideogram_l",
|
||||
"_iswlower", "_iswlower_l", "_iswnumber", "_iswnumber_l",
|
||||
"_iswphonogram", "_iswphonogram_l", "_iswprint", "_iswprint_l",
|
||||
"_iswpunct", "_iswpunct_l", "_iswrune", "_iswrune_l", "_iswspace",
|
||||
"_iswspace_l", "_iswspecial", "_iswspecial_l", "_iswupper",
|
||||
"_iswupper_l", "_iswxdigit", "_iswxdigit_l", "_isxdigit",
|
||||
"_isxdigit_l", "_jrand48", "_kOSThermalNotificationPressureLevelName",
|
||||
"_killpg", "_l64a", "_labs", "_lchflags", "_lchmod", "_lcong48",
|
||||
"_ldiv", "_lfind", "_link_addr", "_link_ntoa", "_llabs", "_lldiv",
|
||||
"_localeconv", "_localeconv_l", "_localtime", "_localtime_r",
|
||||
"_lockf", "_login", "_login_tty", "_logout", "_logwtmp", "_lrand48",
|
||||
"_lsearch", "_lstatx_np", "_lutimes", "_mblen", "_mblen_l",
|
||||
"_mbmb", "_mbrlen", "_mbrlen_l", "_mbrrune", "_mbrtowc", "_mbrtowc_l",
|
||||
"_mbrune", "_mbsinit", "_mbsinit_l", "_mbsnrtowcs", "_mbsnrtowcs_l",
|
||||
"_mbsrtowcs", "_mbsrtowcs_l", "_mbstowcs", "_mbstowcs_l", "_mbtowc",
|
||||
"_mbtowc_l", "_memmem", "_memset_s", "_mergesort", "_mergesort_b",
|
||||
"_mkdirx_np", "_mkdtemp", "_mkdtempat_np", "_mkfifox_np",
|
||||
"_mkostemp", "_mkostemps", "_mkostempsat_np", "_mkpath_np",
|
||||
"_mkpathat_np", "_mkstemp", "_mkstemp_dprotected_np", "_mkstemps",
|
||||
"_mkstempsat_np", "_mktemp", "_mktime", "_monaddition", "_moncontrol",
|
||||
"_moncount", "_moninit", "_monitor", "_monoutput", "_monreset",
|
||||
"_monstartup", "_mpool_close", "_mpool_filter", "_mpool_get",
|
||||
"_mpool_new", "_mpool_open", "_mpool_put", "_mpool_sync", "_mrand48",
|
||||
"_nanosleep", "_new_printf_comp", "_new_printf_domain", "_newlocale",
|
||||
"_nextwctype", "_nextwctype_l", "_nftw", "_nice", "_nl_langinfo",
|
||||
"_nl_langinfo_l", "_nrand48", "_nvis", "_off32", "_off64",
|
||||
"_offtime", "_open_memstream", "_open_with_subsystem",
|
||||
"_open_wmemstream", "_opendev", "_opendir", "_openpty", "_openx_np",
|
||||
"_optarg", "_opterr", "_optind", "_optopt", "_optreset", "_pause",
|
||||
"_pclose", "_perror", "_popen", "_posix2time", "_posix_openpt",
|
||||
"_posix_spawnp", "_printf", "_printf_l", "_psignal", "_psort",
|
||||
"_psort_b", "_psort_r", "_ptsname", "_ptsname_r", "_putc",
|
||||
"_putc_unlocked", "_putchar", "_putchar_unlocked", "_putenv",
|
||||
"_puts", "_pututxline", "_putw", "_putwc", "_putwc_l", "_putwchar",
|
||||
"_putwchar_l", "_qsort", "_qsort_b", "_qsort_r", "_querylocale",
|
||||
"_radixsort", "_raise", "_rand", "_rand_r", "_random", "_rb_tree_count",
|
||||
"_rb_tree_find_node", "_rb_tree_find_node_geq", "_rb_tree_find_node_leq",
|
||||
"_rb_tree_init", "_rb_tree_insert_node", "_rb_tree_iterate",
|
||||
"_rb_tree_remove_node", "_readdir", "_readdir_r", "_readpassphrase",
|
||||
"_reallocf", "_realpath", "_recv", "_regcomp", "_regcomp_l",
|
||||
"_regerror", "_regexec", "_regfree", "_register_printf_domain_function",
|
||||
"_register_printf_domain_render_std", "_regncomp", "_regncomp_l",
|
||||
"_regnexec", "_regwcomp", "_regwcomp_l", "_regwexec", "_regwncomp",
|
||||
"_regwncomp_l", "_regwnexec", "_remove", "_remque", "_rewind",
|
||||
"_rewinddir", "_rindex", "_rpmatch", "_sbrk", "_scandir",
|
||||
"_scandir_b", "_scanf", "_scanf_l", "_seed48", "_seekdir", "_send",
|
||||
"_setbuf", "_setbuffer", "_setenv", "_sethostid", "_sethostname",
|
||||
"_setinvalidrune", "_setipv4sourcefilter", "_setkey", "_setlinebuf",
|
||||
"_setlocale", "_setlogin", "_setmode", "_setpgrp", "_setprogname",
|
||||
"_setrgid", "_setruid", "_setrunelocale", "_setsourcefilter",
|
||||
"_setstate", "_settimeofday", "_setttyent", "_setusershell",
|
||||
"_setutxent", "_setutxent_wtmp", "_setvbuf", "_sigaction",
|
||||
"_sigaddset", "_sigaltstack", "_sigblock", "_sigdelset",
|
||||
"_sigemptyset", "_sigfillset", "_sighold", "_sigignore",
|
||||
"_siginterrupt", "_sigismember", "_signal", "_sigpause", "_sigrelse",
|
||||
"_sigset", "_sigsetmask", "_sigvec", "_skip", "_sl_add", "_sl_find",
|
||||
"_sl_free", "_sl_init", "_sleep", "_snprintf", "_snprintf_l",
|
||||
"_snvis", "_sockatmark", "_sprintf", "_sprintf_l", "_sradixsort",
|
||||
"_srand", "_srand48", "_sranddev", "_srandom", "_srandomdev",
|
||||
"_sscanf", "_sscanf_l", "_stat_with_subsystem", "_statvfs",
|
||||
"_statx_np", "_stpcpy", "_stpncpy", "_strcasecmp", "_strcasecmp_l",
|
||||
"_strcasestr", "_strcasestr_l", "_strcat", "_strcoll", "_strcoll_l",
|
||||
"_strcspn", "_strdup", "_strenvisx", "_strerror", "_strerror_r",
|
||||
"_strfmon", "_strfmon_l", "_strftime", "_strftime_l", "_strmode",
|
||||
"_strncasecmp", "_strncasecmp_l", "_strncat", "_strndup", "_strnstr",
|
||||
"_strnunvis", "_strnunvisx", "_strnvis", "_strnvisx", "_strpbrk",
|
||||
"_strptime", "_strptime_l", "_strrchr", "_strsenvisx", "_strsep",
|
||||
"_strsignal", "_strsignal_r", "_strsnvis", "_strsnvisx", "_strspn",
|
||||
"_strsvis", "_strsvisx", "_strtod", "_strtod_l", "_strtof",
|
||||
"_strtof_l", "_strtofflags", "_strtoimax", "_strtoimax_l",
|
||||
"_strtok", "_strtok_r", "_strtol", "_strtol_l", "_strtold",
|
||||
"_strtold_l", "_strtoll", "_strtoll_l", "_strtonum", "_strtoq",
|
||||
"_strtoq_l", "_strtoul", "_strtoul_l", "_strtoull", "_strtoull_l",
|
||||
"_strtoumax", "_strtoumax_l", "_strtouq", "_strtouq_l", "_strunvis",
|
||||
"_strunvisx", "_strvis", "_strvisx", "_strxfrm", "_strxfrm_l",
|
||||
"_suboptarg", "_svis", "_swab", "_swprintf", "_swprintf_l",
|
||||
"_swscanf", "_swscanf_l", "_sxprintf", "_sxprintf_exec",
|
||||
"_sync_volume_np", "_sys_errlist", "_sys_nerr", "_sys_siglist",
|
||||
"_sys_signame", "_sysconf", "_sysctl", "_sysctlbyname",
|
||||
"_sysctlnametomib", "_system", "_tcdrain", "_tcflow", "_tcflush",
|
||||
"_tcgetattr", "_tcgetpgrp", "_tcgetsid", "_tcsendbreak", "_tcsetattr",
|
||||
"_tcsetpgrp", "_tdelete", "_telldir", "_tempnam", "_tfind",
|
||||
"_thread_stack_pcs", "_time", "_time2posix", "_timegm", "_timelocal",
|
||||
"_timeoff", "_times", "_timespec_get", "_timezone", "_timingsafe_bcmp",
|
||||
"_tmpfile", "_tmpnam", "_toascii", "_tolower", "_tolower_l",
|
||||
"_toupper", "_toupper_l", "_towctrans", "_towctrans_l", "_towlower",
|
||||
"_towlower_l", "_towupper", "_towupper_l", "_tre_ast_new_catenation",
|
||||
"_tre_ast_new_iter", "_tre_ast_new_literal", "_tre_ast_new_node",
|
||||
"_tre_ast_new_union", "_tre_compile", "_tre_fill_pmatch",
|
||||
"_tre_free", "_tre_mem_alloc_impl", "_tre_mem_destroy",
|
||||
"_tre_mem_new_impl", "_tre_parse", "_tre_stack_destroy",
|
||||
"_tre_stack_new", "_tre_stack_num_objects", "_tre_tnfa_run_backtrack",
|
||||
"_tre_tnfa_run_parallel", "_tsearch", "_ttyname", "_ttyname_r",
|
||||
"_ttyslot", "_twalk", "_tzname", "_tzset", "_tzsetwall", "_ualarm",
|
||||
"_ulimit", "_umaskx_np", "_uname", "_ungetc", "_ungetwc",
|
||||
"_ungetwc_l", "_unlockpt", "_unsetenv", "_unvis", "_uselocale",
|
||||
"_usleep", "_utime", "_utmpxname", "_uuid_clear", "_uuid_compare",
|
||||
"_uuid_copy", "_uuid_generate", "_uuid_generate_random",
|
||||
"_uuid_generate_time", "_uuid_is_null", "_uuid_pack", "_uuid_parse",
|
||||
"_uuid_unpack", "_uuid_unparse", "_uuid_unparse_lower",
|
||||
"_uuid_unparse_upper", "_vasprintf", "_vasprintf_l", "_vasxprintf",
|
||||
"_vasxprintf_exec", "_vdprintf", "_vdprintf_l", "_vdxprintf",
|
||||
"_vdxprintf_exec", "_verr", "_verrc", "_verrx", "_vfprintf",
|
||||
"_vfprintf_l", "_vfscanf", "_vfscanf_l", "_vfwprintf", "_vfwprintf_l",
|
||||
"_vfwscanf", "_vfwscanf_l", "_vfxprintf", "_vfxprintf_exec",
|
||||
"_vis", "_vprintf", "_vprintf_l", "_vscanf", "_vscanf_l",
|
||||
"_vsnprintf", "_vsnprintf_l", "_vsprintf", "_vsprintf_l", "_vsscanf",
|
||||
"_vsscanf_l", "_vswprintf", "_vswprintf_l", "_vswscanf",
|
||||
"_vswscanf_l", "_vsxprintf", "_vsxprintf_exec", "_vwarn", "_vwarnc",
|
||||
"_vwarnx", "_vwprintf", "_vwprintf_l", "_vwscanf", "_vwscanf_l",
|
||||
"_vxprintf", "_vxprintf_exec", "_wait", "_wait3", "_waitpid",
|
||||
"_warn", "_warnc", "_warnx", "_wcpcpy", "_wcpncpy", "_wcrtomb",
|
||||
"_wcrtomb_l", "_wcscasecmp", "_wcscasecmp_l", "_wcscat", "_wcschr",
|
||||
"_wcscmp", "_wcscoll", "_wcscoll_l", "_wcscpy", "_wcscspn",
|
||||
"_wcsdup", "_wcsftime", "_wcsftime_l", "_wcslcat", "_wcslcpy",
|
||||
"_wcslen", "_wcsncasecmp", "_wcsncasecmp_l", "_wcsncat", "_wcsncmp",
|
||||
"_wcsncpy", "_wcsnlen", "_wcsnrtombs", "_wcsnrtombs_l", "_wcspbrk",
|
||||
"_wcsrchr", "_wcsrtombs", "_wcsrtombs_l", "_wcsspn", "_wcsstr",
|
||||
"_wcstod", "_wcstod_l", "_wcstof", "_wcstof_l", "_wcstoimax",
|
||||
"_wcstoimax_l", "_wcstok", "_wcstol", "_wcstol_l", "_wcstold",
|
||||
"_wcstold_l", "_wcstoll", "_wcstoll_l", "_wcstombs", "_wcstombs_l",
|
||||
"_wcstoul", "_wcstoul_l", "_wcstoull", "_wcstoull_l", "_wcstoumax",
|
||||
"_wcstoumax_l", "_wcswidth", "_wcswidth_l", "_wcsxfrm", "_wcsxfrm_l",
|
||||
"_wctob", "_wctob_l", "_wctomb", "_wctomb_l", "_wctrans",
|
||||
"_wctrans_l", "_wctype", "_wctype_l", "_wcwidth", "_wcwidth_l",
|
||||
"_wmemchr", "_wmemcmp", "_wmemcpy", "_wmemmove", "_wmemset",
|
||||
"_wordexp", "_wordfree", "_wprintf", "_wprintf_l", "_wscanf",
|
||||
"_wscanf_l", "_wtmpxname", "_xprintf", "_xprintf_exec"
|
||||
)
|
||||
|
||||
def print_here_head(name):
    """Print the shell line that opens the here-document for `name`.

    The emitted command tees the here-document into NAME.s and pipes it to
    llvm-mc (using the global `triple`) to produce NAME.o, as a background
    job. The here-document is terminated by END_OF_FILE.
    """
    command = "(tee %s.s |llvm-mc -filetype=obj -triple %s -o %s.o) <<END_OF_FILE &"
    print(command % (name, triple, name))
|
||||
|
||||
def print_here_tail():
    """Print the END_OF_FILE line that closes a here-document, then a blank line."""
    # The embedded newline plus print()'s own newline yield the blank line.
    print("END_OF_FILE\n")
|
||||
|
||||
def print_function_head(p2align, name):
    """Print the directives and label that open the function `name`.

    The text depends on the global `args.os` ("macos", "windows" or "linux").
    `p2align` is substituted into the `.p2align` directive. Nothing is
    printed for any other OS value.
    """
    target_os = args.os
    if target_os == "macos":
        # Mach-O symbols carry a leading underscore.
        template = (".section __TEXT,__text,regular,pure_instructions\n"
                    ".p2align %d, 0x90\n"
                    ".globl _%s\n"
                    "_%s:")
        values = (p2align, name, name)
    elif target_os == "windows":
        # COFF needs a .def/.endef symbol description before the label.
        template = (".text\n"
                    ".def %s;\n"
                    ".scl 2;\n"
                    ".type 32;\n"
                    ".endef\n"
                    ".globl %s\n"
                    ".p2align %d\n"
                    "%s:")
        values = (name, name, p2align, name)
    elif target_os == "linux":
        template = (".text\n"
                    ".p2align %d\n"
                    ".globl %s\n"
                    "%s:")
        values = (p2align, name, name)
    else:
        return
    print(template % values)
|
||||
|
||||
def print_function(addr, size, addrs):
    """Print one here-document holding the assembly of a generated function.

    addr:  value used to form the function's name, "x%08x" % addr.
    size:  nominal size of the function; bounds the number of calls and
           determines the size of the trailing .fill.
    addrs: sequence of all generated function addresses; call targets are
           drawn from it at random.
    """
    name = "x%08x" % addr
    # Random number of call sites, between 0 and size >> 12 inclusive.
    calls = random.randint(0, size>>12)
    print_here_head(name)
    print("""\
### %s size=%x calls=%x""" % (name, size, calls))
    print_function_head(4, name)
    for i in range(calls):
        # Call a randomly chosen generated function; on macOS the symbol
        # gets a leading underscore.
        print(" bl %sx%08x\n .p2align 4" %
              ("_" if args.os == "macos" else "",
               addrs[random.randint(0, len(addrs)-1)]))
        # NOTE(review): this rendering lost indentation; the macOS-only call
        # below is assumed to sit inside the loop -- confirm against upstream.
        if args.os == "macos":
            # Also call a randomly chosen libSystem symbol.
            print(" bl %s\n .p2align 4" %
                  (libSystem_calls[random.randint(0, len(libSystem_calls)-1)]))
    # Pad with whatever remains after budgeting 4 bytes per call plus 4 more
    # (presumably for the final ret -- confirm).
    fill = size - 4 * (calls + 1)
    assert fill > 0
    print("""\
.fill 0x%x
ret""" % (fill))
    print_here_tail()
|
||||
|
||||
def random_seed():
    """Return a random 10-letter lowercase string.

    The value is short and printable so that it can be passed back in with
    --seed=STRING to reproduce a run.
    """
    letters = [random.choice(string.ascii_lowercase) for _ in range(10)]
    return ''.join(letters)
|
||||
|
||||
def generate_sizes(base, megabytes):
    """Yield random function sizes until their sum reaches `megabytes`.

    Each size is a random integer in [0x100, 0x10000] multiplied by 0x10.
    Despite its name, `megabytes` is compared directly against the running
    sum of sizes, so it acts as a byte count. `base` is accepted but not
    used here.
    """
    emitted = 0
    while True:
        if emitted >= megabytes:
            return
        chunk = 0x10 * random.randint(0x100, 0x10000)
        yield chunk
        emitted += chunk
|
||||
|
||||
def generate_addrs(addr, sizes):
    """Yield the start address of each function when laid out back to back.

    addr:  address of the first function.
    sizes: iterable of function sizes, in layout order.

    Yields one address per entry of `sizes`: `addr` first, then `addr` plus
    the sum of all preceding sizes. Yields nothing when `sizes` is empty.
    """
    # Iterate directly instead of indexing, so any iterable is accepted.
    for size in sizes:
        yield addr
        addr += size
|
||||
|
||||
def main():
    """Parse the command line and print a shell script that builds the program.

    Sets the globals `args` (parsed options) and `triple` (llvm-mc target
    triple for the chosen OS), seeds the random number generator from --seed,
    and prints one here-document per generated function plus one for `main`,
    followed by a final `wait`.
    """
    global args, triple

    # One llvm-mc triple per supported OS; the keys also validate --os.
    triples = {
        "macos": "arm64-apple-macos",
        "linux": "aarch64-pc-linux",
        "windows": "aarch64-pc-windows"
    }

    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="""\
The output is a shell script: every generated function is written as a
here-document that is saved to NAME.s and assembled to NAME.o by llvm-mc
in the background, and a final `wait` collects those jobs.
""")
    parser.add_argument('--seed', type=str, default=random_seed(),
                        help='Seed the random number generator')
    parser.add_argument('--size', type=int, default=None,
                        help='Total text size to generate, in megabytes')
    # Reject unknown OS names up front; previously they produced triple=None
    # and functions without any header.
    parser.add_argument('--os', type=str, default="macos",
                        choices=sorted(triples),
                        help='Target OS: macos, windows, or linux')
    args = parser.parse_args()
    # A negative size generates no functions and would fail on addrs[-1].
    if args.size is not None and args.size < 0:
        parser.error("--size must not be negative")
    triple = triples[args.os]

    print("""\
### seed=%s triple=%s
""" % (args.seed, triple))

    random.seed(args.seed)

    base = 0x4010
    # An omitted (or zero) --size falls back to 512 MiB, as before.
    total_bytes = (args.size or 512) * 1024 * 1024
    sizes = list(generate_sizes(base, total_bytes))
    addrs = list(generate_addrs(base, sizes))

    for addr, size in zip(addrs, sizes):
        print_function(addr, size, addrs)

    print_here_head("main")
    # Record the address just past the last generated function.
    print("""\
### _x%08x
""" % (addrs[-1] + sizes[-1]))
    print_function_head(14 if args.os == "macos" else 4, "main")
    print(" ret")
    print_here_tail()
    # Make the emitted script wait for all background llvm-mc jobs.
    print("wait")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue