Add support for chained fixup load commands to MachOObjectFile

This is part of a series of patches to upstream support for Mach-O chained fixups.

This patch adds support for parsing the chained fixup load command and
parsing the chained fixups header. It also puts into place the
abstract interface that will be used to iterate over the fixups.

Differential Revision: https://reviews.llvm.org/D113630
This commit is contained in:
Adrian Prantl 2021-11-10 16:25:26 -08:00
parent 621e2de138
commit a3bfb01d94
7 changed files with 507 additions and 8 deletions

View File

@ -255,7 +255,8 @@ enum BindType {
enum BindSpecialDylib {
BIND_SPECIAL_DYLIB_SELF = 0,
BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2,
// Library ordinal that marks a "weak bind": ChainedFixupTarget::weakBind()
// compares its LibOrdinal against this value.
BIND_SPECIAL_DYLIB_WEAK_LOOKUP = -3
};
enum {
@ -1001,6 +1002,19 @@ struct nlist_64 {
uint64_t n_value;
};
/// Structs for dyld chained fixups.
/// dyld_chained_fixups_header is the data pointed to by the
/// LC_DYLD_CHAINED_FIXUPS load command.
///
/// Every field is a 32-bit unsigned integer;
/// swapStruct(dyld_chained_fixups_header &) byte-swaps each field individually.
/// Do not reorder or resize the fields: the struct is read directly from the
/// object file.
struct dyld_chained_fixups_header {
uint32_t fixups_version; ///< 0
uint32_t starts_offset; ///< Offset of dyld_chained_starts_in_image.
uint32_t imports_offset; ///< Offset of imports table in chain_data.
uint32_t symbols_offset; ///< Offset of symbol strings in chain_data.
uint32_t imports_count; ///< Number of imported symbol names.
uint32_t imports_format; ///< DYLD_CHAINED_IMPORT*
uint32_t symbols_format; ///< 0 => uncompressed, 1 => zlib compressed
};
// Byte order swapping functions for MachO structs
inline void swapStruct(fat_header &mh) {
@ -2008,6 +2022,16 @@ union alignas(4) macho_load_command {
};
LLVM_PACKED_END
/// Byte-swaps every field of a dyld_chained_fixups_header in place.
/// All seven fields are uint32_t, so each one is swapped on its own.
inline void swapStruct(dyld_chained_fixups_header &C) {
sys::swapByteOrder(C.fixups_version);
sys::swapByteOrder(C.starts_offset);
sys::swapByteOrder(C.imports_offset);
sys::swapByteOrder(C.symbols_offset);
sys::swapByteOrder(C.imports_count);
sys::swapByteOrder(C.imports_format);
sys::swapByteOrder(C.symbols_format);
}
/* code signing attributes of a process */
enum CodeSignAttrs {

View File

@ -260,6 +260,126 @@ private:
};
using bind_iterator = content_iterator<MachOBindEntry>;
/// ChainedFixupTarget holds all the information about an external symbol
/// necessary to bind this binary to that symbol. These values are referenced
/// indirectly by chained fixup binds. This structure captures values from all
/// import and symbol formats.
///
/// Be aware there are two notions of weak here:
/// WeakImport == true
/// The associated bind may be set to 0 if this symbol is missing from its
/// parent library. This is called a "weak import."
/// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP
/// This symbol may be coalesced with other libraries vending the same
/// symbol. E.g., C++'s "operator new". This is called a "weak bind."
struct ChainedFixupTarget {
public:
  /// \param LibOrdinal  library ordinal of the symbol's source, or one of the
  ///                    MachO::BIND_SPECIAL_DYLIB_* values.
  /// \param Symbol      name of the external symbol.
  /// \param Addend      addend associated with this target.
  /// \param WeakImport  true if the symbol is a "weak import".
  ChainedFixupTarget(int LibOrdinal, StringRef Symbol, uint64_t Addend,
                     bool WeakImport)
      : LibOrdinal(LibOrdinal), SymbolName(Symbol), Addend(Addend),
        WeakImport(WeakImport) {}

  // The accessors are const so that targets can be inspected through const
  // references, e.g. the elements of a const std::vector<ChainedFixupTarget>.
  int libOrdinal() const { return LibOrdinal; }
  StringRef symbolName() const { return SymbolName; }
  uint64_t addend() const { return Addend; }

  /// \return true if this target was constructed as a "weak import".
  bool weakImport() const { return WeakImport; }

  /// \return true if the library ordinal is
  /// MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP, i.e. this is a "weak bind".
  bool weakBind() const {
    return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
  }

private:
  int LibOrdinal;
  StringRef SymbolName;
  uint64_t Addend;
  bool WeakImport;
};
/// MachOAbstractFixupEntry is an abstract class representing a fixup in a
/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also
/// subdivide into additional subtypes (weak, lazy, reexport).
///
/// The two concrete subclasses of MachOAbstractFixupEntry are:
///
/// MachORebaseBindEntry - for dyld opcode-based tables, including threaded-
/// rebase, where rebases are mixed in with other
/// bind opcodes.
/// MachOChainedFixupEntry - for pointer chains embedded in data pages.
class MachOAbstractFixupEntry {
public:
  /// Stores \p Err and \p O and caches the vmaddr of the object's __TEXT
  /// segment (see textAddress()).
  MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O);

  int32_t segmentIndex() const;
  uint64_t segmentOffset() const;
  uint64_t segmentAddress() const;
  StringRef segmentName() const;
  StringRef sectionName() const;
  StringRef typeName() const;
  StringRef symbolName() const;
  uint32_t flags() const;
  int64_t addend() const;
  int ordinal() const;

  /// \return the location of this fixup as a VM Address. For the VM
  /// Address this fixup is pointing to, use pointerValue().
  uint64_t address() const;

  /// \return the VM Address pointed to by this fixup. Use
  /// pointerValue() to compare against other VM Addresses, such as
  /// section addresses or segment vmaddrs.
  uint64_t pointerValue() const { return PointerValue; }

  /// \return the raw "on-disk" representation of the fixup. For
  /// Threaded rebases and Chained pointers these values are generally
  /// encoded into various different pointer formats. This value is
  /// exposed in API for tools that want to display and annotate the
  /// raw bits.
  uint64_t rawValue() const { return RawValue; }

  void moveNext();

protected:
  Error *E;
  const MachOObjectFile *O;
  uint64_t SegmentOffset = 0;
  int32_t SegmentIndex = -1;
  StringRef SymbolName;
  int32_t Ordinal = 0;
  uint32_t Flags = 0;
  int64_t Addend = 0;
  uint64_t PointerValue = 0;
  uint64_t RawValue = 0;
  bool Done = false;

  void moveToFirst();
  void moveToEnd();

  /// \return the vm address of the start of __TEXT segment, or 0 if the
  /// constructor found no __TEXT segment.
  uint64_t textAddress() const { return TextAddress; }

private:
  // Defaults to 0 so that textAddress() never reads an indeterminate value
  // when the object has no __TEXT segment; the constructor only assigns this
  // member after finding one.
  uint64_t TextAddress = 0;
};
/// Fixup entry used as the content type of fixup_iterator.
/// moveNext(), moveToFirst() and moveToEnd() hide (they do not override) the
/// base-class functions of the same name, which are non-virtual.
class MachOChainedFixupEntry : public MachOAbstractFixupEntry {
public:
/// Category of fixups requested by the caller; the constructor stores it in
/// the Kind member.
enum class FixupKind { All, Bind, WeakBind, Rebase };
/// When \p Parse is true the constructor loads the fixup targets through
/// MachOObjectFile::getDyldChainedFixupTargets() and, on failure, stores the
/// error in \p *Err. When \p Parse is false no targets are loaded.
MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, FixupKind Kind,
bool Parse);
/// Equality used by fixup_iterator to compare positions.
bool operator==(const MachOChainedFixupEntry &) const;
/// Advances the entry; the current implementation only marks it as done.
void moveNext();
/// Resets the base-class state and FixupIndex, then calls moveNext().
void moveToFirst();
/// Marks the entry as done (forwards to the base class).
void moveToEnd();
private:
/// Targets loaded by the constructor when Parse is true; empty otherwise.
std::vector<ChainedFixupTarget> FixupTargets;
uint32_t FixupIndex = 0;
FixupKind Kind;
};
using fixup_iterator = content_iterator<MachOChainedFixupEntry>;
class MachOObjectFile : public ObjectFile {
public:
struct LoadCommandInfo {
@ -402,6 +522,10 @@ public:
/// For use iterating over all bind table entries.
iterator_range<bind_iterator> bindTable(Error &Err);
/// For iterating over all chained fixups.
/// \p Kind is forwarded to the MachOChainedFixupEntry objects that back the
/// returned range. Errors hit while loading the fixup targets are reported
/// through \p Err, so callers must check it after iterating.
iterator_range<fixup_iterator>
fixupTable(Error &Err, MachOChainedFixupEntry::FixupKind Kind);
/// For use iterating over all lazy bind table entries.
iterator_range<bind_iterator> lazyBindTable(Error &Err);
@ -562,6 +686,7 @@ public:
ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
/// \return the chained fixup targets of this object. The list is empty when
/// there is no LC_DYLD_CHAINED_FIXUPS load command or when its data offset is
/// 0. An error is returned if the chained fixups header cannot be read, has a
/// non-zero fixups_version, or has an imports_format outside 1..3.
Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const;
ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
SmallVector<uint64_t> getFunctionStarts() const;
ArrayRef<uint8_t> getUuid() const;
@ -691,6 +816,7 @@ private:
const char *LinkOptHintsLoadCmd = nullptr;
const char *DyldInfoLoadCmd = nullptr;
const char *FuncStartsLoadCmd = nullptr;
const char *DyldChainedFixupsLoadCmd = nullptr;
const char *UuidLoadCmd = nullptr;
bool HasPageZeroSegment = false;
};

View File

@ -1380,6 +1380,11 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
"LC_DYLD_INFO_ONLY", Elements)))
return;
} else if (Load.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
if ((Err = checkLinkeditDataCommand(
*this, Load, I, &DyldChainedFixupsLoadCmd,
"LC_DYLD_CHAINED_FIXUPS", Elements, "chained fixups")))
return;
} else if (Load.C.cmd == MachO::LC_UUID) {
if (Load.C.cmdsize != sizeof(MachO::uuid_command)) {
Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect "
@ -1595,9 +1600,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
// Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported.
} else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) {
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_IDENT) {
// Note: LC_IDENT is ignored.
continue;
@ -3185,6 +3190,106 @@ iterator_range<export_iterator> MachOObjectFile::exports(Error &Err) const {
return exports(Err, getDyldInfoExportsTrie(), this);
}
MachOAbstractFixupEntry::MachOAbstractFixupEntry(Error *E,
                                                 const MachOObjectFile *O)
    : E(E), O(O) {
  // Walk the load commands and remember the vmaddr of the first segment
  // (32- or 64-bit) that is named __TEXT; stop as soon as it is found.
  const StringRef TextSegmentName("__TEXT");
  for (const auto &LoadCmd : O->load_commands()) {
    switch (LoadCmd.C.cmd) {
    case MachO::LC_SEGMENT: {
      const MachO::segment_command Seg = O->getSegmentLoadCommand(LoadCmd);
      if (StringRef(Seg.segname) == TextSegmentName) {
        TextAddress = Seg.vmaddr;
        return;
      }
      break;
    }
    case MachO::LC_SEGMENT_64: {
      const MachO::segment_command_64 Seg64 =
          O->getSegment64LoadCommand(LoadCmd);
      if (StringRef(Seg64.segname) == TextSegmentName) {
        TextAddress = Seg64.vmaddr;
        return;
      }
      break;
    }
    default:
      // Not a segment command; keep scanning.
      break;
    }
  }
}
// Trivial accessors for the state held by MachOAbstractFixupEntry.

/// \return the stored segment index (the member is initialised to -1).
int32_t MachOAbstractFixupEntry::segmentIndex() const { return SegmentIndex; }
/// \return the stored offset within the current segment.
uint64_t MachOAbstractFixupEntry::segmentOffset() const {
return SegmentOffset;
}
/// \return the result of MachOObjectFile::BindRebaseAddress() for the current
/// segment index at offset 0.
uint64_t MachOAbstractFixupEntry::segmentAddress() const {
return O->BindRebaseAddress(SegmentIndex, 0);
}
/// \return the result of MachOObjectFile::BindRebaseSegmentName() for the
/// current segment index.
StringRef MachOAbstractFixupEntry::segmentName() const {
return O->BindRebaseSegmentName(SegmentIndex);
}
/// \return the result of MachOObjectFile::BindRebaseSectionName() for the
/// current segment index and offset.
StringRef MachOAbstractFixupEntry::sectionName() const {
return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
}
/// \return the result of MachOObjectFile::BindRebaseAddress() for the current
/// segment index and offset.
uint64_t MachOAbstractFixupEntry::address() const {
return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
}
/// \return the stored symbol name.
StringRef MachOAbstractFixupEntry::symbolName() const { return SymbolName; }
/// \return the stored addend.
int64_t MachOAbstractFixupEntry::addend() const { return Addend; }
/// \return the stored flags.
uint32_t MachOAbstractFixupEntry::flags() const { return Flags; }
/// \return the stored ordinal.
int MachOAbstractFixupEntry::ordinal() const { return Ordinal; }
/// \return the constant string "unknown".
StringRef MachOAbstractFixupEntry::typeName() const { return "unknown"; }
/// Puts the entry back into its initial, not-done state.
/// SymbolName, PointerValue and RawValue are left untouched.
void MachOAbstractFixupEntry::moveToFirst() {
  // No longer at the end.
  Done = false;

  // Position within the image.
  SegmentIndex = -1;
  SegmentOffset = 0;

  // Per-fixup attributes.
  Addend = 0;
  Flags = 0;
  Ordinal = 0;
}
/// Marks the entry as done; no other state is changed.
void MachOAbstractFixupEntry::moveToEnd() { Done = true; }
/// Builds a chained fixup entry for \p O.
///
/// \param E      out-parameter that receives any error hit while loading the
///               fixup targets.
/// \param O      object file the entry refers to.
/// \param Kind   fixup category requested by the caller; stored as-is.
/// \param Parse  when false, no fixup targets are loaded.
MachOChainedFixupEntry::MachOChainedFixupEntry(Error *E,
                                               const MachOObjectFile *O,
                                               FixupKind Kind, bool Parse)
    : MachOAbstractFixupEntry(E, O), Kind(Kind) {
  ErrorAsOutParameter ErrAsOutParam(E);
  if (!Parse)
    return;

  auto TargetsOrErr = O->getDyldChainedFixupTargets();
  if (!TargetsOrErr) {
    // Hand the failure to the caller through the out-parameter.
    *E = TargetsOrErr.takeError();
    return;
  }
  FixupTargets = *TargetsOrErr;
}
/// Rewinds the entry: clears the fixup index and the base-class state, then
/// advances once via moveNext().
void MachOChainedFixupEntry::moveToFirst() {
  FixupIndex = 0;
  MachOAbstractFixupEntry::moveToFirst();

  // Advance only after all state has been reset.
  moveNext();
}
/// Marks the entry as done by forwarding to the base-class implementation.
void MachOChainedFixupEntry::moveToEnd() {
MachOAbstractFixupEntry::moveToEnd();
}
/// Advances the entry. This implementation decodes nothing: it only sets Done,
/// so any iteration ends immediately.
void MachOChainedFixupEntry::moveNext() { Done = true; }
/// Position equality for chained fixup entries (used by fixup_iterator).
///
/// - Two done entries are equal (both are at the end).
/// - A done entry never equals one that is not done. Without this check a
///   live entry at FixupIndex 0 would compare equal to the end entry, whose
///   FixupIndex is also 0.
/// - Two live entries are equal only when they are at the same FixupIndex.
bool MachOChainedFixupEntry::operator==(
    const MachOChainedFixupEntry &Other) const {
  if (Done && Other.Done)
    return true;
  if (Done != Other.Done)
    return false;
  return FixupIndex == Other.FixupIndex;
}
MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit)
: E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
@ -4193,6 +4298,18 @@ iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) {
MachOBindEntry::Kind::Weak);
}
/// Builds the [begin, end) range over this object's chained fixups.
///
/// Only the begin entry loads the fixup targets; the end entry is a sentinel.
/// Any error raised while loading is reported through \p Err.
iterator_range<fixup_iterator>
MachOObjectFile::fixupTable(Error &Err,
                            MachOChainedFixupEntry::FixupKind Kind) {
  MachOChainedFixupEntry Begin(&Err, this, Kind, /*Parse=*/true);
  Begin.moveToFirst();

  MachOChainedFixupEntry End(&Err, this, Kind, /*Parse=*/false);
  End.moveToEnd();

  return make_range(fixup_iterator(Begin), fixup_iterator(End));
}
MachOObjectFile::load_command_iterator
MachOObjectFile::begin_load_commands() const {
return LoadCommands.begin();
@ -4648,6 +4765,44 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}
/// Reads and validates the chained fixups header.
///
/// \return an empty target list when the object has no LC_DYLD_CHAINED_FIXUPS
/// command, when that command's data offset is 0, or when the header passes
/// validation (targets are not decoded here). An error is returned when the
/// load command or header cannot be read, or when the header carries an
/// unknown version or imports format.
Expected<std::vector<ChainedFixupTarget>>
MachOObjectFile::getDyldChainedFixupTargets() const {
  std::vector<ChainedFixupTarget> Targets;

  // No chained fixups load command: nothing to report.
  if (!DyldChainedFixupsLoadCmd)
    return Targets;

  auto LinkeditCmdOrErr = getStructOrErr<MachO::linkedit_data_command>(
      *this, DyldChainedFixupsLoadCmd);
  if (!LinkeditCmdOrErr)
    return LinkeditCmdOrErr.takeError();

  // Dylib stubs keep the load command but zero out the data offset; treat
  // that as "no targets".
  const uint64_t HeaderOffset = LinkeditCmdOrErr->dataoff;
  if (HeaderOffset == 0)
    return Targets;

  // Read the chained fixups header at that offset.
  auto HeaderOrErr = getStructOrErr<MachO::dyld_chained_fixups_header>(
      *this, getPtr(*this, HeaderOffset));
  if (!HeaderOrErr)
    return HeaderOrErr.takeError();
  const MachO::dyld_chained_fixups_header Header = HeaderOrErr.get();

  // Only version 0 and imports formats 1 through 3 are understood.
  if (Header.fixups_version != 0)
    return malformedError(Twine("bad chained fixups: unknown version: ") +
                          Twine(Header.fixups_version));

  const bool KnownImportsFormat =
      Header.imports_format >= 1 && Header.imports_format <= 3;
  if (!KnownImportsFormat)
    return malformedError(
        Twine("bad chained fixups: unknown imports format: ") +
        Twine(Header.imports_format));

  return Targets;
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
if (!DyldInfoLoadCmd)
return None;

View File

@ -0,0 +1,11 @@
RUN: cat %p/../Inputs/MachO/chained-fixups.yaml \
RUN: | sed 's/__LINKEDIT: 00000000/__LINKEDIT: AB000000/' \
RUN: | yaml2obj | not llvm-objdump --macho --dyld_info - 2>&1 \
RUN: | FileCheck %s --check-prefix=HEADER1
HEADER1: truncated or malformed object (bad chained fixups: unknown version: 171)
RUN: cat %p/../Inputs/MachO/chained-fixups.yaml \
RUN: | sed 's/1000000010000000/1000000AB0000000/' \
RUN: | yaml2obj | not llvm-objdump --macho --dyld_info - 2>&1 \
RUN: | FileCheck %s --check-prefix=HEADER2
HEADER2: truncated or malformed object (bad chained fixups: unknown imports format: 171)

View File

@ -0,0 +1,173 @@
# This file was produced using:
# echo "int ext;" > a.c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib
# echo "extern int ext;" > b.c
# echo "int padding;" >> b.c
# echo "int *p = &ext + 4;" >> b.c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c
# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x100000C
cpusubtype: 0x0
filetype: 0x6
ncmds: 16
sizeofcmds: 816
flags: 0x100085
reserved: 0x0
LoadCommands:
- cmd: LC_SEGMENT_64
cmdsize: 152
segname: __TEXT
vmaddr: 0
vmsize: 16384
fileoff: 0
filesize: 16384
maxprot: 5
initprot: 5
nsects: 1
flags: 0
Sections:
- sectname: __text
segname: __TEXT
addr: 0x4000
size: 0
offset: 0x4000
align: 0
reloff: 0x0
nreloc: 0
flags: 0x80000400
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: ''
- cmd: LC_SEGMENT_64
cmdsize: 152
segname: __DATA
vmaddr: 16384
vmsize: 16384
fileoff: 16384
filesize: 16384
maxprot: 3
initprot: 3
nsects: 1
flags: 0
Sections:
- sectname: __data
segname: __DATA
addr: 0x4000
size: 8
offset: 0x4000
align: 3
reloff: 0x0
nreloc: 0
flags: 0x0
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: '0000001000000080'
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __LINKEDIT
vmaddr: 32768
vmsize: 16384
fileoff: 32768
filesize: 160
maxprot: 1
initprot: 1
nsects: 0
flags: 0
- cmd: LC_ID_DYLIB
cmdsize: 64
dylib:
name: 24
timestamp: 1
current_version: 0
compatibility_version: 0
Content: '@executable_path/libfixups.dylib'
ZeroPadBytes: 8
- cmd: LC_DYLD_CHAINED_FIXUPS
cmdsize: 16
dataoff: 32768
datasize: 88
- cmd: LC_DYLD_EXPORTS_TRIE
cmdsize: 16
dataoff: 32856
datasize: 16
- cmd: LC_SYMTAB
cmdsize: 24
symoff: 32880
nsyms: 2
stroff: 32912
strsize: 16
- cmd: LC_DYSYMTAB
cmdsize: 80
ilocalsym: 0
nlocalsym: 0
iextdefsym: 0
nextdefsym: 1
iundefsym: 1
nundefsym: 1
tocoff: 0
ntoc: 0
modtaboff: 0
nmodtab: 0
extrefsymoff: 0
nextrefsyms: 0
indirectsymoff: 0
nindirectsyms: 0
extreloff: 0
nextrel: 0
locreloff: 0
nlocrel: 0
- cmd: LC_UUID
cmdsize: 24
uuid: 56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9
- cmd: LC_BUILD_VERSION
cmdsize: 32
platform: 2
minos: 983296
sdk: 983552
ntools: 1
Tools:
- tool: 3
version: 46596096
- cmd: LC_SOURCE_VERSION
cmdsize: 16
version: 0
- cmd: LC_ENCRYPTION_INFO_64
cmdsize: 24
cryptoff: 16384
cryptsize: 0
cryptid: 0
pad: 0
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 0
compatibility_version: 0
Content: '@executable_path/liba.dylib'
ZeroPadBytes: 5
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 85917696
compatibility_version: 65536
Content: '/usr/lib/libSystem.B.dylib'
ZeroPadBytes: 6
- cmd: LC_FUNCTION_STARTS
cmdsize: 16
dataoff: 32872
datasize: 8
- cmd: LC_DATA_IN_CODE
cmdsize: 16
dataoff: 32880
datasize: 0
__LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
...

View File

@ -3,5 +3,4 @@ RUN: | FileCheck %s --match-full-lines --strict-whitespace \
RUN: --implicit-check-not={{.}}
CHECK:{{.*}}bind.macho-x86_64:
CHECK-NEXT:dyld information:
CHECK-NEXT:[not yet implemented].
CHECK:dyld information:

View File

@ -1184,9 +1184,20 @@ static void PrintLinkOptHints(MachOObjectFile *O) {
}
}
// Walks every chained fixup of the requested kind in Obj. Nothing is printed
// per entry; the walk exists so that an error raised while building or
// iterating the fixup table is reported against the object's file name.
static void printMachOChainedFixups(object::MachOObjectFile *Obj,
                                    MachOChainedFixupEntry::FixupKind Type) {
  Error Err = Error::success();

  for (const object::MachOChainedFixupEntry &Fixup :
       Obj->fixupTable(Err, Type))
    (void)Fixup; // Entries are not printed.

  if (!Err)
    return;
  reportError(std::move(Err), Obj->getFileName());
}
// Emits the "dyld information:" banner and then walks the chained fixup binds
// of O via printMachOChainedFixups(), which reports any error it hits.
static void PrintDyldInfo(MachOObjectFile *O) {
outs() << "dyld information:\n";
outs() << "[not yet implemented].\n";
outs() << "dyld information:" << '\n';
printMachOChainedFixups(O, MachOChainedFixupEntry::FixupKind::Bind);
}
static void PrintDylibs(MachOObjectFile *O, bool JustId) {