llvm-project/llvm/lib/Object/MachOObjectFile.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

4665 lines
177 KiB
C++
Raw Normal View History

//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the MachOObjectFile class, which binds the MachOObject
// class to the generic ObjectFile wrapper.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <list>
#include <memory>
#include <string>
#include <system_error>
using namespace llvm;
using namespace object;
namespace {
struct section_base {
char sectname[16];
char segname[16];
};
} // end anonymous namespace
static Error malformedError(const Twine &Msg) {
return make_error<GenericBinaryError>("truncated or malformed object (" +
Msg + ")",
object_error::parse_failed);
}
// FIXME: Replace all uses of this function with getStructOrErr.
template <typename T>
static T getStruct(const MachOObjectFile &O, const char *P) {
// Don't read before the beginning or past the end of the file
if (P < O.getData().begin() || P + sizeof(T) > O.getData().end())
report_fatal_error("Malformed MachO file.");
T Cmd;
memcpy(&Cmd, P, sizeof(T));
if (O.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(Cmd);
return Cmd;
}
template <typename T>
static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) {
// Don't read before the beginning or past the end of the file
if (P < O.getData().begin() || P + sizeof(T) > O.getData().end())
return malformedError("Structure read out-of-range");
T Cmd;
memcpy(&Cmd, P, sizeof(T));
if (O.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(Cmd);
return Cmd;
}
static const char *
getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L,
unsigned Sec) {
uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
bool Is64 = O.is64Bit();
unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) :
sizeof(MachO::segment_command);
unsigned SectionSize = Is64 ? sizeof(MachO::section_64) :
sizeof(MachO::section);
uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize;
return reinterpret_cast<const char*>(SectionAddr);
}
static const char *getPtr(const MachOObjectFile &O, size_t Offset) {
assert(Offset <= O.getData().size());
return O.getData().data() + Offset;
}
static MachO::nlist_base
getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) {
const char *P = reinterpret_cast<const char *>(DRI.p);
return getStruct<MachO::nlist_base>(O, P);
}
static StringRef parseSegmentOrSectionName(const char *P) {
if (P[15] == 0)
// Null terminated.
return P;
// Not null terminated, so this is a 16 char string.
return StringRef(P, 16);
}
static unsigned getCPUType(const MachOObjectFile &O) {
return O.getHeader().cputype;
}
static unsigned getCPUSubType(const MachOObjectFile &O) {
return O.getHeader().cpusubtype;
}
static uint32_t
getPlainRelocationAddress(const MachO::any_relocation_info &RE) {
return RE.r_word0;
}
static unsigned
getScatteredRelocationAddress(const MachO::any_relocation_info &RE) {
return RE.r_word0 & 0xffffff;
}
static bool getPlainRelocationPCRel(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
if (O.isLittleEndian())
return (RE.r_word1 >> 24) & 1;
return (RE.r_word1 >> 7) & 1;
}
static bool
getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) {
return (RE.r_word0 >> 30) & 1;
}
static unsigned getPlainRelocationLength(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
if (O.isLittleEndian())
return (RE.r_word1 >> 25) & 3;
return (RE.r_word1 >> 5) & 3;
}
static unsigned
getScatteredRelocationLength(const MachO::any_relocation_info &RE) {
return (RE.r_word0 >> 28) & 3;
}
static unsigned getPlainRelocationType(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
if (O.isLittleEndian())
return RE.r_word1 >> 28;
return RE.r_word1 & 0xf;
}
static uint32_t getSectionFlags(const MachOObjectFile &O,
DataRefImpl Sec) {
if (O.is64Bit()) {
MachO::section_64 Sect = O.getSection64(Sec);
return Sect.flags;
}
MachO::section Sect = O.getSection(Sec);
return Sect.flags;
}
static Expected<MachOObjectFile::LoadCommandInfo>
getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr,
uint32_t LoadCommandIndex) {
if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) {
if (CmdOrErr->cmdsize + Ptr > Obj.getData().end())
return malformedError("load command " + Twine(LoadCommandIndex) +
" extends past end of file");
if (CmdOrErr->cmdsize < 8)
return malformedError("load command " + Twine(LoadCommandIndex) +
" with size less than 8 bytes");
return MachOObjectFile::LoadCommandInfo({Ptr, *CmdOrErr});
} else
return CmdOrErr.takeError();
}
static Expected<MachOObjectFile::LoadCommandInfo>
getFirstLoadCommandInfo(const MachOObjectFile &Obj) {
unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
: sizeof(MachO::mach_header);
if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds)
return malformedError("load command 0 extends past the end all load "
"commands in the file");
return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0);
}
static Expected<MachOObjectFile::LoadCommandInfo>
getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex,
const MachOObjectFile::LoadCommandInfo &L) {
unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
: sizeof(MachO::mach_header);
if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) >
Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds)
return malformedError("load command " + Twine(LoadCommandIndex + 1) +
" extends past the end all load commands in the file");
return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1);
}
template <typename T>
static void parseHeader(const MachOObjectFile &Obj, T &Header,
Error &Err) {
if (sizeof(T) > Obj.getData().size()) {
Err = malformedError("the mach header extends past the end of the "
"file");
Start to add real error messages for malformed Mach-O files. And update the existing test cases in test/Object/macho-invalid.test to use llvm-objdump with the -macho option to produce these error messages and stop producing the generic "Invalid data was encountered while parsing the file" message. Working from the beginning of the file, if the mach header is too large for the size of the file and then if the load commands that follow extend past the end of the file these two errors now generate correct error messages. Both of these have existing test cases in test/Object/macho-invalid.test . But the first with macho-invalid-header it will never trigger the error message "mach header extends past the end of the file" using any of the llvm tools as they all use identify_magic() which rejects files with the correct magic number that are too small in size. So I tested this by hacking that code and seeing the error message down in parseHeader() really does happen. So in case there is ever code in llvm that directly calls createMachOObjectFile() this error message will be correctly produced. The second error message of "load commands extends past the end of the file" is triggered by a number of existing tests cases in test/Object/macho-invalid.test . Also other tests trigger different error messages now like "ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table". There are two existing test cases that still get the "Invalid data was encountered ..." error messages that I will tackle next. But they will involve a bit of pluming an Expect<...> up through the call stack and I want to do those as separate changes. FYI, for those test cases that were trying to test specific errors that now get different errors I’ll fix those in follow on changes and create new test cases for those so they test the error they were meant to test. llvm-svn: 266248
2016-04-14 05:17:58 +08:00
return;
}
if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0)))
Header = *HeaderOrErr;
else
Err = HeaderOrErr.takeError();
}
// This is used to check for overlapping of Mach-O elements.
struct MachOElement {
uint64_t Offset;
uint64_t Size;
const char *Name;
};
static Error checkOverlappingElement(std::list<MachOElement> &Elements,
uint64_t Offset, uint64_t Size,
const char *Name) {
if (Size == 0)
return Error::success();
for (auto it=Elements.begin() ; it != Elements.end(); ++it) {
auto E = *it;
if ((Offset >= E.Offset && Offset < E.Offset + E.Size) ||
(Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) ||
(Offset <= E.Offset && Offset + Size >= E.Offset + E.Size))
return malformedError(Twine(Name) + " at offset " + Twine(Offset) +
" with a size of " + Twine(Size) + ", overlaps " +
E.Name + " at offset " + Twine(E.Offset) + " with "
"a size of " + Twine(E.Size));
auto nt = it;
nt++;
if (nt != Elements.end()) {
auto N = *nt;
if (Offset + Size <= N.Offset) {
Elements.insert(nt, {Offset, Size, Name});
return Error::success();
}
}
}
Elements.push_back({Offset, Size, Name});
return Error::success();
}
// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all
// sections to \param Sections, and optionally sets
// \param IsPageZeroSegment to true.
template <typename Segment, typename Section>
static Error parseSegmentLoadCommand(
const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load,
SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment,
uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders,
std::list<MachOElement> &Elements) {
const unsigned SegmentLoadSize = sizeof(Segment);
if (Load.C.cmdsize < SegmentLoadSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" " + CmdName + " cmdsize too small");
if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) {
Segment S = SegOrErr.get();
const unsigned SectionSize = sizeof(Section);
uint64_t FileSize = Obj.getData().size();
if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize ||
S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
2016-08-22 08:58:47 +08:00
" inconsistent cmdsize in " + CmdName +
" for the number of sections");
for (unsigned J = 0; J < S.nsects; ++J) {
const char *Sec = getSectionPtr(Obj, Load, J);
Sections.push_back(Sec);
auto SectionOrErr = getStructOrErr<Section>(Obj, Sec);
if (!SectionOrErr)
return SectionOrErr.takeError();
Section s = SectionOrErr.get();
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
s.offset > FileSize)
return malformedError("offset field of section " + Twine(J) + " in " +
CmdName + " command " + Twine(LoadCommandIndex) +
" extends past the end of the file");
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
2016-08-22 08:58:04 +08:00
s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 &&
s.offset < SizeOfHeaders && s.size != 0)
return malformedError("offset field of section " + Twine(J) + " in " +
CmdName + " command " + Twine(LoadCommandIndex) +
" not past the headers of the file");
uint64_t BigSize = s.offset;
BigSize += s.size;
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
BigSize > FileSize)
return malformedError("offset field plus size field of section " +
Twine(J) + " in " + CmdName + " command " +
Twine(LoadCommandIndex) +
" extends past the end of the file");
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
s.size > S.filesize)
return malformedError("size field of section " +
Twine(J) + " in " + CmdName + " command " +
Twine(LoadCommandIndex) +
" greater than the segment");
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 &&
2016-08-22 08:58:04 +08:00
s.addr < S.vmaddr)
return malformedError("addr field of section " + Twine(J) + " in " +
CmdName + " command " + Twine(LoadCommandIndex) +
" less than the segment's vmaddr");
BigSize = s.addr;
BigSize += s.size;
uint64_t BigEnd = S.vmaddr;
BigEnd += S.vmsize;
if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd)
2016-08-22 08:58:04 +08:00
return malformedError("addr field plus size of section " + Twine(J) +
" in " + CmdName + " command " +
Twine(LoadCommandIndex) +
" greater than than "
"the segment's vmaddr plus vmsize");
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
s.flags != MachO::S_THREAD_LOCAL_ZEROFILL)
if (Error Err = checkOverlappingElement(Elements, s.offset, s.size,
"section contents"))
return Err;
if (s.reloff > FileSize)
2016-08-22 08:58:04 +08:00
return malformedError("reloff field of section " + Twine(J) + " in " +
CmdName + " command " + Twine(LoadCommandIndex) +
" extends past the end of the file");
BigSize = s.nreloc;
BigSize *= sizeof(struct MachO::relocation_info);
BigSize += s.reloff;
if (BigSize > FileSize)
return malformedError("reloff field plus nreloc field times sizeof("
"struct relocation_info) of section " +
Twine(J) + " in " + CmdName + " command " +
2016-08-22 08:58:04 +08:00
Twine(LoadCommandIndex) +
" extends past the end of the file");
if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc *
sizeof(struct
MachO::relocation_info),
"section relocation entries"))
return Err;
}
if (S.fileoff > FileSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
2016-08-22 08:58:47 +08:00
" fileoff field in " + CmdName +
" extends past the end of the file");
uint64_t BigSize = S.fileoff;
BigSize += S.filesize;
if (BigSize > FileSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" fileoff field plus filesize field in " +
CmdName + " extends past the end of the file");
if (S.vmsize != 0 && S.filesize > S.vmsize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" filesize field in " + CmdName +
" greater than vmsize field");
IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname);
} else
return SegOrErr.takeError();
return Error::success();
}
static Error checkSymtabCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **SymtabLoadCmd,
std::list<MachOElement> &Elements) {
if (Load.C.cmdsize < sizeof(MachO::symtab_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_SYMTAB cmdsize too small");
if (*SymtabLoadCmd != nullptr)
return malformedError("more than one LC_SYMTAB command");
auto SymtabOrErr = getStructOrErr<MachO::symtab_command>(Obj, Load.Ptr);
if (!SymtabOrErr)
return SymtabOrErr.takeError();
MachO::symtab_command Symtab = SymtabOrErr.get();
if (Symtab.cmdsize != sizeof(MachO::symtab_command))
return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) +
" has incorrect cmdsize");
uint64_t FileSize = Obj.getData().size();
if (Symtab.symoff > FileSize)
return malformedError("symoff field of LC_SYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end "
"of the file");
uint64_t SymtabSize = Symtab.nsyms;
const char *struct_nlist_name;
if (Obj.is64Bit()) {
SymtabSize *= sizeof(MachO::nlist_64);
struct_nlist_name = "struct nlist_64";
} else {
SymtabSize *= sizeof(MachO::nlist);
struct_nlist_name = "struct nlist";
}
uint64_t BigSize = SymtabSize;
BigSize += Symtab.symoff;
if (BigSize > FileSize)
return malformedError("symoff field plus nsyms field times sizeof(" +
Twine(struct_nlist_name) + ") of LC_SYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end "
"of the file");
if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize,
"symbol table"))
return Err;
if (Symtab.stroff > FileSize)
return malformedError("stroff field of LC_SYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end "
"of the file");
BigSize = Symtab.stroff;
BigSize += Symtab.strsize;
if (BigSize > FileSize)
return malformedError("stroff field plus strsize field of LC_SYMTAB "
"command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
if (Error Err = checkOverlappingElement(Elements, Symtab.stroff,
Symtab.strsize, "string table"))
return Err;
*SymtabLoadCmd = Load.Ptr;
return Error::success();
}
static Error checkDysymtabCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **DysymtabLoadCmd,
std::list<MachOElement> &Elements) {
if (Load.C.cmdsize < sizeof(MachO::dysymtab_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_DYSYMTAB cmdsize too small");
if (*DysymtabLoadCmd != nullptr)
return malformedError("more than one LC_DYSYMTAB command");
auto DysymtabOrErr =
getStructOrErr<MachO::dysymtab_command>(Obj, Load.Ptr);
if (!DysymtabOrErr)
return DysymtabOrErr.takeError();
MachO::dysymtab_command Dysymtab = DysymtabOrErr.get();
if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command))
return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) +
" has incorrect cmdsize");
uint64_t FileSize = Obj.getData().size();
if (Dysymtab.tocoff > FileSize)
return malformedError("tocoff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
uint64_t BigSize = Dysymtab.ntoc;
BigSize *= sizeof(MachO::dylib_table_of_contents);
BigSize += Dysymtab.tocoff;
if (BigSize > FileSize)
return malformedError("tocoff field plus ntoc field times sizeof(struct "
"dylib_table_of_contents) of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff,
Dysymtab.ntoc * sizeof(struct
2017-10-18 21:31:28 +08:00
MachO::dylib_table_of_contents),
"table of contents"))
return Err;
if (Dysymtab.modtaboff > FileSize)
return malformedError("modtaboff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
BigSize = Dysymtab.nmodtab;
const char *struct_dylib_module_name;
uint64_t sizeof_modtab;
if (Obj.is64Bit()) {
sizeof_modtab = sizeof(MachO::dylib_module_64);
struct_dylib_module_name = "struct dylib_module_64";
} else {
sizeof_modtab = sizeof(MachO::dylib_module);
struct_dylib_module_name = "struct dylib_module";
}
BigSize *= sizeof_modtab;
BigSize += Dysymtab.modtaboff;
if (BigSize > FileSize)
return malformedError("modtaboff field plus nmodtab field times sizeof(" +
Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB "
"command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff,
Dysymtab.nmodtab * sizeof_modtab,
2017-08-28 14:47:47 +08:00
"module table"))
return Err;
if (Dysymtab.extrefsymoff > FileSize)
return malformedError("extrefsymoff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
BigSize = Dysymtab.nextrefsyms;
BigSize *= sizeof(MachO::dylib_reference);
BigSize += Dysymtab.extrefsymoff;
if (BigSize > FileSize)
return malformedError("extrefsymoff field plus nextrefsyms field times "
"sizeof(struct dylib_reference) of LC_DYSYMTAB "
"command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff,
Dysymtab.nextrefsyms *
2017-08-28 14:47:47 +08:00
sizeof(MachO::dylib_reference),
"reference table"))
return Err;
if (Dysymtab.indirectsymoff > FileSize)
return malformedError("indirectsymoff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
BigSize = Dysymtab.nindirectsyms;
BigSize *= sizeof(uint32_t);
BigSize += Dysymtab.indirectsymoff;
if (BigSize > FileSize)
return malformedError("indirectsymoff field plus nindirectsyms field times "
"sizeof(uint32_t) of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff,
Dysymtab.nindirectsyms *
sizeof(uint32_t),
2017-10-18 21:31:28 +08:00
"indirect table"))
return Err;
if (Dysymtab.extreloff > FileSize)
return malformedError("extreloff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
BigSize = Dysymtab.nextrel;
BigSize *= sizeof(MachO::relocation_info);
BigSize += Dysymtab.extreloff;
if (BigSize > FileSize)
return malformedError("extreloff field plus nextrel field times sizeof"
"(struct relocation_info) of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.extreloff,
Dysymtab.nextrel *
2017-08-28 14:47:47 +08:00
sizeof(MachO::relocation_info),
"external relocation table"))
return Err;
if (Dysymtab.locreloff > FileSize)
return malformedError("locreloff field of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
BigSize = Dysymtab.nlocrel;
BigSize *= sizeof(MachO::relocation_info);
BigSize += Dysymtab.locreloff;
if (BigSize > FileSize)
return malformedError("locreloff field plus nlocrel field times sizeof"
"(struct relocation_info) of LC_DYSYMTAB command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff,
Dysymtab.nlocrel *
2017-08-28 14:47:47 +08:00
sizeof(MachO::relocation_info),
"local relocation table"))
return Err;
*DysymtabLoadCmd = Load.Ptr;
return Error::success();
}
static Error checkLinkeditDataCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **LoadCmd, const char *CmdName,
std::list<MachOElement> &Elements,
const char *ElementName) {
if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
if (*LoadCmd != nullptr)
return malformedError("more than one " + Twine(CmdName) + " command");
auto LinkDataOrError =
getStructOrErr<MachO::linkedit_data_command>(Obj, Load.Ptr);
if (!LinkDataOrError)
return LinkDataOrError.takeError();
MachO::linkedit_data_command LinkData = LinkDataOrError.get();
if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command))
return malformedError(Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " has incorrect cmdsize");
uint64_t FileSize = Obj.getData().size();
if (LinkData.dataoff > FileSize)
return malformedError("dataoff field of " + Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
uint64_t BigSize = LinkData.dataoff;
BigSize += LinkData.datasize;
if (BigSize > FileSize)
return malformedError("dataoff field plus datasize field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff,
LinkData.datasize, ElementName))
return Err;
*LoadCmd = Load.Ptr;
return Error::success();
}
static Error checkDyldInfoCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **LoadCmd, const char *CmdName,
std::list<MachOElement> &Elements) {
if (Load.C.cmdsize < sizeof(MachO::dyld_info_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
if (*LoadCmd != nullptr)
return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY "
"command");
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(Obj, Load.Ptr);
if (!DyldInfoOrErr)
return DyldInfoOrErr.takeError();
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command))
return malformedError(Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " has incorrect cmdsize");
uint64_t FileSize = Obj.getData().size();
if (DyldInfo.rebase_off > FileSize)
return malformedError("rebase_off field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
uint64_t BigSize = DyldInfo.rebase_off;
BigSize += DyldInfo.rebase_size;
if (BigSize > FileSize)
return malformedError("rebase_off field plus rebase_size field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off,
DyldInfo.rebase_size,
"dyld rebase info"))
return Err;
if (DyldInfo.bind_off > FileSize)
return malformedError("bind_off field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
BigSize = DyldInfo.bind_off;
BigSize += DyldInfo.bind_size;
if (BigSize > FileSize)
return malformedError("bind_off field plus bind_size field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off,
DyldInfo.bind_size,
"dyld bind info"))
return Err;
if (DyldInfo.weak_bind_off > FileSize)
return malformedError("weak_bind_off field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
BigSize = DyldInfo.weak_bind_off;
BigSize += DyldInfo.weak_bind_size;
if (BigSize > FileSize)
return malformedError("weak_bind_off field plus weak_bind_size field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off,
DyldInfo.weak_bind_size,
"dyld weak bind info"))
return Err;
if (DyldInfo.lazy_bind_off > FileSize)
return malformedError("lazy_bind_off field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
BigSize = DyldInfo.lazy_bind_off;
BigSize += DyldInfo.lazy_bind_size;
if (BigSize > FileSize)
return malformedError("lazy_bind_off field plus lazy_bind_size field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off,
DyldInfo.lazy_bind_size,
"dyld lazy bind info"))
return Err;
if (DyldInfo.export_off > FileSize)
return malformedError("export_off field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
BigSize = DyldInfo.export_off;
BigSize += DyldInfo.export_size;
if (BigSize > FileSize)
return malformedError("export_off field plus export_size field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off,
DyldInfo.export_size,
"dyld export info"))
return Err;
*LoadCmd = Load.Ptr;
return Error::success();
}
static Error checkDylibCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex, const char *CmdName) {
if (Load.C.cmdsize < sizeof(MachO::dylib_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
auto CommandOrErr = getStructOrErr<MachO::dylib_command>(Obj, Load.Ptr);
if (!CommandOrErr)
return CommandOrErr.takeError();
MachO::dylib_command D = CommandOrErr.get();
if (D.dylib.name < sizeof(MachO::dylib_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field too small, not past "
"the end of the dylib_command struct");
if (D.dylib.name >= D.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field extends past the end "
"of the load command");
// Make sure there is a null between the starting offset of the name and
// the end of the load command.
uint32_t i;
const char *P = (const char *)Load.Ptr;
for (i = D.dylib.name; i < D.cmdsize; i++)
if (P[i] == '\0')
break;
if (i >= D.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " library name extends past the end of the "
"load command");
return Error::success();
}
static Error checkDylibIdCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **LoadCmd) {
if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex,
"LC_ID_DYLIB"))
return Err;
if (*LoadCmd != nullptr)
return malformedError("more than one LC_ID_DYLIB command");
if (Obj.getHeader().filetype != MachO::MH_DYLIB &&
Obj.getHeader().filetype != MachO::MH_DYLIB_STUB)
return malformedError("LC_ID_DYLIB load command in non-dynamic library "
"file type");
*LoadCmd = Load.Ptr;
return Error::success();
}
static Error checkDyldCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex, const char *CmdName) {
if (Load.C.cmdsize < sizeof(MachO::dylinker_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
auto CommandOrErr = getStructOrErr<MachO::dylinker_command>(Obj, Load.Ptr);
if (!CommandOrErr)
return CommandOrErr.takeError();
MachO::dylinker_command D = CommandOrErr.get();
if (D.name < sizeof(MachO::dylinker_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field too small, not past "
"the end of the dylinker_command struct");
if (D.name >= D.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field extends past the end "
"of the load command");
// Make sure there is a null between the starting offset of the name and
// the end of the load command.
uint32_t i;
const char *P = (const char *)Load.Ptr;
for (i = D.name; i < D.cmdsize; i++)
if (P[i] == '\0')
break;
if (i >= D.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " dyld name extends past the end of the "
"load command");
return Error::success();
}
static Error checkVersCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char **LoadCmd, const char *CmdName) {
if (Load.C.cmdsize != sizeof(MachO::version_min_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " has incorrect cmdsize");
if (*LoadCmd != nullptr)
return malformedError("more than one LC_VERSION_MIN_MACOSX, "
"LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or "
"LC_VERSION_MIN_WATCHOS command");
*LoadCmd = Load.Ptr;
return Error::success();
}
static Error checkNoteCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
std::list<MachOElement> &Elements) {
if (Load.C.cmdsize != sizeof(MachO::note_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_NOTE has incorrect cmdsize");
auto NoteCmdOrErr = getStructOrErr<MachO::note_command>(Obj, Load.Ptr);
if (!NoteCmdOrErr)
return NoteCmdOrErr.takeError();
MachO::note_command Nt = NoteCmdOrErr.get();
uint64_t FileSize = Obj.getData().size();
if (Nt.offset > FileSize)
return malformedError("offset field of LC_NOTE command " +
Twine(LoadCommandIndex) + " extends "
"past the end of the file");
uint64_t BigSize = Nt.offset;
BigSize += Nt.size;
if (BigSize > FileSize)
return malformedError("size field plus offset field of LC_NOTE command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size,
"LC_NOTE data"))
return Err;
return Error::success();
}
static Error
parseBuildVersionCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
SmallVectorImpl<const char*> &BuildTools,
uint32_t LoadCommandIndex) {
auto BVCOrErr =
getStructOrErr<MachO::build_version_command>(Obj, Load.Ptr);
if (!BVCOrErr)
return BVCOrErr.takeError();
MachO::build_version_command BVC = BVCOrErr.get();
if (Load.C.cmdsize !=
sizeof(MachO::build_version_command) +
BVC.ntools * sizeof(MachO::build_tool_version))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_BUILD_VERSION_COMMAND has incorrect cmdsize");
auto Start = Load.Ptr + sizeof(MachO::build_version_command);
BuildTools.resize(BVC.ntools);
for (unsigned i = 0; i < BVC.ntools; ++i)
BuildTools[i] = Start + i * sizeof(MachO::build_tool_version);
return Error::success();
}
static Error checkRpathCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex) {
if (Load.C.cmdsize < sizeof(MachO::rpath_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH cmdsize too small");
auto ROrErr = getStructOrErr<MachO::rpath_command>(Obj, Load.Ptr);
if (!ROrErr)
return ROrErr.takeError();
MachO::rpath_command R = ROrErr.get();
if (R.path < sizeof(MachO::rpath_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH path.offset field too small, not past "
"the end of the rpath_command struct");
if (R.path >= R.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH path.offset field extends past the end "
"of the load command");
// Make sure there is a null between the starting offset of the path and
// the end of the load command.
uint32_t i;
const char *P = (const char *)Load.Ptr;
for (i = R.path; i < R.cmdsize; i++)
if (P[i] == '\0')
break;
if (i >= R.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH library name extends past the end of the "
"load command");
return Error::success();
}
static Error checkEncryptCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
uint64_t cryptoff, uint64_t cryptsize,
const char **LoadCmd, const char *CmdName) {
if (*LoadCmd != nullptr)
return malformedError("more than one LC_ENCRYPTION_INFO and or "
"LC_ENCRYPTION_INFO_64 command");
uint64_t FileSize = Obj.getData().size();
if (cryptoff > FileSize)
return malformedError("cryptoff field of " + Twine(CmdName) +
" command " + Twine(LoadCommandIndex) + " extends "
"past the end of the file");
uint64_t BigSize = cryptoff;
BigSize += cryptsize;
if (BigSize > FileSize)
return malformedError("cryptoff field plus cryptsize field of " +
Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
*LoadCmd = Load.Ptr;
return Error::success();
}
static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex) {
if (Load.C.cmdsize < sizeof(MachO::linker_option_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_LINKER_OPTION cmdsize too small");
auto LinkOptionOrErr =
getStructOrErr<MachO::linker_option_command>(Obj, Load.Ptr);
if (!LinkOptionOrErr)
return LinkOptionOrErr.takeError();
MachO::linker_option_command L = LinkOptionOrErr.get();
// Make sure the count of strings is correct.
const char *string = (const char *)Load.Ptr +
sizeof(struct MachO::linker_option_command);
uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command);
uint32_t i = 0;
while (left > 0) {
while (*string == '\0' && left > 0) {
string++;
left--;
}
if (left > 0) {
i++;
uint32_t NullPos = StringRef(string, left).find('\0');
if (0xffffffff == NullPos)
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_LINKER_OPTION string #" + Twine(i) +
" is not NULL terminated");
uint32_t len = std::min(NullPos, left) + 1;
string += len;
left -= len;
}
}
if (L.count != i)
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_LINKER_OPTION string count " + Twine(L.count) +
" does not match number of strings");
return Error::success();
}
static Error checkSubCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex, const char *CmdName,
size_t SizeOfCmd, const char *CmdStructName,
uint32_t PathOffset, const char *PathFieldName) {
if (PathOffset < SizeOfCmd)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " " + PathFieldName + ".offset field too "
"small, not past the end of the " + CmdStructName);
if (PathOffset >= Load.C.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " " + PathFieldName + ".offset field "
"extends past the end of the load command");
// Make sure there is a null between the starting offset of the path and
// the end of the load command.
uint32_t i;
const char *P = (const char *)Load.Ptr;
for (i = PathOffset; i < Load.C.cmdsize; i++)
if (P[i] == '\0')
break;
if (i >= Load.C.cmdsize)
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " " + PathFieldName + " name extends past "
"the end of the load command");
return Error::success();
}
static Error checkThreadCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex,
const char *CmdName) {
if (Load.C.cmdsize < sizeof(MachO::thread_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
CmdName + " cmdsize too small");
auto ThreadCommandOrErr =
getStructOrErr<MachO::thread_command>(Obj, Load.Ptr);
if (!ThreadCommandOrErr)
return ThreadCommandOrErr.takeError();
MachO::thread_command T = ThreadCommandOrErr.get();
const char *state = Load.Ptr + sizeof(MachO::thread_command);
const char *end = Load.Ptr + T.cmdsize;
uint32_t nflavor = 0;
uint32_t cputype = getCPUType(Obj);
while (state < end) {
if(state + sizeof(uint32_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
"flavor in " + CmdName + " extends past end of "
"command");
uint32_t flavor;
memcpy(&flavor, state, sizeof(uint32_t));
if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
sys::swapByteOrder(flavor);
state += sizeof(uint32_t);
if(state + sizeof(uint32_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count in " + CmdName + " extends past end of "
"command");
uint32_t count;
memcpy(&count, state, sizeof(uint32_t));
if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
sys::swapByteOrder(count);
state += sizeof(uint32_t);
if (cputype == MachO::CPU_TYPE_I386) {
if (flavor == MachO::x86_THREAD_STATE32) {
if (count != MachO::x86_THREAD_STATE32_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_THREAD_STATE32_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_THREAD_STATE32 flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_thread_state32_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_THREAD_STATE32 extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_thread_state32_t);
} else {
return malformedError("load command " + Twine(LoadCommandIndex) +
" unknown flavor (" + Twine(flavor) + ") for "
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
} else if (cputype == MachO::CPU_TYPE_X86_64) {
if (flavor == MachO::x86_THREAD_STATE) {
if (count != MachO::x86_THREAD_STATE_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_THREAD_STATE_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_THREAD_STATE flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_thread_state_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_THREAD_STATE extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_thread_state_t);
} else if (flavor == MachO::x86_FLOAT_STATE) {
if (count != MachO::x86_FLOAT_STATE_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_FLOAT_STATE_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_FLOAT_STATE flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_float_state_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_FLOAT_STATE extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_float_state_t);
} else if (flavor == MachO::x86_EXCEPTION_STATE) {
if (count != MachO::x86_EXCEPTION_STATE_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_EXCEPTION_STATE_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_EXCEPTION_STATE flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_exception_state_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_EXCEPTION_STATE extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_exception_state_t);
} else if (flavor == MachO::x86_THREAD_STATE64) {
if (count != MachO::x86_THREAD_STATE64_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_THREAD_STATE64_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_THREAD_STATE64 flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_thread_state64_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_THREAD_STATE64 extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_thread_state64_t);
} else if (flavor == MachO::x86_EXCEPTION_STATE64) {
if (count != MachO::x86_EXCEPTION_STATE64_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not x86_EXCEPTION_STATE64_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a x86_EXCEPTION_STATE64 flavor in " + CmdName +
" command");
if (state + sizeof(MachO::x86_exception_state64_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" x86_EXCEPTION_STATE64 extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::x86_exception_state64_t);
} else {
return malformedError("load command " + Twine(LoadCommandIndex) +
" unknown flavor (" + Twine(flavor) + ") for "
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
} else if (cputype == MachO::CPU_TYPE_ARM) {
if (flavor == MachO::ARM_THREAD_STATE) {
if (count != MachO::ARM_THREAD_STATE_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not ARM_THREAD_STATE_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a ARM_THREAD_STATE flavor in " + CmdName +
" command");
if (state + sizeof(MachO::arm_thread_state32_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" ARM_THREAD_STATE extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::arm_thread_state32_t);
} else {
return malformedError("load command " + Twine(LoadCommandIndex) +
" unknown flavor (" + Twine(flavor) + ") for "
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
} else if (cputype == MachO::CPU_TYPE_ARM64 ||
cputype == MachO::CPU_TYPE_ARM64_32) {
if (flavor == MachO::ARM_THREAD_STATE64) {
if (count != MachO::ARM_THREAD_STATE64_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not ARM_THREAD_STATE64_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a ARM_THREAD_STATE64 flavor in " + CmdName +
" command");
if (state + sizeof(MachO::arm_thread_state64_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" ARM_THREAD_STATE64 extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::arm_thread_state64_t);
} else {
return malformedError("load command " + Twine(LoadCommandIndex) +
" unknown flavor (" + Twine(flavor) + ") for "
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
} else if (cputype == MachO::CPU_TYPE_POWERPC) {
if (flavor == MachO::PPC_THREAD_STATE) {
if (count != MachO::PPC_THREAD_STATE_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
" count not PPC_THREAD_STATE_COUNT for "
"flavor number " + Twine(nflavor) + " which is "
"a PPC_THREAD_STATE flavor in " + CmdName +
" command");
if (state + sizeof(MachO::ppc_thread_state32_t) > end)
return malformedError("load command " + Twine(LoadCommandIndex) +
" PPC_THREAD_STATE extends past end of "
"command in " + CmdName + " command");
state += sizeof(MachO::ppc_thread_state32_t);
} else {
return malformedError("load command " + Twine(LoadCommandIndex) +
" unknown flavor (" + Twine(flavor) + ") for "
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
} else {
return malformedError("unknown cputype (" + Twine(cputype) + ") load "
"command " + Twine(LoadCommandIndex) + " for " +
CmdName + " command can't be checked");
}
nflavor++;
}
return Error::success();
}
static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo
&Load,
uint32_t LoadCommandIndex,
const char **LoadCmd,
std::list<MachOElement> &Elements) {
if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_TWOLEVEL_HINTS has incorrect cmdsize");
if (*LoadCmd != nullptr)
return malformedError("more than one LC_TWOLEVEL_HINTS command");
auto HintsOrErr = getStructOrErr<MachO::twolevel_hints_command>(Obj, Load.Ptr);
if(!HintsOrErr)
return HintsOrErr.takeError();
MachO::twolevel_hints_command Hints = HintsOrErr.get();
uint64_t FileSize = Obj.getData().size();
if (Hints.offset > FileSize)
return malformedError("offset field of LC_TWOLEVEL_HINTS command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
uint64_t BigSize = Hints.nhints;
BigSize *= sizeof(MachO::twolevel_hint);
BigSize += Hints.offset;
if (BigSize > FileSize)
return malformedError("offset field plus nhints times sizeof(struct "
"twolevel_hint) field of LC_TWOLEVEL_HINTS command " +
Twine(LoadCommandIndex) + " extends past the end of "
"the file");
if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints *
sizeof(MachO::twolevel_hint),
"two level hints"))
return Err;
*LoadCmd = Load.Ptr;
return Error::success();
}
// Returns true if the libObject code does not support the load command and its
// contents. The cmd value it is treated as an unknown load command but with
// an error message that says the cmd value is obsolete.
static bool isLoadCommandObsolete(uint32_t cmd) {
if (cmd == MachO::LC_SYMSEG ||
cmd == MachO::LC_LOADFVMLIB ||
cmd == MachO::LC_IDFVMLIB ||
cmd == MachO::LC_IDENT ||
cmd == MachO::LC_FVMFILE ||
cmd == MachO::LC_PREPAGE ||
cmd == MachO::LC_PREBOUND_DYLIB ||
cmd == MachO::LC_TWOLEVEL_HINTS ||
cmd == MachO::LC_PREBIND_CKSUM)
return true;
return false;
}
Expected<std::unique_ptr<MachOObjectFile>>
MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64Bits, uint32_t UniversalCputype,
uint32_t UniversalIndex) {
Error Err = Error::success();
std::unique_ptr<MachOObjectFile> Obj(
new MachOObjectFile(std::move(Object), IsLittleEndian,
Is64Bits, Err, UniversalCputype,
UniversalIndex));
if (Err)
return Err;
return Obj;
}
MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, Error &Err,
uint32_t UniversalCputype,
uint32_t UniversalIndex)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) {
ErrorAsOutParameter ErrAsOutParam(&Err);
uint64_t SizeOfHeaders;
uint32_t cputype;
Start to add real error messages for malformed Mach-O files. And update the existing test cases in test/Object/macho-invalid.test to use llvm-objdump with the -macho option to produce these error messages and stop producing the generic "Invalid data was encountered while parsing the file" message. Working from the beginning of the file, if the mach header is too large for the size of the file and then if the load commands that follow extend past the end of the file these two errors now generate correct error messages. Both of these have existing test cases in test/Object/macho-invalid.test . But the first with macho-invalid-header it will never trigger the error message "mach header extends past the end of the file" using any of the llvm tools as they all use identify_magic() which rejects files with the correct magic number that are too small in size. So I tested this by hacking that code and seeing the error message down in parseHeader() really does happen. So in case there is ever code in llvm that directly calls createMachOObjectFile() this error message will be correctly produced. The second error message of "load commands extends past the end of the file" is triggered by a number of existing tests cases in test/Object/macho-invalid.test . Also other tests trigger different error messages now like "ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table". There are two existing test cases that still get the "Invalid data was encountered ..." error messages that I will tackle next. But they will involve a bit of pluming an Expect<...> up through the call stack and I want to do those as separate changes. FYI, for those test cases that were trying to test specific errors that now get different errors I’ll fix those in follow on changes and create new test cases for those so they test the error they were meant to test. llvm-svn: 266248
2016-04-14 05:17:58 +08:00
if (is64Bit()) {
parseHeader(*this, Header64, Err);
SizeOfHeaders = sizeof(MachO::mach_header_64);
cputype = Header64.cputype;
Start to add real error messages for malformed Mach-O files. And update the existing test cases in test/Object/macho-invalid.test to use llvm-objdump with the -macho option to produce these error messages and stop producing the generic "Invalid data was encountered while parsing the file" message. Working from the beginning of the file, if the mach header is too large for the size of the file and then if the load commands that follow extend past the end of the file these two errors now generate correct error messages. Both of these have existing test cases in test/Object/macho-invalid.test . But the first with macho-invalid-header it will never trigger the error message "mach header extends past the end of the file" using any of the llvm tools as they all use identify_magic() which rejects files with the correct magic number that are too small in size. So I tested this by hacking that code and seeing the error message down in parseHeader() really does happen. So in case there is ever code in llvm that directly calls createMachOObjectFile() this error message will be correctly produced. The second error message of "load commands extends past the end of the file" is triggered by a number of existing tests cases in test/Object/macho-invalid.test . Also other tests trigger different error messages now like "ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table". There are two existing test cases that still get the "Invalid data was encountered ..." error messages that I will tackle next. But they will involve a bit of pluming an Expect<...> up through the call stack and I want to do those as separate changes. FYI, for those test cases that were trying to test specific errors that now get different errors I’ll fix those in follow on changes and create new test cases for those so they test the error they were meant to test. llvm-svn: 266248
2016-04-14 05:17:58 +08:00
} else {
parseHeader(*this, Header, Err);
SizeOfHeaders = sizeof(MachO::mach_header);
cputype = Header.cputype;
Start to add real error messages for malformed Mach-O files. And update the existing test cases in test/Object/macho-invalid.test to use llvm-objdump with the -macho option to produce these error messages and stop producing the generic "Invalid data was encountered while parsing the file" message. Working from the beginning of the file, if the mach header is too large for the size of the file and then if the load commands that follow extend past the end of the file these two errors now generate correct error messages. Both of these have existing test cases in test/Object/macho-invalid.test . But the first with macho-invalid-header it will never trigger the error message "mach header extends past the end of the file" using any of the llvm tools as they all use identify_magic() which rejects files with the correct magic number that are too small in size. So I tested this by hacking that code and seeing the error message down in parseHeader() really does happen. So in case there is ever code in llvm that directly calls createMachOObjectFile() this error message will be correctly produced. The second error message of "load commands extends past the end of the file" is triggered by a number of existing tests cases in test/Object/macho-invalid.test . Also other tests trigger different error messages now like "ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table". There are two existing test cases that still get the "Invalid data was encountered ..." error messages that I will tackle next. But they will involve a bit of pluming an Expect<...> up through the call stack and I want to do those as separate changes. FYI, for those test cases that were trying to test specific errors that now get different errors I’ll fix those in follow on changes and create new test cases for those so they test the error they were meant to test. llvm-svn: 266248
2016-04-14 05:17:58 +08:00
}
if (Err)
return;
SizeOfHeaders += getHeader().sizeofcmds;
if (getData().data() + SizeOfHeaders > getData().end()) {
Err = malformedError("load commands extend past the end of the file");
Start to add real error messages for malformed Mach-O files. And update the existing test cases in test/Object/macho-invalid.test to use llvm-objdump with the -macho option to produce these error messages and stop producing the generic "Invalid data was encountered while parsing the file" message. Working from the beginning of the file, if the mach header is too large for the size of the file and then if the load commands that follow extend past the end of the file these two errors now generate correct error messages. Both of these have existing test cases in test/Object/macho-invalid.test . But the first with macho-invalid-header it will never trigger the error message "mach header extends past the end of the file" using any of the llvm tools as they all use identify_magic() which rejects files with the correct magic number that are too small in size. So I tested this by hacking that code and seeing the error message down in parseHeader() really does happen. So in case there is ever code in llvm that directly calls createMachOObjectFile() this error message will be correctly produced. The second error message of "load commands extends past the end of the file" is triggered by a number of existing tests cases in test/Object/macho-invalid.test . Also other tests trigger different error messages now like "ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table". There are two existing test cases that still get the "Invalid data was encountered ..." error messages that I will tackle next. But they will involve a bit of pluming an Expect<...> up through the call stack and I want to do those as separate changes. FYI, for those test cases that were trying to test specific errors that now get different errors I’ll fix those in follow on changes and create new test cases for those so they test the error they were meant to test. llvm-svn: 266248
2016-04-14 05:17:58 +08:00
return;
}
if (UniversalCputype != 0 && cputype != UniversalCputype) {
Err = malformedError("universal header architecture: " +
Twine(UniversalIndex) + "'s cputype does not match "
"object file's mach header");
return;
}
std::list<MachOElement> Elements;
Elements.push_back({0, SizeOfHeaders, "Mach-O headers"});
uint32_t LoadCommandCount = getHeader().ncmds;
LoadCommandInfo Load;
if (LoadCommandCount != 0) {
if (auto LoadOrErr = getFirstLoadCommandInfo(*this))
Load = *LoadOrErr;
else {
Err = LoadOrErr.takeError();
return;
}
}
const char *DyldIdLoadCmd = nullptr;
const char *FuncStartsLoadCmd = nullptr;
const char *SplitInfoLoadCmd = nullptr;
const char *CodeSignDrsLoadCmd = nullptr;
const char *CodeSignLoadCmd = nullptr;
const char *VersLoadCmd = nullptr;
const char *SourceLoadCmd = nullptr;
const char *EntryPointLoadCmd = nullptr;
const char *EncryptLoadCmd = nullptr;
const char *RoutinesLoadCmd = nullptr;
const char *UnixThreadLoadCmd = nullptr;
const char *TwoLevelHintsLoadCmd = nullptr;
for (unsigned I = 0; I < LoadCommandCount; ++I) {
if (is64Bit()) {
if (Load.C.cmdsize % 8 != 0) {
// We have a hack here to allow 64-bit Mach-O core files to have
// LC_THREAD commands that are only a multiple of 4 and not 8 to be
// allowed since the macOS kernel produces them.
if (getHeader().filetype != MachO::MH_CORE ||
Load.C.cmd != MachO::LC_THREAD || Load.C.cmdsize % 4) {
Err = malformedError("load command " + Twine(I) + " cmdsize not a "
"multiple of 8");
return;
}
}
} else {
if (Load.C.cmdsize % 4 != 0) {
Err = malformedError("load command " + Twine(I) + " cmdsize not a "
"multiple of 4");
return;
}
}
LoadCommands.push_back(Load);
if (Load.C.cmd == MachO::LC_SYMTAB) {
if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_DYSYMTAB) {
if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd,
Elements)))
return;
} else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd,
"LC_DATA_IN_CODE", Elements,
"data in code info")))
return;
} else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd,
"LC_LINKER_OPTIMIZATION_HINT",
Elements, "linker optimization "
"hints")))
return;
} else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd,
"LC_FUNCTION_STARTS", Elements,
"function starts data")))
return;
} else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd,
"LC_SEGMENT_SPLIT_INFO", Elements,
"split info data")))
return;
} else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd,
"LC_DYLIB_CODE_SIGN_DRS", Elements,
"code signing RDs data")))
return;
} else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) {
if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd,
"LC_CODE_SIGNATURE", Elements,
"code signature data")))
return;
} else if (Load.C.cmd == MachO::LC_DYLD_INFO) {
if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
"LC_DYLD_INFO", Elements)))
return;
} else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
"LC_DYLD_INFO_ONLY", Elements)))
return;
} else if (Load.C.cmd == MachO::LC_UUID) {
if (Load.C.cmdsize != sizeof(MachO::uuid_command)) {
Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect "
"cmdsize");
return;
}
if (UuidLoadCmd) {
Err = malformedError("more than one LC_UUID command");
return;
}
UuidLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_SEGMENT_64) {
if ((Err = parseSegmentLoadCommand<MachO::segment_command_64,
MachO::section_64>(
*this, Load, Sections, HasPageZeroSegment, I,
"LC_SEGMENT_64", SizeOfHeaders, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_SEGMENT) {
if ((Err = parseSegmentLoadCommand<MachO::segment_command,
MachO::section>(
*this, Load, Sections, HasPageZeroSegment, I,
"LC_SEGMENT", SizeOfHeaders, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_ID_DYLIB) {
if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd)))
return;
} else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) {
if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB")))
return;
Libraries.push_back(Load.Ptr);
} else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) {
if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB")))
return;
Libraries.push_back(Load.Ptr);
} else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) {
if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB")))
return;
Libraries.push_back(Load.Ptr);
} else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) {
if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB")))
return;
Libraries.push_back(Load.Ptr);
} else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB")))
return;
Libraries.push_back(Load.Ptr);
} else if (Load.C.cmd == MachO::LC_ID_DYLINKER) {
if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER")))
return;
} else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) {
if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER")))
return;
} else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) {
if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT")))
return;
} else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) {
if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
"LC_VERSION_MIN_MACOSX")))
return;
} else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) {
if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
"LC_VERSION_MIN_IPHONEOS")))
return;
} else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) {
if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
"LC_VERSION_MIN_TVOS")))
return;
} else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) {
if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
"LC_VERSION_MIN_WATCHOS")))
return;
} else if (Load.C.cmd == MachO::LC_NOTE) {
if ((Err = checkNoteCommand(*this, Load, I, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_BUILD_VERSION) {
if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I)))
return;
} else if (Load.C.cmd == MachO::LC_RPATH) {
if ((Err = checkRpathCommand(*this, Load, I)))
return;
} else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) {
if (Load.C.cmdsize != sizeof(MachO::source_version_command)) {
Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) +
" has incorrect cmdsize");
return;
}
if (SourceLoadCmd) {
Err = malformedError("more than one LC_SOURCE_VERSION command");
return;
}
SourceLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_MAIN) {
if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) {
Err = malformedError("LC_MAIN command " + Twine(I) +
" has incorrect cmdsize");
return;
}
if (EntryPointLoadCmd) {
Err = malformedError("more than one LC_MAIN command");
return;
}
EntryPointLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) {
if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) {
Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) +
" has incorrect cmdsize");
return;
}
MachO::encryption_info_command E =
getStruct<MachO::encryption_info_command>(*this, Load.Ptr);
if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize,
&EncryptLoadCmd, "LC_ENCRYPTION_INFO")))
return;
} else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) {
if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) {
Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) +
" has incorrect cmdsize");
return;
}
MachO::encryption_info_command_64 E =
getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr);
if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize,
&EncryptLoadCmd, "LC_ENCRYPTION_INFO_64")))
return;
} else if (Load.C.cmd == MachO::LC_LINKER_OPTION) {
if ((Err = checkLinkerOptCommand(*this, Load, I)))
return;
} else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) {
if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) {
Err = malformedError("load command " + Twine(I) +
" LC_SUB_FRAMEWORK cmdsize too small");
return;
}
MachO::sub_framework_command S =
getStruct<MachO::sub_framework_command>(*this, Load.Ptr);
if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK",
sizeof(MachO::sub_framework_command),
"sub_framework_command", S.umbrella,
"umbrella")))
return;
} else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) {
if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) {
Err = malformedError("load command " + Twine(I) +
" LC_SUB_UMBRELLA cmdsize too small");
return;
}
MachO::sub_umbrella_command S =
getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr);
if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA",
sizeof(MachO::sub_umbrella_command),
"sub_umbrella_command", S.sub_umbrella,
"sub_umbrella")))
return;
} else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) {
if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) {
Err = malformedError("load command " + Twine(I) +
" LC_SUB_LIBRARY cmdsize too small");
return;
}
MachO::sub_library_command S =
getStruct<MachO::sub_library_command>(*this, Load.Ptr);
if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY",
sizeof(MachO::sub_library_command),
"sub_library_command", S.sub_library,
"sub_library")))
return;
} else if (Load.C.cmd == MachO::LC_SUB_CLIENT) {
if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) {
Err = malformedError("load command " + Twine(I) +
" LC_SUB_CLIENT cmdsize too small");
return;
}
MachO::sub_client_command S =
getStruct<MachO::sub_client_command>(*this, Load.Ptr);
if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT",
sizeof(MachO::sub_client_command),
"sub_client_command", S.client, "client")))
return;
} else if (Load.C.cmd == MachO::LC_ROUTINES) {
if (Load.C.cmdsize != sizeof(MachO::routines_command)) {
Err = malformedError("LC_ROUTINES command " + Twine(I) +
" has incorrect cmdsize");
return;
}
if (RoutinesLoadCmd) {
Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 "
"command");
return;
}
RoutinesLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_ROUTINES_64) {
if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) {
Err = malformedError("LC_ROUTINES_64 command " + Twine(I) +
" has incorrect cmdsize");
return;
}
if (RoutinesLoadCmd) {
Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES "
"command");
return;
}
RoutinesLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_UNIXTHREAD) {
if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD")))
return;
if (UnixThreadLoadCmd) {
Err = malformedError("more than one LC_UNIXTHREAD command");
return;
}
UnixThreadLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_THREAD) {
if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD")))
return;
// Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported.
} else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) {
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
} else if (isLoadCommandObsolete(Load.C.cmd)) {
Err = malformedError("load command " + Twine(I) + " for cmd value of: " +
Twine(Load.C.cmd) + " is obsolete and not "
"supported");
return;
}
// TODO: generate a error for unknown load commands by default. But still
// need work out an approach to allow or not allow unknown values like this
// as an option for some uses like lldb.
if (I < LoadCommandCount - 1) {
if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load))
Load = *LoadOrErr;
else {
Err = LoadOrErr.takeError();
return;
}
}
}
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
if (!SymtabLoadCmd) {
if (DysymtabLoadCmd) {
Err = malformedError("contains LC_DYSYMTAB load command without a "
"LC_SYMTAB load command");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
} else if (DysymtabLoadCmd) {
MachO::symtab_command Symtab =
getStruct<MachO::symtab_command>(*this, SymtabLoadCmd);
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
MachO::dysymtab_command Dysymtab =
getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd);
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) {
Err = malformedError("ilocalsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
uint64_t BigSize = Dysymtab.ilocalsym;
BigSize += Dysymtab.nlocalsym;
if (Dysymtab.nlocalsym != 0 && BigSize > Symtab.nsyms) {
Err = malformedError("ilocalsym plus nlocalsym in LC_DYSYMTAB load "
"command extends past the end of the symbol table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
if (Dysymtab.nextdefsym != 0 && Dysymtab.iextdefsym > Symtab.nsyms) {
Err = malformedError("iextdefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
BigSize = Dysymtab.iextdefsym;
BigSize += Dysymtab.nextdefsym;
if (Dysymtab.nextdefsym != 0 && BigSize > Symtab.nsyms) {
Err = malformedError("iextdefsym plus nextdefsym in LC_DYSYMTAB "
"load command extends past the end of the symbol "
"table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) {
Err = malformedError("iundefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
BigSize = Dysymtab.iundefsym;
BigSize += Dysymtab.nundefsym;
if (Dysymtab.nundefsym != 0 && BigSize > Symtab.nsyms) {
Err = malformedError("iundefsym plus nundefsym in LC_DYSYMTAB load "
" command extends past the end of the symbol table");
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
return;
}
}
if ((getHeader().filetype == MachO::MH_DYLIB ||
getHeader().filetype == MachO::MH_DYLIB_STUB) &&
DyldIdLoadCmd == nullptr) {
Err = malformedError("no LC_ID_DYLIB load command in dynamic library "
"filetype");
return;
}
assert(LoadCommands.size() == LoadCommandCount);
Err = Error::success();
}
Add a checkSymbolTable() method to the MachOObjectFile class. The philosophy of the error checking in libObject for Mach-O files is that the constructor will check the load commands so for their tables the offsets and sizes are properly contained in the file. But there is no checking of the entries of any of the tables. For the contents of the tables themselves the methods accessing the contents of the entries return errors as needed. In some cases this however makes it difficult or cumbersome to produce a good error message which would include the tool name, file name, archive member, and name of the architecture of a slice of a universal file the error occurred in. So idea is that there will be a method to check a table which can be called up front before using it allowing a good error message to be produced before a table is used. And if only verification of the Mach-O file and its tables are wanted a new possible method checkAllTables() could be added to call all of the methods to check all the tables at some time when such methods exist. The checkSymbolTable() is the first of such methods to check one of the Mach-O file tables. This method initially will used in llvm-objdump’s DisassembleMachO() routine before it gets the section and symbol information. As if there are problems with the symbol table currently the error is first encountered by the bool operator() in the SymbolSorter() struct which passed to std::sort(). In this case there is no context as to the file name the symbol which results a poor error message: LLVM ERROR: truncated or malformed object (bad string index: 22 for symbol at index 1) with the added call to the checkSymbolTable() method the error message includes the tool name and file name: llvm-objdump: 'macho-invalid-symbol-strx': truncated or malformed object (bad string table index: 22 past the end of string table, for symbol at index 1) llvm-svn: 286887
2016-11-15 04:57:04 +08:00
Error MachOObjectFile::checkSymbolTable() const {
uint32_t Flags = 0;
if (is64Bit()) {
MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64();
Flags = H_64.flags;
} else {
MachO::mach_header H = MachOObjectFile::getHeader();
Flags = H.flags;
}
uint8_t NType = 0;
uint8_t NSect = 0;
uint16_t NDesc = 0;
uint32_t NStrx = 0;
uint64_t NValue = 0;
uint32_t SymbolIndex = 0;
MachO::symtab_command S = getSymtabLoadCommand();
for (const SymbolRef &Symbol : symbols()) {
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
if (is64Bit()) {
MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI);
NType = STE_64.n_type;
NSect = STE_64.n_sect;
NDesc = STE_64.n_desc;
NStrx = STE_64.n_strx;
NValue = STE_64.n_value;
} else {
MachO::nlist STE = getSymbolTableEntry(SymDRI);
NType = STE.n_type;
NSect = STE.n_sect;
NDesc = STE.n_desc;
NStrx = STE.n_strx;
NValue = STE.n_value;
}
if ((NType & MachO::N_STAB) == 0) {
if ((NType & MachO::N_TYPE) == MachO::N_SECT) {
if (NSect == 0 || NSect > Sections.size())
return malformedError("bad section index: " + Twine((int)NSect) +
" for symbol at index " + Twine(SymbolIndex));
}
if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
if (NValue >= S.strsize)
return malformedError("bad n_value: " + Twine((int)NValue) + " past "
"the end of string table, for N_INDR symbol at "
"index " + Twine(SymbolIndex));
Add a checkSymbolTable() method to the MachOObjectFile class. The philosophy of the error checking in libObject for Mach-O files is that the constructor will check the load commands so for their tables the offsets and sizes are properly contained in the file. But there is no checking of the entries of any of the tables. For the contents of the tables themselves the methods accessing the contents of the entries return errors as needed. In some cases this however makes it difficult or cumbersome to produce a good error message which would include the tool name, file name, archive member, and name of the architecture of a slice of a universal file the error occurred in. So idea is that there will be a method to check a table which can be called up front before using it allowing a good error message to be produced before a table is used. And if only verification of the Mach-O file and its tables are wanted a new possible method checkAllTables() could be added to call all of the methods to check all the tables at some time when such methods exist. The checkSymbolTable() is the first of such methods to check one of the Mach-O file tables. This method initially will used in llvm-objdump’s DisassembleMachO() routine before it gets the section and symbol information. As if there are problems with the symbol table currently the error is first encountered by the bool operator() in the SymbolSorter() struct which passed to std::sort(). In this case there is no context as to the file name the symbol which results a poor error message: LLVM ERROR: truncated or malformed object (bad string index: 22 for symbol at index 1) with the added call to the checkSymbolTable() method the error message includes the tool name and file name: llvm-objdump: 'macho-invalid-symbol-strx': truncated or malformed object (bad string table index: 22 past the end of string table, for symbol at index 1) llvm-svn: 286887
2016-11-15 04:57:04 +08:00
}
if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
(((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
(NType & MachO::N_TYPE) == MachO::N_PBUD)) {
uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
if (LibraryOrdinal != 0 &&
LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
LibraryOrdinal - 1 >= Libraries.size() ) {
return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
" for symbol at index " + Twine(SymbolIndex));
}
}
Add a checkSymbolTable() method to the MachOObjectFile class. The philosophy of the error checking in libObject for Mach-O files is that the constructor will check the load commands so for their tables the offsets and sizes are properly contained in the file. But there is no checking of the entries of any of the tables. For the contents of the tables themselves the methods accessing the contents of the entries return errors as needed. In some cases this however makes it difficult or cumbersome to produce a good error message which would include the tool name, file name, archive member, and name of the architecture of a slice of a universal file the error occurred in. So idea is that there will be a method to check a table which can be called up front before using it allowing a good error message to be produced before a table is used. And if only verification of the Mach-O file and its tables are wanted a new possible method checkAllTables() could be added to call all of the methods to check all the tables at some time when such methods exist. The checkSymbolTable() is the first of such methods to check one of the Mach-O file tables. This method initially will used in llvm-objdump’s DisassembleMachO() routine before it gets the section and symbol information. As if there are problems with the symbol table currently the error is first encountered by the bool operator() in the SymbolSorter() struct which passed to std::sort(). In this case there is no context as to the file name the symbol which results a poor error message: LLVM ERROR: truncated or malformed object (bad string index: 22 for symbol at index 1) with the added call to the checkSymbolTable() method the error message includes the tool name and file name: llvm-objdump: 'macho-invalid-symbol-strx': truncated or malformed object (bad string table index: 22 past the end of string table, for symbol at index 1) llvm-svn: 286887
2016-11-15 04:57:04 +08:00
}
if (NStrx >= S.strsize)
return malformedError("bad string table index: " + Twine((int)NStrx) +
" past the end of string table, for symbol at "
"index " + Twine(SymbolIndex));
SymbolIndex++;
}
return Error::success();
}
void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
unsigned SymbolTableEntrySize = is64Bit() ?
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist);
Symb.p += SymbolTableEntrySize;
}
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const {
StringRef StringTable = getStringTableData();
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
if (Entry.n_strx == 0)
// A n_strx value of 0 indicates that no name is associated with a
// particular symbol table entry.
return StringRef();
const char *Start = &StringTable.data()[Entry.n_strx];
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
if (Start < getData().begin() || Start >= getData().end()) {
return malformedError("bad string index: " + Twine(Entry.n_strx) +
" for symbol at index " + Twine(getSymbolIndex(Symb)));
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
}
return StringRef(Start);
}
unsigned MachOObjectFile::getSectionType(SectionRef Sec) const {
DataRefImpl DRI = Sec.getRawDataRefImpl();
uint32_t Flags = getSectionFlags(*this, DRI);
return Flags & MachO::SECTION_TYPE;
}
uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const {
if (is64Bit()) {
MachO::nlist_64 Entry = getSymbol64TableEntry(Sym);
return Entry.n_value;
}
MachO::nlist Entry = getSymbolTableEntry(Sym);
return Entry.n_value;
}
// getIndirectName() returns the name of the alias'ed symbol who's string table
// index is in the n_value field.
std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
StringRef &Res) const {
StringRef StringTable = getStringTableData();
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
return object_error::parse_failed;
uint64_t NValue = getNValue(Symb);
if (NValue >= StringTable.size())
return object_error::parse_failed;
const char *Start = &StringTable.data()[NValue];
Res = StringRef(Start);
return std::error_code();
}
uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const {
return getNValue(Sym);
}
Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const {
return getSymbolValue(Sym);
}
uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common) {
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI);
return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
}
return 0;
}
uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const {
return getNValue(DRI);
}
Expected<SymbolRef::Type>
Fix a crash in running llvm-objdump -t with an invalid Mach-O file already in the test suite. While this is not really an interesting tool and option to run on a Mach-O file to show the symbol table in a generic libObject format it shouldn’t crash. The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187
2016-03-24 04:27:00 +08:00
MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
uint8_t n_type = Entry.n_type;
// If this is a STAB debugging symbol, we can do nothing more.
if (n_type & MachO::N_STAB)
return SymbolRef::ST_Debug;
switch (n_type & MachO::N_TYPE) {
case MachO::N_UNDF :
return SymbolRef::ST_Unknown;
case MachO::N_SECT :
Expected<section_iterator> SecOrError = getSymbolSection(Symb);
Fix a crash in running llvm-objdump -t with an invalid Mach-O file already in the test suite. While this is not really an interesting tool and option to run on a Mach-O file to show the symbol table in a generic libObject format it shouldn’t crash. The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187
2016-03-24 04:27:00 +08:00
if (!SecOrError)
return SecOrError.takeError();
Fix a crash in running llvm-objdump -t with an invalid Mach-O file already in the test suite. While this is not really an interesting tool and option to run on a Mach-O file to show the symbol table in a generic libObject format it shouldn’t crash. The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187
2016-03-24 04:27:00 +08:00
section_iterator Sec = *SecOrError;
if (Sec->isData() || Sec->isBSS())
return SymbolRef::ST_Data;
return SymbolRef::ST_Function;
}
return SymbolRef::ST_Other;
}
uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI);
uint8_t MachOType = Entry.n_type;
uint16_t MachOFlags = Entry.n_desc;
uint32_t Result = SymbolRef::SF_None;
if ((MachOType & MachO::N_TYPE) == MachO::N_INDR)
Result |= SymbolRef::SF_Indirect;
if (MachOType & MachO::N_STAB)
Result |= SymbolRef::SF_FormatSpecific;
if (MachOType & MachO::N_EXT) {
Result |= SymbolRef::SF_Global;
if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
if (getNValue(DRI))
Result |= SymbolRef::SF_Common;
else
Result |= SymbolRef::SF_Undefined;
}
if (!(MachOType & MachO::N_PEXT))
Result |= SymbolRef::SF_Exported;
}
if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
Result |= SymbolRef::SF_Weak;
if (MachOFlags & (MachO::N_ARM_THUMB_DEF))
Result |= SymbolRef::SF_Thumb;
if ((MachOType & MachO::N_TYPE) == MachO::N_ABS)
Result |= SymbolRef::SF_Absolute;
return Result;
}
Expected<section_iterator>
MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
uint8_t index = Entry.n_sect;
if (index == 0)
return section_end();
DataRefImpl DRI;
DRI.d.a = index - 1;
Fix a crash in running llvm-objdump -t with an invalid Mach-O file already in the test suite. While this is not really an interesting tool and option to run on a Mach-O file to show the symbol table in a generic libObject format it shouldn’t crash. The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187
2016-03-24 04:27:00 +08:00
if (DRI.d.a >= Sections.size()){
return malformedError("bad section index: " + Twine((int)index) +
" for symbol at index " + Twine(getSymbolIndex(Symb)));
Fix a crash in running llvm-objdump -t with an invalid Mach-O file already in the test suite. While this is not really an interesting tool and option to run on a Mach-O file to show the symbol table in a generic libObject format it shouldn’t crash. The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187
2016-03-24 04:27:00 +08:00
}
return section_iterator(SectionRef(DRI, this));
}
unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
MachO::nlist_base Entry =
getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl());
return Entry.n_sect - 1;
}
void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
Sec.d.a++;
}
Expected<StringRef> MachOObjectFile::getSectionName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawName(Sec);
return parseSegmentOrSectionName(Raw.data());
}
uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
if (is64Bit())
return getSection64(Sec).addr;
return getSection(Sec).addr;
}
uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
return Sec.d.a;
}
uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
// In the case if a malformed Mach-O file where the section offset is past
// the end of the file or some part of the section size is past the end of
// the file return a size of zero or a size that covers the rest of the file
// but does not extend past the end of the file.
uint32_t SectOffset, SectType;
uint64_t SectSize;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
SectOffset = Sect.offset;
SectSize = Sect.size;
SectType = Sect.flags & MachO::SECTION_TYPE;
} else {
MachO::section Sect = getSection(Sec);
SectOffset = Sect.offset;
SectSize = Sect.size;
SectType = Sect.flags & MachO::SECTION_TYPE;
}
if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL)
return SectSize;
uint64_t FileSize = getData().size();
if (SectOffset > FileSize)
return 0;
if (FileSize - SectOffset < SectSize)
return FileSize - SectOffset;
return SectSize;
}
ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint32_t Offset,
uint64_t Size) const {
return arrayRefFromStringRef(getData().substr(Offset, Size));
}
Expected<ArrayRef<uint8_t>>
MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
uint32_t Offset;
uint64_t Size;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Offset = Sect.offset;
Size = Sect.size;
} else {
MachO::section Sect = getSection(Sec);
Offset = Sect.offset;
Size = Sect.size;
}
return getSectionContents(Offset, Size);
}
uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
uint32_t Align;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Align = Sect.align;
} else {
MachO::section Sect = getSection(Sec);
Align = Sect.align;
}
return uint64_t(1) << Align;
}
Expected<SectionRef> MachOObjectFile::getSection(unsigned SectionIndex) const {
if (SectionIndex < 1 || SectionIndex > Sections.size())
return malformedError("bad section index: " + Twine((int)SectionIndex));
DataRefImpl DRI;
DRI.d.a = SectionIndex - 1;
return SectionRef(DRI, this);
}
Expected<SectionRef> MachOObjectFile::getSection(StringRef SectionName) const {
for (const SectionRef &Section : sections()) {
auto NameOrErr = Section.getName();
if (!NameOrErr)
return NameOrErr.takeError();
if (*NameOrErr == SectionName)
return Section;
}
return errorCodeToError(object_error::parse_failed);
}
bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const {
return false;
}
bool MachOObjectFile::isSectionText(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(*this, Sec);
return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
}
bool MachOObjectFile::isSectionData(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(*this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
!(SectionType == MachO::S_ZEROFILL ||
SectionType == MachO::S_GB_ZEROFILL);
}
bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(*this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
(SectionType == MachO::S_ZEROFILL ||
SectionType == MachO::S_GB_ZEROFILL);
}
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
return Sec.getRawDataRefImpl().d.a;
}
bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(*this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
return SectionType == MachO::S_ZEROFILL ||
SectionType == MachO::S_GB_ZEROFILL;
}
bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const {
StringRef SegmentName = getSectionFinalSegmentName(Sec);
if (Expected<StringRef> NameOrErr = getSectionName(Sec))
return (SegmentName == "__LLVM" && *NameOrErr == "__bitcode");
return false;
}
bool MachOObjectFile::isSectionStripped(DataRefImpl Sec) const {
if (is64Bit())
return getSection64(Sec).offset == 0;
return getSection(Sec).offset == 0;
}
relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
DataRefImpl Ret;
Ret.d.a = Sec.d.a;
Ret.d.b = 0;
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator
MachOObjectFile::section_rel_end(DataRefImpl Sec) const {
uint32_t Num;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Num = Sect.nreloc;
} else {
MachO::section Sect = getSection(Sec);
Num = Sect.nreloc;
}
DataRefImpl Ret;
Ret.d.a = Sec.d.a;
Ret.d.b = Num;
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator MachOObjectFile::extrel_begin() const {
DataRefImpl Ret;
// for DYSYMTAB symbols, Ret.d.a == 0 for external relocations
Ret.d.a = 0; // Would normally be a section index.
Ret.d.b = 0; // Index into the external relocations
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator MachOObjectFile::extrel_end() const {
MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand();
DataRefImpl Ret;
// for DYSYMTAB symbols, Ret.d.a == 0 for external relocations
Ret.d.a = 0; // Would normally be a section index.
Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator MachOObjectFile::locrel_begin() const {
DataRefImpl Ret;
// for DYSYMTAB symbols, Ret.d.a == 1 for local relocations
Ret.d.a = 1; // Would normally be a section index.
Ret.d.b = 0; // Index into the local relocations
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator MachOObjectFile::locrel_end() const {
MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand();
DataRefImpl Ret;
// for DYSYMTAB symbols, Ret.d.a == 1 for local relocations
Ret.d.a = 1; // Would normally be a section index.
Ret.d.b = DysymtabLoadCmd.nlocrel; // Index into the local relocations
return relocation_iterator(RelocationRef(Ret, this));
}
void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
++Rel.d.b;
}
uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const {
assert((getHeader().filetype == MachO::MH_OBJECT ||
getHeader().filetype == MachO::MH_KEXT_BUNDLE) &&
"Only implemented for MH_OBJECT && MH_KEXT_BUNDLE");
MachO::any_relocation_info RE = getRelocation(Rel);
return getAnyRelocationAddress(RE);
}
symbol_iterator
MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
MachO::any_relocation_info RE = getRelocation(Rel);
if (isRelocationScattered(RE))
return symbol_end();
uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
bool isExtern = getPlainRelocationExternal(RE);
if (!isExtern)
return symbol_end();
MachO::symtab_command S = getSymtabLoadCommand();
unsigned SymbolTableEntrySize = is64Bit() ?
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist);
uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize;
DataRefImpl Sym;
Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return symbol_iterator(SymbolRef(Sym, this));
}
section_iterator
MachOObjectFile::getRelocationSection(DataRefImpl Rel) const {
return section_iterator(getAnyRelocationSection(getRelocation(Rel)));
}
uint64_t MachOObjectFile::getRelocationType(DataRefImpl Rel) const {
MachO::any_relocation_info RE = getRelocation(Rel);
return getAnyRelocationType(RE);
}
void MachOObjectFile::getRelocationTypeName(
DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
StringRef res;
uint64_t RType = getRelocationType(Rel);
unsigned Arch = this->getArch();
switch (Arch) {
case Triple::x86: {
static const char *const Table[] = {
"GENERIC_RELOC_VANILLA",
"GENERIC_RELOC_PAIR",
"GENERIC_RELOC_SECTDIFF",
"GENERIC_RELOC_PB_LA_PTR",
"GENERIC_RELOC_LOCAL_SECTDIFF",
"GENERIC_RELOC_TLV" };
if (RType > 5)
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::x86_64: {
static const char *const Table[] = {
"X86_64_RELOC_UNSIGNED",
"X86_64_RELOC_SIGNED",
"X86_64_RELOC_BRANCH",
"X86_64_RELOC_GOT_LOAD",
"X86_64_RELOC_GOT",
"X86_64_RELOC_SUBTRACTOR",
"X86_64_RELOC_SIGNED_1",
"X86_64_RELOC_SIGNED_2",
"X86_64_RELOC_SIGNED_4",
"X86_64_RELOC_TLV" };
if (RType > 9)
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::arm: {
static const char *const Table[] = {
"ARM_RELOC_VANILLA",
"ARM_RELOC_PAIR",
"ARM_RELOC_SECTDIFF",
"ARM_RELOC_LOCAL_SECTDIFF",
"ARM_RELOC_PB_LA_PTR",
"ARM_RELOC_BR24",
"ARM_THUMB_RELOC_BR22",
"ARM_THUMB_32BIT_BRANCH",
"ARM_RELOC_HALF",
"ARM_RELOC_HALF_SECTDIFF" };
if (RType > 9)
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::aarch64:
case Triple::aarch64_32: {
static const char *const Table[] = {
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
"ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21",
"ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21",
"ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT",
"ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
"ARM64_RELOC_ADDEND"
};
if (RType >= array_lengthof(Table))
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::ppc: {
static const char *const Table[] = {
"PPC_RELOC_VANILLA",
"PPC_RELOC_PAIR",
"PPC_RELOC_BR14",
"PPC_RELOC_BR24",
"PPC_RELOC_HI16",
"PPC_RELOC_LO16",
"PPC_RELOC_HA16",
"PPC_RELOC_LO14",
"PPC_RELOC_SECTDIFF",
"PPC_RELOC_PB_LA_PTR",
"PPC_RELOC_HI16_SECTDIFF",
"PPC_RELOC_LO16_SECTDIFF",
"PPC_RELOC_HA16_SECTDIFF",
"PPC_RELOC_JBSR",
"PPC_RELOC_LO14_SECTDIFF",
"PPC_RELOC_LOCAL_SECTDIFF" };
if (RType > 15)
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::UnknownArch:
res = "Unknown";
break;
}
Result.append(res.begin(), res.end());
}
uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
MachO::any_relocation_info RE = getRelocation(Rel);
return getAnyRelocationLength(RE);
}
//
// guessLibraryShortName() is passed a name of a dynamic library and returns a
// guess on what the short name is. Then name is returned as a substring of the
// StringRef Name passed in. The name of the dynamic library is recognized as
// a framework if it has one of the two following forms:
// Foo.framework/Versions/A/Foo
// Foo.framework/Foo
// Where A and Foo can be any string. And may contain a trailing suffix
// starting with an underbar. If the Name is recognized as a framework then
// isFramework is set to true else it is set to false. If the Name has a
// suffix then Suffix is set to the substring in Name that contains the suffix
// else it is set to a NULL StringRef.
//
// The Name of the dynamic library is recognized as a library name if it has
// one of the two following forms:
// libFoo.A.dylib
// libFoo.dylib
Limit dyld image suffixes guessed by guessLibraryShortName() Summary: guessLibraryShortName() separates a full Mach-O dylib install name path into a short name and a dyld image suffix. The short name is the name of the dylib without its path or extension. The dyld image suffix is a string used by dyld to load variants of dylibs if available at runtime; for example, "when binding this process, load 'debug' variants of all required dylibs." dyld knows exactly what the image suffix is, but by convention diagnostic tools such as llvm-nm attempt to guess suffix names by looking at the install name path. These dyld image suffixes are separated from the short name by a '_' character. Because the '_' character is commonly used to separate words in filenames guessLibraryShortName() cannot reliably separate a dylib's short name from an arbitrary image suffix; imagine if both the short name and the suffix contains an '_' character! To better deal with this ambiguity, guessLibraryShortName() will recognize only "_debug" and "_profile" as valid Suffix values. Calling code needs to be tolerant of guessLibraryShortName() guessing incorrectly. The previous implementation of guessLibraryShortName() did not allow '_' characters to appear in short names. When present, the short name would be truncated, e.g., "libcompiler_rt" => "libcompiler". This change allows "libcompiler_rt" and "libcompiler_rt_debug" to both be recognized as "libcompiler_rt". rdar://47412244 Reviewers: kledzik, lhames, pete Reviewed By: pete Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56978 llvm-svn: 352104
2019-01-25 04:59:44 +08:00
//
// The library may have a suffix trailing the name Foo of the form:
// libFoo_profile.A.dylib
// libFoo_profile.dylib
Limit dyld image suffixes guessed by guessLibraryShortName() Summary: guessLibraryShortName() separates a full Mach-O dylib install name path into a short name and a dyld image suffix. The short name is the name of the dylib without its path or extension. The dyld image suffix is a string used by dyld to load variants of dylibs if available at runtime; for example, "when binding this process, load 'debug' variants of all required dylibs." dyld knows exactly what the image suffix is, but by convention diagnostic tools such as llvm-nm attempt to guess suffix names by looking at the install name path. These dyld image suffixes are separated from the short name by a '_' character. Because the '_' character is commonly used to separate words in filenames guessLibraryShortName() cannot reliably separate a dylib's short name from an arbitrary image suffix; imagine if both the short name and the suffix contains an '_' character! To better deal with this ambiguity, guessLibraryShortName() will recognize only "_debug" and "_profile" as valid Suffix values. Calling code needs to be tolerant of guessLibraryShortName() guessing incorrectly. The previous implementation of guessLibraryShortName() did not allow '_' characters to appear in short names. When present, the short name would be truncated, e.g., "libcompiler_rt" => "libcompiler". This change allows "libcompiler_rt" and "libcompiler_rt_debug" to both be recognized as "libcompiler_rt". rdar://47412244 Reviewers: kledzik, lhames, pete Reviewed By: pete Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56978 llvm-svn: 352104
2019-01-25 04:59:44 +08:00
// These dyld image suffixes are separated from the short name by a '_'
// character. Because the '_' character is commonly used to separate words in
// filenames guessLibraryShortName() cannot reliably separate a dylib's short
// name from an arbitrary image suffix; imagine if both the short name and the
// suffix contains an '_' character! To better deal with this ambiguity,
// guessLibraryShortName() will recognize only "_debug" and "_profile" as valid
// Suffix values. Calling code needs to be tolerant of guessLibraryShortName()
// guessing incorrectly.
//
// The Name of the dynamic library is also recognized as a library name if it
// has the following form:
// Foo.qtx
//
// If the Name of the dynamic library is none of the forms above then a NULL
// StringRef is returned.
StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
bool &isFramework,
StringRef &Suffix) {
StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx;
size_t a, b, c, d, Idx;
isFramework = false;
Suffix = StringRef();
// Pull off the last component and make Foo point to it
a = Name.rfind('/');
if (a == Name.npos || a == 0)
goto guess_library;
Foo = Name.slice(a+1, Name.npos);
// Look for a suffix starting with a '_'
Idx = Foo.rfind('_');
if (Idx != Foo.npos && Foo.size() >= 2) {
Suffix = Foo.slice(Idx, Foo.npos);
Limit dyld image suffixes guessed by guessLibraryShortName() Summary: guessLibraryShortName() separates a full Mach-O dylib install name path into a short name and a dyld image suffix. The short name is the name of the dylib without its path or extension. The dyld image suffix is a string used by dyld to load variants of dylibs if available at runtime; for example, "when binding this process, load 'debug' variants of all required dylibs." dyld knows exactly what the image suffix is, but by convention diagnostic tools such as llvm-nm attempt to guess suffix names by looking at the install name path. These dyld image suffixes are separated from the short name by a '_' character. Because the '_' character is commonly used to separate words in filenames guessLibraryShortName() cannot reliably separate a dylib's short name from an arbitrary image suffix; imagine if both the short name and the suffix contains an '_' character! To better deal with this ambiguity, guessLibraryShortName() will recognize only "_debug" and "_profile" as valid Suffix values. Calling code needs to be tolerant of guessLibraryShortName() guessing incorrectly. The previous implementation of guessLibraryShortName() did not allow '_' characters to appear in short names. When present, the short name would be truncated, e.g., "libcompiler_rt" => "libcompiler". This change allows "libcompiler_rt" and "libcompiler_rt_debug" to both be recognized as "libcompiler_rt". rdar://47412244 Reviewers: kledzik, lhames, pete Reviewed By: pete Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56978 llvm-svn: 352104
2019-01-25 04:59:44 +08:00
if (Suffix != "_debug" && Suffix != "_profile")
Suffix = StringRef();
else
Foo = Foo.slice(0, Idx);
}
// First look for the form Foo.framework/Foo
b = Name.rfind('/', a);
if (b == Name.npos)
Idx = 0;
else
Idx = b+1;
F = Name.slice(Idx, Idx + Foo.size());
DotFramework = Name.slice(Idx + Foo.size(),
Idx + Foo.size() + sizeof(".framework/")-1);
if (F == Foo && DotFramework == ".framework/") {
isFramework = true;
return Foo;
}
// Next look for the form Foo.framework/Versions/A/Foo
if (b == Name.npos)
goto guess_library;
c = Name.rfind('/', b);
if (c == Name.npos || c == 0)
goto guess_library;
V = Name.slice(c+1, Name.npos);
if (!V.startswith("Versions/"))
goto guess_library;
d = Name.rfind('/', c);
if (d == Name.npos)
Idx = 0;
else
Idx = d+1;
F = Name.slice(Idx, Idx + Foo.size());
DotFramework = Name.slice(Idx + Foo.size(),
Idx + Foo.size() + sizeof(".framework/")-1);
if (F == Foo && DotFramework == ".framework/") {
isFramework = true;
return Foo;
}
guess_library:
// pull off the suffix after the "." and make a point to it
a = Name.rfind('.');
if (a == Name.npos || a == 0)
return StringRef();
Dylib = Name.slice(a, Name.npos);
if (Dylib != ".dylib")
goto guess_qtx;
// First pull off the version letter for the form Foo.A.dylib if any.
if (a >= 3) {
Dot = Name.slice(a-2, a-1);
if (Dot == ".")
a = a - 2;
}
b = Name.rfind('/', a);
if (b == Name.npos)
b = 0;
else
b = b+1;
// ignore any suffix after an underbar like Foo_profile.A.dylib
Limit dyld image suffixes guessed by guessLibraryShortName() Summary: guessLibraryShortName() separates a full Mach-O dylib install name path into a short name and a dyld image suffix. The short name is the name of the dylib without its path or extension. The dyld image suffix is a string used by dyld to load variants of dylibs if available at runtime; for example, "when binding this process, load 'debug' variants of all required dylibs." dyld knows exactly what the image suffix is, but by convention diagnostic tools such as llvm-nm attempt to guess suffix names by looking at the install name path. These dyld image suffixes are separated from the short name by a '_' character. Because the '_' character is commonly used to separate words in filenames guessLibraryShortName() cannot reliably separate a dylib's short name from an arbitrary image suffix; imagine if both the short name and the suffix contains an '_' character! To better deal with this ambiguity, guessLibraryShortName() will recognize only "_debug" and "_profile" as valid Suffix values. Calling code needs to be tolerant of guessLibraryShortName() guessing incorrectly. The previous implementation of guessLibraryShortName() did not allow '_' characters to appear in short names. When present, the short name would be truncated, e.g., "libcompiler_rt" => "libcompiler". This change allows "libcompiler_rt" and "libcompiler_rt_debug" to both be recognized as "libcompiler_rt". rdar://47412244 Reviewers: kledzik, lhames, pete Reviewed By: pete Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56978 llvm-svn: 352104
2019-01-25 04:59:44 +08:00
Idx = Name.rfind('_');
if (Idx != Name.npos && Idx != b) {
Lib = Name.slice(b, Idx);
Suffix = Name.slice(Idx, a);
Limit dyld image suffixes guessed by guessLibraryShortName() Summary: guessLibraryShortName() separates a full Mach-O dylib install name path into a short name and a dyld image suffix. The short name is the name of the dylib without its path or extension. The dyld image suffix is a string used by dyld to load variants of dylibs if available at runtime; for example, "when binding this process, load 'debug' variants of all required dylibs." dyld knows exactly what the image suffix is, but by convention diagnostic tools such as llvm-nm attempt to guess suffix names by looking at the install name path. These dyld image suffixes are separated from the short name by a '_' character. Because the '_' character is commonly used to separate words in filenames guessLibraryShortName() cannot reliably separate a dylib's short name from an arbitrary image suffix; imagine if both the short name and the suffix contains an '_' character! To better deal with this ambiguity, guessLibraryShortName() will recognize only "_debug" and "_profile" as valid Suffix values. Calling code needs to be tolerant of guessLibraryShortName() guessing incorrectly. The previous implementation of guessLibraryShortName() did not allow '_' characters to appear in short names. When present, the short name would be truncated, e.g., "libcompiler_rt" => "libcompiler". This change allows "libcompiler_rt" and "libcompiler_rt_debug" to both be recognized as "libcompiler_rt". rdar://47412244 Reviewers: kledzik, lhames, pete Reviewed By: pete Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56978 llvm-svn: 352104
2019-01-25 04:59:44 +08:00
if (Suffix != "_debug" && Suffix != "_profile") {
Suffix = StringRef();
Lib = Name.slice(b, a);
}
}
else
Lib = Name.slice(b, a);
// There are incorrect library names of the form:
// libATS.A_profile.dylib so check for these.
if (Lib.size() >= 3) {
Dot = Lib.slice(Lib.size()-2, Lib.size()-1);
if (Dot == ".")
Lib = Lib.slice(0, Lib.size()-2);
}
return Lib;
guess_qtx:
Qtx = Name.slice(a, Name.npos);
if (Qtx != ".qtx")
return StringRef();
b = Name.rfind('/', a);
if (b == Name.npos)
Lib = Name.slice(0, a);
else
Lib = Name.slice(b+1, a);
// There are library names of the form: QT.A.qtx so check for these.
if (Lib.size() >= 3) {
Dot = Lib.slice(Lib.size()-2, Lib.size()-1);
if (Dot == ".")
Lib = Lib.slice(0, Lib.size()-2);
}
return Lib;
}
// getLibraryShortNameByIndex() is used to get the short name of the library
// for an undefined symbol in a linked Mach-O binary that was linked with the
// normal two-level namespace default (that is MH_TWOLEVEL in the header).
// It is passed the index (0 - based) of the library as translated from
// GET_LIBRARY_ORDINAL (1 - based).
std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
StringRef &Res) const {
if (Index >= Libraries.size())
return object_error::parse_failed;
// If the cache of LibrariesShortNames is not built up do that first for
// all the Libraries.
if (LibrariesShortNames.size() == 0) {
for (unsigned i = 0; i < Libraries.size(); i++) {
auto CommandOrErr =
getStructOrErr<MachO::dylib_command>(*this, Libraries[i]);
if (!CommandOrErr)
return object_error::parse_failed;
MachO::dylib_command D = CommandOrErr.get();
if (D.dylib.name >= D.cmdsize)
return object_error::parse_failed;
const char *P = (const char *)(Libraries[i]) + D.dylib.name;
StringRef Name = StringRef(P);
if (D.dylib.name+Name.size() >= D.cmdsize)
return object_error::parse_failed;
StringRef Suffix;
bool isFramework;
StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix);
if (shortName.empty())
LibrariesShortNames.push_back(Name);
else
LibrariesShortNames.push_back(shortName);
}
}
Res = LibrariesShortNames[Index];
return std::error_code();
}
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
uint32_t MachOObjectFile::getLibraryCount() const {
return Libraries.size();
}
Remove getRelocationAddress. Originally added in r139314. Back then it didn't actually get the address, it got whatever value the relocation used: address or offset. The values in different object formats are: * MachO: Always an offset. * COFF: Always an address, but when talking about the virtual address of sections it says: "for simplicity, compilers should set this to zero". * ELF: An offset for .o files and and address for .so files. In the case of the .so, the relocation in not linked to any section (sh_info is 0). We can't really compute an offset. Some API mappings would be: * Use getAddress for everything. It would be quite cumbersome. To compute the address elf has to follow sh_info, which can be corrupted and therefore the method has to return an ErrorOr. The address of the section is also the same for every relocation in a section, so we shouldn't have to check the error and fetch the value for every relocation. * Use a getValue and make it up to the user to know what it is getting. * Use a getOffset and: * Assert for dynamic ELF objects. That is a very peculiar case and it is probably fair to ask any tool that wants to support it to use ELF.h. The only tool we have that reads those (llvm-readobj) already does that. The only other use case I can think of is a dynamic linker. * Check that COFF .obj files have sections with zero virtual address spaces. If it turns out that some assembler/compiler produces these, we can change COFFObjectFile::getRelocationOffset to subtract it. Given COFF format, this can be done without the need for ErrorOr. The getRelocationAddress method was never implemented for COFF. It also had exactly one use in a very peculiar case: a shortcut for adding the section value to a pcrel reloc on MachO. Given that, I don't expect that there is any use out there of the C API. If that is not the case, let me know and I will add it back with the implementation inlined and do a proper deprecation. llvm-svn: 241450
2015-07-06 22:55:37 +08:00
section_iterator
MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const {
DataRefImpl Sec;
Sec.d.a = Rel->getRawDataRefImpl().d.a;
return section_iterator(SectionRef(Sec, this));
}
basic_symbol_iterator MachOObjectFile::symbol_begin() const {
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
DataRefImpl DRI;
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Symtab.nsyms == 0)
return basic_symbol_iterator(SymbolRef(DRI, this));
return getSymbolByIndex(0);
}
basic_symbol_iterator MachOObjectFile::symbol_end() const {
DataRefImpl DRI;
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Symtab.nsyms == 0)
return basic_symbol_iterator(SymbolRef(DRI, this));
unsigned SymbolTableEntrySize = is64Bit() ?
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist);
unsigned Offset = Symtab.symoff +
Symtab.nsyms * SymbolTableEntrySize;
DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return basic_symbol_iterator(SymbolRef(DRI, this));
}
symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
MachO::symtab_command Symtab = getSymtabLoadCommand();
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
if (!SymtabLoadCmd || Index >= Symtab.nsyms)
report_fatal_error("Requested symbol index is out of range.");
unsigned SymbolTableEntrySize =
is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. llvm-svn: 258576
2016-01-23 06:49:55 +08:00
DataRefImpl DRI;
DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff));
DRI.p += Index * SymbolTableEntrySize;
return basic_symbol_iterator(SymbolRef(DRI, this));
}
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const {
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd)
report_fatal_error("getSymbolIndex() called with no symbol table symbol");
unsigned SymbolTableEntrySize =
is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
DataRefImpl DRIstart;
DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff));
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize;
return Index;
}
section_iterator MachOObjectFile::section_begin() const {
DataRefImpl DRI;
return section_iterator(SectionRef(DRI, this));
}
section_iterator MachOObjectFile::section_end() const {
DataRefImpl DRI;
DRI.d.a = Sections.size();
return section_iterator(SectionRef(DRI, this));
}
uint8_t MachOObjectFile::getBytesInAddress() const {
return is64Bit() ? 8 : 4;
}
StringRef MachOObjectFile::getFileFormatName() const {
unsigned CPUType = getCPUType(*this);
if (!is64Bit()) {
switch (CPUType) {
case MachO::CPU_TYPE_I386:
return "Mach-O 32-bit i386";
case MachO::CPU_TYPE_ARM:
return "Mach-O arm";
case MachO::CPU_TYPE_ARM64_32:
return "Mach-O arm64 (ILP32)";
case MachO::CPU_TYPE_POWERPC:
return "Mach-O 32-bit ppc";
default:
return "Mach-O 32-bit unknown";
}
}
switch (CPUType) {
case MachO::CPU_TYPE_X86_64:
return "Mach-O 64-bit x86-64";
case MachO::CPU_TYPE_ARM64:
return "Mach-O arm64";
case MachO::CPU_TYPE_POWERPC64:
return "Mach-O 64-bit ppc64";
default:
return "Mach-O 64-bit unknown";
}
}
Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
switch (CPUType) {
case MachO::CPU_TYPE_I386:
return Triple::x86;
case MachO::CPU_TYPE_X86_64:
return Triple::x86_64;
case MachO::CPU_TYPE_ARM:
return Triple::arm;
case MachO::CPU_TYPE_ARM64:
return Triple::aarch64;
case MachO::CPU_TYPE_ARM64_32:
return Triple::aarch64_32;
case MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
case MachO::CPU_TYPE_POWERPC64:
return Triple::ppc64;
default:
return Triple::UnknownArch;
}
}
Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
const char **McpuDefault,
const char **ArchFlag) {
if (McpuDefault)
*McpuDefault = nullptr;
if (ArchFlag)
*ArchFlag = nullptr;
switch (CPUType) {
case MachO::CPU_TYPE_I386:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_I386_ALL:
if (ArchFlag)
*ArchFlag = "i386";
return Triple("i386-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_X86_64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_X86_64_ALL:
if (ArchFlag)
*ArchFlag = "x86_64";
return Triple("x86_64-apple-darwin");
case MachO::CPU_SUBTYPE_X86_64_H:
if (ArchFlag)
*ArchFlag = "x86_64h";
return Triple("x86_64h-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_ARM:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM_V4T:
if (ArchFlag)
*ArchFlag = "armv4t";
return Triple("armv4t-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V5TEJ:
if (ArchFlag)
*ArchFlag = "armv5e";
return Triple("armv5e-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_XSCALE:
if (ArchFlag)
*ArchFlag = "xscale";
return Triple("xscale-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6:
if (ArchFlag)
*ArchFlag = "armv6";
return Triple("armv6-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6M:
if (McpuDefault)
*McpuDefault = "cortex-m0";
if (ArchFlag)
*ArchFlag = "armv6m";
return Triple("armv6m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7:
if (ArchFlag)
*ArchFlag = "armv7";
return Triple("armv7-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7EM:
if (McpuDefault)
*McpuDefault = "cortex-m4";
if (ArchFlag)
*ArchFlag = "armv7em";
return Triple("thumbv7em-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7K:
if (McpuDefault)
*McpuDefault = "cortex-a7";
if (ArchFlag)
*ArchFlag = "armv7k";
return Triple("armv7k-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7M:
if (McpuDefault)
*McpuDefault = "cortex-m3";
if (ArchFlag)
*ArchFlag = "armv7m";
return Triple("thumbv7m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7S:
if (McpuDefault)
*McpuDefault = "cortex-a7";
if (ArchFlag)
*ArchFlag = "armv7s";
return Triple("armv7s-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_ARM64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM64_ALL:
if (McpuDefault)
*McpuDefault = "cyclone";
if (ArchFlag)
*ArchFlag = "arm64";
return Triple("arm64-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_ARM64_32:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM64_32_V8:
if (McpuDefault)
*McpuDefault = "cyclone";
if (ArchFlag)
*ArchFlag = "arm64_32";
return Triple("arm64_32-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_POWERPC:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_POWERPC_ALL:
if (ArchFlag)
*ArchFlag = "ppc";
return Triple("ppc-apple-darwin");
default:
return Triple();
}
case MachO::CPU_TYPE_POWERPC64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_POWERPC_ALL:
if (ArchFlag)
*ArchFlag = "ppc64";
return Triple("ppc64-apple-darwin");
default:
return Triple();
}
default:
return Triple();
}
}
Triple MachOObjectFile::getHostArch() {
return Triple(sys::getDefaultTargetTriple());
}
bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
auto validArchs = getValidArchs();
return llvm::find(validArchs, ArchFlag) != validArchs.end();
}
ArrayRef<StringRef> MachOObjectFile::getValidArchs() {
static const std::array<StringRef, 17> validArchs = {{
"i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e",
"armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m",
"armv7s", "arm64", "arm64_32", "ppc", "ppc64",
}};
return validArchs;
}
Triple::ArchType MachOObjectFile::getArch() const {
return getArch(getCPUType(*this), getCPUSubType(*this));
}
Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const {
return getArchTriple(Header.cputype, Header.cpusubtype, McpuDefault);
}
relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const {
DataRefImpl DRI;
DRI.d.a = Index;
return section_rel_begin(DRI);
}
relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const {
DataRefImpl DRI;
DRI.d.a = Index;
return section_rel_end(DRI);
}
dice_iterator MachOObjectFile::begin_dices() const {
DataRefImpl DRI;
if (!DataInCodeLoadCmd)
return dice_iterator(DiceRef(DRI, this));
MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff));
return dice_iterator(DiceRef(DRI, this));
}
dice_iterator MachOObjectFile::end_dices() const {
DataRefImpl DRI;
if (!DataInCodeLoadCmd)
return dice_iterator(DiceRef(DRI, this));
MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
unsigned Offset = DicLC.dataoff + DicLC.datasize;
DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return dice_iterator(DiceRef(DRI, this));
}
ExportEntry::ExportEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> T) : E(E), O(O), Trie(T) {}
void ExportEntry::moveToFirst() {
ErrorAsOutParameter ErrAsOutParam(E);
pushNode(0);
if (*E)
return;
pushDownUntilBottom();
}
void ExportEntry::moveToEnd() {
Stack.clear();
Done = true;
}
bool ExportEntry::operator==(const ExportEntry &Other) const {
// Common case, one at end, other iterating from begin.
if (Done || Other.Done)
return (Done == Other.Done);
// Not equal if different stack sizes.
if (Stack.size() != Other.Stack.size())
return false;
// Not equal if different cumulative strings.
if (!CumulativeString.equals(Other.CumulativeString))
return false;
// Equal if all nodes in both stacks match.
for (unsigned i=0; i < Stack.size(); ++i) {
if (Stack[i].Start != Other.Stack[i].Start)
return false;
}
return true;
}
uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr, const char **error) {
2014-09-03 02:50:24 +08:00
unsigned Count;
uint64_t Result = decodeULEB128(Ptr, &Count, Trie.end(), error);
2014-09-03 02:50:24 +08:00
Ptr += Count;
if (Ptr > Trie.end())
2014-09-03 02:50:24 +08:00
Ptr = Trie.end();
return Result;
}
StringRef ExportEntry::name() const {
return CumulativeString;
}
uint64_t ExportEntry::flags() const {
return Stack.back().Flags;
}
uint64_t ExportEntry::address() const {
return Stack.back().Address;
}
uint64_t ExportEntry::other() const {
return Stack.back().Other;
}
StringRef ExportEntry::otherName() const {
const char* ImportName = Stack.back().ImportName;
if (ImportName)
return StringRef(ImportName);
return StringRef();
}
uint32_t ExportEntry::nodeOffset() const {
return Stack.back().Start - Trie.begin();
}
2015-09-22 19:13:55 +08:00
ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
: Start(Ptr), Current(Ptr) {}
void ExportEntry::pushNode(uint64_t offset) {
ErrorAsOutParameter ErrAsOutParam(E);
const uint8_t *Ptr = Trie.begin() + offset;
NodeState State(Ptr);
const char *error;
uint64_t ExportInfoSize = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("export info size " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(offset));
moveToEnd();
return;
}
State.IsExportNode = (ExportInfoSize != 0);
const uint8_t* Children = State.Current + ExportInfoSize;
if (Children > Trie.end()) {
*E = malformedError(
"export info size: 0x" + Twine::utohexstr(ExportInfoSize) +
" in export trie data at node: 0x" + Twine::utohexstr(offset) +
" too big and extends past end of trie data");
moveToEnd();
return;
}
if (State.IsExportNode) {
const uint8_t *ExportStart = State.Current;
State.Flags = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("flags " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(offset));
moveToEnd();
return;
}
uint64_t Kind = State.Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK;
if (State.Flags != 0 &&
(Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR &&
Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE &&
Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL)) {
*E = malformedError(
"unsupported exported symbol kind: " + Twine((int)Kind) +
" in flags: 0x" + Twine::utohexstr(State.Flags) +
" in export trie data at node: 0x" + Twine::utohexstr(offset));
moveToEnd();
return;
}
if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
State.Address = 0;
State.Other = readULEB128(State.Current, &error); // dylib ordinal
if (error) {
*E = malformedError("dylib ordinal of re-export " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(offset));
moveToEnd();
return;
}
if (O != nullptr) {
if (State.Other > O->getLibraryCount()) {
*E = malformedError(
"bad library ordinal: " + Twine((int)State.Other) + " (max " +
Twine((int)O->getLibraryCount()) +
") in export trie data at node: 0x" + Twine::utohexstr(offset));
moveToEnd();
return;
}
}
State.ImportName = reinterpret_cast<const char*>(State.Current);
if (*State.ImportName == '\0') {
State.Current++;
} else {
const uint8_t *End = State.Current + 1;
if (End >= Trie.end()) {
*E = malformedError("import name of re-export in export trie data at "
"node: 0x" +
Twine::utohexstr(offset) +
" starts past end of trie data");
moveToEnd();
return;
}
while(*End != '\0' && End < Trie.end())
End++;
if (*End != '\0') {
*E = malformedError("import name of re-export in export trie data at "
"node: 0x" +
Twine::utohexstr(offset) +
" extends past end of trie data");
moveToEnd();
return;
}
State.Current = End + 1;
}
} else {
State.Address = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("address " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(offset));
moveToEnd();
return;
}
if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
State.Other = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("resolver of stub and resolver " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(offset));
moveToEnd();
return;
}
}
}
if(ExportStart + ExportInfoSize != State.Current) {
*E = malformedError(
"inconsistant export info size: 0x" +
Twine::utohexstr(ExportInfoSize) + " where actual size was: 0x" +
Twine::utohexstr(State.Current - ExportStart) +
" in export trie data at node: 0x" + Twine::utohexstr(offset));
moveToEnd();
return;
}
}
State.ChildCount = *Children;
if (State.ChildCount != 0 && Children + 1 >= Trie.end()) {
*E = malformedError("byte for count of childern in export trie data at "
"node: 0x" +
Twine::utohexstr(offset) +
" extends past end of trie data");
moveToEnd();
return;
}
State.Current = Children + 1;
State.NextChildIndex = 0;
State.ParentStringLength = CumulativeString.size();
Stack.push_back(State);
}
void ExportEntry::pushDownUntilBottom() {
ErrorAsOutParameter ErrAsOutParam(E);
const char *error;
while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
NodeState &Top = Stack.back();
CumulativeString.resize(Top.ParentStringLength);
for (;*Top.Current != 0 && Top.Current < Trie.end(); Top.Current++) {
2014-09-03 02:50:24 +08:00
char C = *Top.Current;
CumulativeString.push_back(C);
}
if (Top.Current >= Trie.end()) {
*E = malformedError("edge sub-string in export trie data at node: 0x" +
Twine::utohexstr(Top.Start - Trie.begin()) +
" for child #" + Twine((int)Top.NextChildIndex) +
" extends past end of trie data");
moveToEnd();
return;
}
Top.Current += 1;
uint64_t childNodeIndex = readULEB128(Top.Current, &error);
if (error) {
*E = malformedError("child node offset " + Twine(error) +
" in export trie data at node: 0x" +
Twine::utohexstr(Top.Start - Trie.begin()));
moveToEnd();
return;
}
for (const NodeState &node : nodes()) {
if (node.Start == Trie.begin() + childNodeIndex){
*E = malformedError("loop in childern in export trie data at node: 0x" +
Twine::utohexstr(Top.Start - Trie.begin()) +
" back to node: 0x" +
Twine::utohexstr(childNodeIndex));
moveToEnd();
return;
}
}
Top.NextChildIndex += 1;
pushNode(childNodeIndex);
if (*E)
return;
}
if (!Stack.back().IsExportNode) {
*E = malformedError("node is not an export node in export trie data at "
"node: 0x" +
Twine::utohexstr(Stack.back().Start - Trie.begin()));
moveToEnd();
return;
}
}
// We have a trie data structure and need a way to walk it that is compatible
// with the C++ iterator model. The solution is a non-recursive depth first
// traversal where the iterator contains a stack of parent nodes along with a
// string that is the accumulation of all edge strings along the parent chain
// to this point.
//
// There is one "export" node for each exported symbol. But because some
// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export
// node may have child nodes too.
//
// The algorithm for moveNext() is to keep moving down the leftmost unvisited
// child until hitting a node with no children (which is an export node or
// else the trie is malformed). On the way down, each node is pushed on the
// stack ivar. If there is no more ways down, it pops up one and tries to go
// down a sibling path until a childless node is reached.
void ExportEntry::moveNext() {
assert(!Stack.empty() && "ExportEntry::moveNext() with empty node stack");
if (!Stack.back().IsExportNode) {
*E = malformedError("node is not an export node in export trie data at "
"node: 0x" +
Twine::utohexstr(Stack.back().Start - Trie.begin()));
moveToEnd();
return;
}
Stack.pop_back();
while (!Stack.empty()) {
NodeState &Top = Stack.back();
if (Top.NextChildIndex < Top.ChildCount) {
pushDownUntilBottom();
// Now at the next export node.
return;
} else {
if (Top.IsExportNode) {
// This node has no children but is itself an export node.
CumulativeString.resize(Top.ParentStringLength);
return;
}
Stack.pop_back();
}
}
Done = true;
}
iterator_range<export_iterator>
MachOObjectFile::exports(Error &E, ArrayRef<uint8_t> Trie,
const MachOObjectFile *O) {
ExportEntry Start(&E, O, Trie);
if (Trie.empty())
Start.moveToEnd();
else
Start.moveToFirst();
ExportEntry Finish(&E, O, Trie);
Finish.moveToEnd();
return make_range(export_iterator(Start), export_iterator(Finish));
}
iterator_range<export_iterator> MachOObjectFile::exports(Error &Err) const {
return exports(Err, getDyldInfoExportsTrie(), this);
}
MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit)
: E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
PointerSize(is64Bit ? 8 : 4) {}
void MachORebaseEntry::moveToFirst() {
Ptr = Opcodes.begin();
moveNext();
}
void MachORebaseEntry::moveToEnd() {
Ptr = Opcodes.end();
RemainingLoopCount = 0;
Done = true;
}
void MachORebaseEntry::moveNext() {
ErrorAsOutParameter ErrAsOutParam(E);
// If in the middle of some loop, move to next rebasing in loop.
SegmentOffset += AdvanceAmount;
if (RemainingLoopCount) {
--RemainingLoopCount;
return;
}
// REBASE_OPCODE_DONE is only used for padding if we are not aligned to
// pointer size. Therefore it is possible to reach the end without ever having
// seen REBASE_OPCODE_DONE.
if (Ptr == Opcodes.end()) {
Done = true;
return;
}
bool More = true;
while (More) {
// Parse next opcode and set up next loop.
const uint8_t *OpcodeStart = Ptr;
uint8_t Byte = *Ptr++;
uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK;
uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK;
uint32_t Count, Skip;
const char *error = nullptr;
switch (Opcode) {
case MachO::REBASE_OPCODE_DONE:
More = false;
Done = true;
moveToEnd();
DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n");
break;
case MachO::REBASE_OPCODE_SET_TYPE_IMM:
RebaseType = ImmValue;
if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) {
*E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " +
Twine((int)RebaseType) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
<< "RebaseType=" << (int) RebaseType << "\n");
break;
case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
SegmentOffset = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
<< "SegmentIndex=" << SegmentIndex << ", "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
SegmentOffset += readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE("mach-o-rebase",
dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
SegmentOffset += ImmValue * PointerSize;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E =
malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED "
" (after adding immediate times the pointer size) " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE("mach-o-rebase",
dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
AdvanceAmount = PointerSize;
Skip = 0;
Count = ImmValue;
if (ImmValue != 0)
RemainingLoopCount = ImmValue - 1;
else
RemainingLoopCount = 0;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
2017-08-28 14:47:47 +08:00
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
AdvanceAmount = PointerSize;
Skip = 0;
Count = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (Count != 0)
RemainingLoopCount = Count - 1;
else
RemainingLoopCount = 0;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
2017-08-28 14:47:47 +08:00
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
Skip = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
AdvanceAmount = Skip + PointerSize;
Count = 1;
RemainingLoopCount = 0;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
2017-08-28 14:47:47 +08:00
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
Count = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
"ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (Count != 0)
RemainingLoopCount = Count - 1;
else
RemainingLoopCount = 0;
Skip = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
"ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
AdvanceAmount = Skip + PointerSize;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
"ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
2017-08-28 14:47:47 +08:00
return;
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
default:
*E = malformedError("bad rebase info (bad opcode value 0x" +
Twine::utohexstr(Opcode) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
}
}
uint64_t MachORebaseEntry::readULEB128(const char **error) {
unsigned Count;
uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
return Result;
}
int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; }
StringRef MachORebaseEntry::typeName() const {
switch (RebaseType) {
case MachO::REBASE_TYPE_POINTER:
return "pointer";
case MachO::REBASE_TYPE_TEXT_ABSOLUTE32:
return "text abs32";
case MachO::REBASE_TYPE_TEXT_PCREL32:
return "text rel32";
}
return "unknown";
}
// For use with the SegIndex of a checked Mach-O Rebase entry
// to get the segment name.
StringRef MachORebaseEntry::segmentName() const {
return O->BindRebaseSegmentName(SegmentIndex);
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry
// to get the section name.
StringRef MachORebaseEntry::sectionName() const {
return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry
// to get the address.
uint64_t MachORebaseEntry::address() const {
return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
}
bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
return (Ptr == Other.Ptr) &&
(RemainingLoopCount == Other.RemainingLoopCount) &&
(Done == Other.Done);
}
iterator_range<rebase_iterator>
MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O,
ArrayRef<uint8_t> Opcodes, bool is64) {
if (O->BindRebaseSectionTable == nullptr)
O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O);
MachORebaseEntry Start(&Err, O, Opcodes, is64);
Start.moveToFirst();
MachORebaseEntry Finish(&Err, O, Opcodes, is64);
Finish.moveToEnd();
return make_range(rebase_iterator(Start), rebase_iterator(Finish));
}
iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) {
return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit());
}
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK)
: E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
PointerSize(is64Bit ? 8 : 4), TableKind(BK) {}
void MachOBindEntry::moveToFirst() {
Ptr = Opcodes.begin();
moveNext();
}
void MachOBindEntry::moveToEnd() {
Ptr = Opcodes.end();
RemainingLoopCount = 0;
Done = true;
}
void MachOBindEntry::moveNext() {
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
ErrorAsOutParameter ErrAsOutParam(E);
// If in the middle of some loop, move to next binding in loop.
SegmentOffset += AdvanceAmount;
if (RemainingLoopCount) {
--RemainingLoopCount;
return;
}
// BIND_OPCODE_DONE is only used for padding if we are not aligned to
// pointer size. Therefore it is possible to reach the end without ever having
// seen BIND_OPCODE_DONE.
if (Ptr == Opcodes.end()) {
Done = true;
return;
}
bool More = true;
while (More) {
// Parse next opcode and set up next loop.
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
const uint8_t *OpcodeStart = Ptr;
uint8_t Byte = *Ptr++;
uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK;
uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK;
int8_t SignExtended;
const uint8_t *SymStart;
uint32_t Count, Skip;
const char *error = nullptr;
switch (Opcode) {
case MachO::BIND_OPCODE_DONE:
if (TableKind == Kind::Lazy) {
// Lazying bindings have a DONE opcode between entries. Need to ignore
// it to advance to next entry. But need not if this is last entry.
bool NotLastEntry = false;
for (const uint8_t *P = Ptr; P < Opcodes.end(); ++P) {
if (*P) {
NotLastEntry = true;
}
}
if (NotLastEntry)
break;
}
More = false;
moveToEnd();
DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
if (TableKind == Kind::Weak) {
*E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in "
"weak bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Ordinal = ImmValue;
LibraryOrdinalSet = true;
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
if (ImmValue > O->getLibraryCount()) {
*E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad "
"library ordinal: " +
Twine((int)ImmValue) + " (max " +
Twine((int)O->getLibraryCount()) +
") for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: "
<< "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
if (TableKind == Kind::Weak) {
*E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in "
"weak bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Ordinal = readULEB128(&error);
LibraryOrdinalSet = true;
if (error) {
*E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (Ordinal > (int)O->getLibraryCount()) {
*E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad "
"library ordinal: " +
Twine((int)Ordinal) + " (max " +
Twine((int)O->getLibraryCount()) +
") for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: "
<< "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
if (TableKind == Kind::Weak) {
*E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in "
"weak bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (ImmValue) {
SignExtended = MachO::BIND_OPCODE_MASK | ImmValue;
Ordinal = SignExtended;
if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) {
*E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown "
"special ordinal: " +
Twine((int)Ordinal) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
} else
Ordinal = 0;
LibraryOrdinalSet = true;
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: "
<< "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
Flags = ImmValue;
SymStart = Ptr;
while (*Ptr && (Ptr < Opcodes.end())) {
++Ptr;
}
if (Ptr == Opcodes.end()) {
*E = malformedError(
"for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM "
"symbol name extends past opcodes for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
SymbolName = StringRef(reinterpret_cast<const char*>(SymStart),
Ptr-SymStart);
++Ptr;
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: "
<< "SymbolName=" << SymbolName << "\n");
if (TableKind == Kind::Weak) {
if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
return;
}
break;
case MachO::BIND_OPCODE_SET_TYPE_IMM:
BindType = ImmValue;
if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) {
*E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " +
Twine((int)ImmValue) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_TYPE_IMM: "
<< "BindType=" << (int)BindType << "\n");
break;
case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
Addend = readSLEB128(&error);
if (error) {
*E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: "
<< "Addend=" << Addend << "\n");
break;
case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
SegmentOffset = readULEB128(&error);
if (error) {
*E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
<< "SegmentIndex=" << SegmentIndex << ", "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< "\n");
break;
case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
SegmentOffset += readULEB128(&error);
if (error) {
*E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
break;
case MachO::BIND_OPCODE_DO_BIND:
AdvanceAmount = PointerSize;
RemainingLoopCount = 0;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) +
" for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (SymbolName == StringRef()) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND missing preceding "
"BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
*E =
malformedError("for BIND_OPCODE_DO_BIND missing preceding "
"BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
dbgs() << "BIND_OPCODE_DO_BIND: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
if (TableKind == Kind::Lazy) {
*E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in "
"lazy bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (SymbolName == StringRef()) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing "
"preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode "
"at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing "
"preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
AdvanceAmount = readULEB128(&error) + PointerSize;
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
// Note, this is not really an error until the next bind but make no sense
// for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another
// bind operation.
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
AdvanceAmount, PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding "
"ULEB) " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
RemainingLoopCount = 0;
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
if (TableKind == Kind::Lazy) {
*E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not "
"allowed in lazy bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (SymbolName == StringRef()) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
"missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for "
"opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
"missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode "
"at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
AdvanceAmount = ImmValue * PointerSize + PointerSize;
RemainingLoopCount = 0;
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
AdvanceAmount, PointerSize);
if (error) {
*E =
malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
" (after adding immediate times the pointer size) " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
dbgs()
<< "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: "
<< format("SegmentOffset=0x%06X", SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
if (TableKind == Kind::Lazy) {
*E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not "
"allowed in lazy bind table for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Count = readULEB128(&error);
if (Count != 0)
RemainingLoopCount = Count - 1;
else
RemainingLoopCount = 0;
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
" (count value) " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Skip = readULEB128(&error);
AdvanceAmount = Skip + PointerSize;
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
" (skip value) " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (SymbolName == StringRef()) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
"missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for "
"opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
"missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode "
"at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
PointerSize, Count, Skip);
if (error) {
*E =
malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " +
Twine(error) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
2017-08-28 14:47:47 +08:00
return;
}
DEBUG_WITH_TYPE(
"mach-o-bind",
dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: "
<< format("SegmentOffset=0x%06X", SegmentOffset)
<< ", AdvanceAmount=" << AdvanceAmount
<< ", RemainingLoopCount=" << RemainingLoopCount
<< "\n");
return;
default:
*E = malformedError("bad bind info (bad opcode value 0x" +
Twine::utohexstr(Opcode) + " for opcode at: 0x" +
Twine::utohexstr(OpcodeStart - Opcodes.begin()));
moveToEnd();
return;
}
}
}
uint64_t MachOBindEntry::readULEB128(const char **error) {
unsigned Count;
uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
return Result;
}
int64_t MachOBindEntry::readSLEB128(const char **error) {
unsigned Count;
int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
return Result;
}
int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; }
StringRef MachOBindEntry::typeName() const {
switch (BindType) {
case MachO::BIND_TYPE_POINTER:
return "pointer";
case MachO::BIND_TYPE_TEXT_ABSOLUTE32:
return "text abs32";
case MachO::BIND_TYPE_TEXT_PCREL32:
return "text rel32";
}
return "unknown";
}
StringRef MachOBindEntry::symbolName() const { return SymbolName; }
int64_t MachOBindEntry::addend() const { return Addend; }
uint32_t MachOBindEntry::flags() const { return Flags; }
int MachOBindEntry::ordinal() const { return Ordinal; }
// For use with the SegIndex of a checked Mach-O Bind entry
// to get the segment name.
StringRef MachOBindEntry::segmentName() const {
return O->BindRebaseSegmentName(SegmentIndex);
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry
// to get the section name.
StringRef MachOBindEntry::sectionName() const {
return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry
// to get the address.
uint64_t MachOBindEntry::address() const {
return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
}
bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
return (Ptr == Other.Ptr) &&
(RemainingLoopCount == Other.RemainingLoopCount) &&
(Done == Other.Done);
}
// Build table of sections so SegIndex/SegOffset pairs can be translated.
BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) {
uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0;
StringRef CurSegName;
uint64_t CurSegAddress;
for (const SectionRef &Section : Obj->sections()) {
SectionInfo Info;
Expected<StringRef> NameOrErr = Section.getName();
if (!NameOrErr)
consumeError(NameOrErr.takeError());
else
Info.SectionName = *NameOrErr;
Info.Address = Section.getAddress();
Info.Size = Section.getSize();
Info.SegmentName =
Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl());
if (!Info.SegmentName.equals(CurSegName)) {
++CurSegIndex;
CurSegName = Info.SegmentName;
CurSegAddress = Info.Address;
}
Info.SegmentIndex = CurSegIndex - 1;
Info.OffsetInSegment = Info.Address - CurSegAddress;
Info.SegmentStartAddress = CurSegAddress;
Sections.push_back(Info);
}
MaxSegIndex = CurSegIndex;
}
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
// For use with a SegIndex, SegOffset, and PointerSize triple in
// MachOBindEntry::moveNext() to validate a MachOBindEntry or MachORebaseEntry.
//
// Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
// that fully contains a pointer at that location. Multiple fixups in a bind
// (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
// be tested via the Count and Skip parameters.
const char * BindRebaseSegInfo::checkSegAndOffsets(int32_t SegIndex,
uint64_t SegOffset,
uint8_t PointerSize,
uint32_t Count,
uint32_t Skip) {
if (SegIndex == -1)
return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB";
if (SegIndex >= MaxSegIndex)
return "bad segIndex (too large)";
Fix Mach-O bind and rebase validation errors in libObject Summary: llvm-objdump (via libObject) validates DYLD_INFO rebase and bind entries against the basic structure found in the Mach-O file before evaluating the contents of those entries. Certain malformed Mach-Os can defeat the validation check and force llvm-objdump (libObject) to crash. The previous logic verified a rebase or bind started in a valid Mach-O section, but did not verify that the section wholely contained the fixup. It also generally allows rebases or binds to start immediately after a valid section even if that range is not itself part of a valid section. Finally, bind and rebase opcodes that indicate more than one fixup (apply N times...) are not completely validated: only the first and final fixups are checked. The previous logic also rejected certain binaries as false positives. Some bind and rebase opcodes can modify the state machine such that the next bind or rebase will fail. libObject will reject these opcodes as invalid in order to be helpful and print an error message associated with the instruction that caused the problem, even though the binary is not actually illegal until it consumes the invalid state in the state machine. In other words, libObject may reject a Mach-O binary that Apple's dynamic linker may consider legal. The original version of macho-rebase-add-addr-uleb-too-big is an example of such a binary. I have replaced the existing checkSegAndOffset and checkCountAndSkip functions with a single function, checkSegAndOffsets, which validates all of the fixups realized by a DYLD_INFO opcode. checkSegAndOffsets verifies that a Mach-O section fully contains each fixup. Every fixup realized by an opcode is validated, and some (but not all!) inconsistencies in the state machine are allowed until a fixup is realized. This means that libObject may fail on an opcode that realizes a fixup, not on the opcode that introduced the arithmetic error. Existing test cases have been modified to reflect the changes in error messages returned by libObject. What's more, the test case for macho-rebase-add-addr-uleb-too-big has been modified so that it actually triggers the error condition; the new code in libObject considers the original test binary "legal". rdar://47797757 Reviewers: lhames, pete, ab Reviewed By: pete Subscribers: rupprecht, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59574 llvm-svn: 356629
2019-03-21 07:21:16 +08:00
for (uint32_t i = 0; i < Count; ++i) {
uint32_t Start = SegOffset + i * (PointerSize + Skip);
uint32_t End = Start + PointerSize;
bool Found = false;
for (const SectionInfo &SI : Sections) {
if (SI.SegmentIndex != SegIndex)
continue;
if ((SI.OffsetInSegment<=Start) && (Start<(SI.OffsetInSegment+SI.Size))) {
if (End <= SI.OffsetInSegment + SI.Size) {
Found = true;
break;
}
else
return "bad offset, extends beyond section boundary";
}
}
if (!Found)
return "bad offset, not in section";
}
return nullptr;
}
// For use with the SegIndex of a checked Mach-O Bind or Rebase entry
// to get the segment name.
StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) {
for (const SectionInfo &SI : Sections) {
if (SI.SegmentIndex == SegIndex)
return SI.SegmentName;
}
llvm_unreachable("invalid SegIndex");
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
// to get the SectionInfo.
const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection(
int32_t SegIndex, uint64_t SegOffset) {
for (const SectionInfo &SI : Sections) {
if (SI.SegmentIndex != SegIndex)
continue;
if (SI.OffsetInSegment > SegOffset)
continue;
if (SegOffset >= (SI.OffsetInSegment + SI.Size))
continue;
return SI;
}
llvm_unreachable("SegIndex and SegOffset not in any section");
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
// entry to get the section name.
StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex,
uint64_t SegOffset) {
return findSection(SegIndex, SegOffset).SectionName;
}
// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
// entry to get the address.
uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) {
const SectionInfo &SI = findSection(SegIndex, OffsetInSeg);
return SI.SegmentStartAddress + OffsetInSeg;
}
iterator_range<bind_iterator>
MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O,
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
ArrayRef<uint8_t> Opcodes, bool is64,
MachOBindEntry::Kind BKind) {
if (O->BindRebaseSectionTable == nullptr)
O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O);
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
MachOBindEntry Start(&Err, O, Opcodes, is64, BKind);
Start.moveToFirst();
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind);
Finish.moveToEnd();
return make_range(bind_iterator(Start), bind_iterator(Finish));
}
iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) {
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Regular);
}
iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) {
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Lazy);
}
iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) {
Actually add error handling to unpacking the dyld compact bind and other tables. Providing a helpful error message to what the error is and where the error occurred based on which opcode it was associated with. There have been handful of bug fixes dealing with bad bind info in object files, r294021 and r249845, which only put a band aid on the problem after a bad bind table was created after unpacking from its compact info. In these cases a bind table should have never been created and an error should have simply been generated. This change puts in place the plumbing to allow checking and returning of an error when the compact info is unpacked. This follows the model of iterators that can fail that Lang Hanes designed when fixing the problem for bad archives r275316 (or r275361). This change uses one of the existing test cases that now causes an error instead of printing <<bad library ordinal>> after a bad bind table is created. The error uses the offset into the opcode table as shown with the macOS dyldinfo(1) tool to indicate where the error is and which opcode and which parameter is in error. For example the exiting test case has this lazy binding opcode table: % dyldinfo -opcodes test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 … lazy binding opcodes: 0x0000 BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB(0x02, 0x00000010) 0x0002 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM(2) In the test case the binary only has one library so setting the library ordinal to the value of 2 in the BIND_OPCODE_SET_DYLIB_ORDINAL_IMM opcode at 0x0002 above is an error. This now produces this error message: % llvm-objdump -lazy-bind bad-ordinal.macho-x86_64 … llvm-objdump: 'bad-ordinal.macho-x86_64': truncated or malformed object (for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad library ordinal: 2 (max 1) for opcode at: 0x2) This change provides the plumbing for the error handling and one example of an error message. Other error checks and test cases will be added in follow on commits. llvm-svn: 296527
2017-03-01 05:47:07 +08:00
return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Weak);
}
MachOObjectFile::load_command_iterator
MachOObjectFile::begin_load_commands() const {
return LoadCommands.begin();
}
MachOObjectFile::load_command_iterator
MachOObjectFile::end_load_commands() const {
return LoadCommands.end();
}
iterator_range<MachOObjectFile::load_command_iterator>
MachOObjectFile::load_commands() const {
return make_range(begin_load_commands(), end_load_commands());
}
StringRef
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
return parseSegmentOrSectionName(Raw.data());
}
ArrayRef<char>
MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
assert(Sec.d.a < Sections.size() && "Should have detected this earlier");
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
return makeArrayRef(Base->sectname);
}
ArrayRef<char>
MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
assert(Sec.d.a < Sections.size() && "Should have detected this earlier");
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
return makeArrayRef(Base->segname);
}
bool
MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE)
const {
if (getCPUType(*this) == MachO::CPU_TYPE_X86_64)
return false;
return getPlainRelocationAddress(RE) & MachO::R_SCATTERED;
}
unsigned MachOObjectFile::getPlainRelocationSymbolNum(
const MachO::any_relocation_info &RE) const {
if (isLittleEndian())
return RE.r_word1 & 0xffffff;
return RE.r_word1 >> 8;
}
bool MachOObjectFile::getPlainRelocationExternal(
const MachO::any_relocation_info &RE) const {
if (isLittleEndian())
return (RE.r_word1 >> 27) & 1;
return (RE.r_word1 >> 4) & 1;
}
bool MachOObjectFile::getScatteredRelocationScattered(
const MachO::any_relocation_info &RE) const {
return RE.r_word0 >> 31;
}
uint32_t MachOObjectFile::getScatteredRelocationValue(
const MachO::any_relocation_info &RE) const {
return RE.r_word1;
}
uint32_t MachOObjectFile::getScatteredRelocationType(
const MachO::any_relocation_info &RE) const {
return (RE.r_word0 >> 24) & 0xf;
}
unsigned MachOObjectFile::getAnyRelocationAddress(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationAddress(RE);
return getPlainRelocationAddress(RE);
}
unsigned MachOObjectFile::getAnyRelocationPCRel(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationPCRel(RE);
return getPlainRelocationPCRel(*this, RE);
}
unsigned MachOObjectFile::getAnyRelocationLength(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationLength(RE);
return getPlainRelocationLength(*this, RE);
}
unsigned
MachOObjectFile::getAnyRelocationType(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationType(RE);
return getPlainRelocationType(*this, RE);
}
SectionRef
MachOObjectFile::getAnyRelocationSection(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
return *section_end();
unsigned SecNum = getPlainRelocationSymbolNum(RE);
if (SecNum == MachO::R_ABS || SecNum > Sections.size())
return *section_end();
DataRefImpl DRI;
DRI.d.a = SecNum - 1;
return SectionRef(DRI, this);
}
MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const {
assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
return getStruct<MachO::section>(*this, Sections[DRI.d.a]);
}
MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]);
}
MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L,
unsigned Index) const {
const char *Sec = getSectionPtr(*this, L, Index);
return getStruct<MachO::section>(*this, Sec);
}
MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
unsigned Index) const {
const char *Sec = getSectionPtr(*this, L, Index);
return getStruct<MachO::section_64>(*this, Sec);
}
MachO::nlist
MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
return getStruct<MachO::nlist>(*this, P);
}
MachO::nlist_64
MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
return getStruct<MachO::nlist_64>(*this, P);
}
MachO::linkedit_data_command
MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::linkedit_data_command>(*this, L.Ptr);
}
MachO::segment_command
MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::segment_command>(*this, L.Ptr);
}
MachO::segment_command_64
MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::segment_command_64>(*this, L.Ptr);
}
MachO::linker_option_command
MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::linker_option_command>(*this, L.Ptr);
}
MachO::version_min_command
MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::version_min_command>(*this, L.Ptr);
}
MachO::note_command
MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::note_command>(*this, L.Ptr);
}
MachO::build_version_command
MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::build_version_command>(*this, L.Ptr);
}
MachO::build_tool_version
MachOObjectFile::getBuildToolVersion(unsigned index) const {
return getStruct<MachO::build_tool_version>(*this, BuildTools[index]);
}
MachO::dylib_command
MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::dylib_command>(*this, L.Ptr);
}
MachO::dyld_info_command
MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::dyld_info_command>(*this, L.Ptr);
}
MachO::dylinker_command
MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::dylinker_command>(*this, L.Ptr);
}
MachO::uuid_command
MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::uuid_command>(*this, L.Ptr);
}
MachO::rpath_command
MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::rpath_command>(*this, L.Ptr);
}
MachO::source_version_command
MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::source_version_command>(*this, L.Ptr);
}
MachO::entry_point_command
MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::entry_point_command>(*this, L.Ptr);
}
MachO::encryption_info_command
MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::encryption_info_command>(*this, L.Ptr);
}
MachO::encryption_info_command_64
MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const {
return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr);
}
MachO::sub_framework_command
MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::sub_framework_command>(*this, L.Ptr);
}
MachO::sub_umbrella_command
MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr);
}
MachO::sub_library_command
MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::sub_library_command>(*this, L.Ptr);
}
MachO::sub_client_command
MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::sub_client_command>(*this, L.Ptr);
}
MachO::routines_command
MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::routines_command>(*this, L.Ptr);
}
MachO::routines_command_64
MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const {
return getStruct<MachO::routines_command_64>(*this, L.Ptr);
}
MachO::thread_command
MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::thread_command>(*this, L.Ptr);
}
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
uint32_t Offset;
if (getHeader().filetype == MachO::MH_OBJECT) {
DataRefImpl Sec;
Sec.d.a = Rel.d.a;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Offset = Sect.reloff;
} else {
MachO::section Sect = getSection(Sec);
Offset = Sect.reloff;
}
} else {
MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand();
if (Rel.d.a == 0)
Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations
else
Offset = DysymtabLoadCmd.locreloff; // Offset to the local relocations
}
auto P = reinterpret_cast<const MachO::any_relocation_info *>(
getPtr(*this, Offset)) + Rel.d.b;
return getStruct<MachO::any_relocation_info>(
*this, reinterpret_cast<const char *>(P));
}
MachO::data_in_code_entry
MachOObjectFile::getDice(DataRefImpl Rel) const {
const char *P = reinterpret_cast<const char *>(Rel.p);
return getStruct<MachO::data_in_code_entry>(*this, P);
}
const MachO::mach_header &MachOObjectFile::getHeader() const {
return Header;
}
const MachO::mach_header_64 &MachOObjectFile::getHeader64() const {
assert(is64Bit());
return Header64;
}
uint32_t MachOObjectFile::getIndirectSymbolTableEntry(
const MachO::dysymtab_command &DLC,
unsigned Index) const {
uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t);
return getStruct<uint32_t>(*this, getPtr(*this, Offset));
}
MachO::data_in_code_entry
MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
unsigned Index) const {
uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry);
return getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset));
}
MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
if (SymtabLoadCmd)
return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd);
// If there is no SymtabLoadCmd return a load command with zero'ed fields.
MachO::symtab_command Cmd;
Cmd.cmd = MachO::LC_SYMTAB;
Cmd.cmdsize = sizeof(MachO::symtab_command);
Cmd.symoff = 0;
Cmd.nsyms = 0;
Cmd.stroff = 0;
Cmd.strsize = 0;
return Cmd;
}
MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
if (DysymtabLoadCmd)
return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd);
// If there is no DysymtabLoadCmd return a load command with zero'ed fields.
MachO::dysymtab_command Cmd;
Cmd.cmd = MachO::LC_DYSYMTAB;
Cmd.cmdsize = sizeof(MachO::dysymtab_command);
Cmd.ilocalsym = 0;
Cmd.nlocalsym = 0;
Cmd.iextdefsym = 0;
Cmd.nextdefsym = 0;
Cmd.iundefsym = 0;
Cmd.nundefsym = 0;
Cmd.tocoff = 0;
Cmd.ntoc = 0;
Cmd.modtaboff = 0;
Cmd.nmodtab = 0;
Cmd.extrefsymoff = 0;
Cmd.nextrefsyms = 0;
Cmd.indirectsymoff = 0;
Cmd.nindirectsyms = 0;
Cmd.extreloff = 0;
Cmd.nextrel = 0;
Cmd.locreloff = 0;
Cmd.nlocrel = 0;
return Cmd;
}
MachO::linkedit_data_command
MachOObjectFile::getDataInCodeLoadCommand() const {
if (DataInCodeLoadCmd)
return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd);
// If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
MachO::linkedit_data_command Cmd;
Cmd.cmd = MachO::LC_DATA_IN_CODE;
Cmd.cmdsize = sizeof(MachO::linkedit_data_command);
Cmd.dataoff = 0;
Cmd.datasize = 0;
return Cmd;
}
MachO::linkedit_data_command
MachOObjectFile::getLinkOptHintsLoadCommand() const {
if (LinkOptHintsLoadCmd)
return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd);
// If there is no LinkOptHintsLoadCmd return a load command with zero'ed
// fields.
MachO::linkedit_data_command Cmd;
Cmd.cmd = MachO::LC_LINKER_OPTIMIZATION_HINT;
Cmd.cmdsize = sizeof(MachO::linkedit_data_command);
Cmd.dataoff = 0;
Cmd.datasize = 0;
return Cmd;
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
if (!DyldInfoOrErr)
return None;
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
2015-09-22 19:13:55 +08:00
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off));
return makeArrayRef(Ptr, DyldInfo.rebase_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
if (!DyldInfoOrErr)
return None;
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
2015-09-22 19:13:55 +08:00
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off));
return makeArrayRef(Ptr, DyldInfo.bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
if (!DyldInfoOrErr)
return None;
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
2015-09-22 19:13:55 +08:00
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off));
return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
if (!DyldInfoOrErr)
return None;
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
2015-09-22 19:13:55 +08:00
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off));
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
if (!DyldInfoLoadCmd)
return None;
auto DyldInfoOrErr =
getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
if (!DyldInfoOrErr)
return None;
MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
2015-09-22 19:13:55 +08:00
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off));
return makeArrayRef(Ptr, DyldInfo.export_size);
}
ArrayRef<uint8_t> MachOObjectFile::getUuid() const {
if (!UuidLoadCmd)
return None;
// Returning a pointer is fine as uuid doesn't need endian swapping.
const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid);
return makeArrayRef(reinterpret_cast<const uint8_t *>(Ptr), 16);
}
StringRef MachOObjectFile::getStringTableData() const {
MachO::symtab_command S = getSymtabLoadCommand();
return getData().substr(S.stroff, S.strsize);
}
bool MachOObjectFile::is64Bit() const {
return getType() == getMachOType(false, true) ||
getType() == getMachOType(true, true);
}
void MachOObjectFile::ReadULEB128s(uint64_t Index,
SmallVectorImpl<uint64_t> &Out) const {
DataExtractor extractor(ObjectFile::getData(), true, 0);
uint64_t offset = Index;
uint64_t data = 0;
while (uint64_t delta = extractor.getULEB128(&offset)) {
data += delta;
Out.push_back(data);
}
}
bool MachOObjectFile::isRelocatableObject() const {
return getHeader().filetype == MachO::MH_OBJECT;
}
Expected<std::unique_ptr<MachOObjectFile>>
ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer,
uint32_t UniversalCputype,
uint32_t UniversalIndex) {
StringRef Magic = Buffer.getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE")
return MachOObjectFile::create(Buffer, false, false,
UniversalCputype, UniversalIndex);
if (Magic == "\xCE\xFA\xED\xFE")
return MachOObjectFile::create(Buffer, true, false,
UniversalCputype, UniversalIndex);
if (Magic == "\xFE\xED\xFA\xCF")
return MachOObjectFile::create(Buffer, false, true,
UniversalCputype, UniversalIndex);
if (Magic == "\xCF\xFA\xED\xFE")
return MachOObjectFile::create(Buffer, true, true,
UniversalCputype, UniversalIndex);
return make_error<GenericBinaryError>("Unrecognized MachO magic number",
object_error::invalid_file_type);
}
StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const {
return StringSwitch<StringRef>(Name)
.Case("debug_str_offs", "debug_str_offsets")
.Default(Name);
}