forked from OSchip/llvm-project
[mach-o] Add support for LC_DATA_IN_CODE
Sometimes compilers emit data into code sections (e.g. constant pools or jump tables). These runs of data can throw off disassemblers. The solution in mach-o is that ranges of data-in-code are encoded into a table pointed to by the LC_DATA_IN_CODE load command. The way the data-in-code information is encoded into lld's Atom model is that the start and end of each data run are marked with a Reference whose offset is the start/end of the data run. For arm, the switch back to code also marks whether it is thumb or arm code. llvm-svn: 213901
This commit is contained in:
parent
8ec1474f7f
commit
21921375cc
|
@ -140,6 +140,30 @@ public:
|
|||
/// Add arch-specific References.
|
||||
virtual void addAdditionalReferences(MachODefinedAtom &atom) { }
|
||||
|
||||
// Add Reference for data-in-code marker.
|
||||
virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff,
|
||||
uint16_t length, uint16_t kind) { }
|
||||
|
||||
/// Returns true if the specificed Reference value marks the start or end
|
||||
/// of a data-in-code range in an atom.
|
||||
virtual bool isDataInCodeTransition(Reference::KindValue refKind) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns the Reference value for a Reference that marks that start of
|
||||
/// a data-in-code range.
|
||||
virtual Reference::KindValue dataInCodeTransitionStart(
|
||||
const MachODefinedAtom &atom) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the Reference value for a Reference that marks that end of
|
||||
/// a data-in-code range.
|
||||
virtual Reference::KindValue dataInCodeTransitionEnd(
|
||||
const MachODefinedAtom &atom) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Only relevant for 32-bit arm archs.
|
||||
virtual bool isThumbFunction(const DefinedAtom &atom) { return false; }
|
||||
|
||||
|
|
|
@ -71,6 +71,28 @@ public:
|
|||
|
||||
void addAdditionalReferences(MachODefinedAtom &atom) override;
|
||||
|
||||
/// Returns true when \p refKind is one of the mode-marker kinds
/// (modeThumbCode, modeArmCode or modeData); false for every other kind.
bool isDataInCodeTransition(Reference::KindValue refKind) override {
  switch (refKind) {
  case modeThumbCode:
  case modeArmCode:
  case modeData:
    return true;
  default:
    // No trailing 'break': it was unreachable after the return.
    return false;
  }
}
|
||||
|
||||
/// Kind used for the Reference that marks where a data-in-code run begins.
/// Always modeData, regardless of \p atom.
Reference::KindValue
dataInCodeTransitionStart(const MachODefinedAtom &atom) override {
  return modeData;
}
|
||||
|
||||
/// Kind used for the Reference that marks where a data-in-code run ends.
/// The switch back to code records the instruction set: modeThumbCode when
/// \p atom reports isThumb(), otherwise modeArmCode.
Reference::KindValue
dataInCodeTransitionEnd(const MachODefinedAtom &atom) override {
  if (atom.isThumb())
    return modeThumbCode;
  return modeArmCode;
}
|
||||
|
||||
bool isThumbFunction(const DefinedAtom &atom) override;
|
||||
|
||||
private:
|
||||
|
@ -82,6 +104,7 @@ private:
|
|||
|
||||
modeThumbCode, /// Content starting at this offset is thumb.
|
||||
modeArmCode, /// Content starting at this offset is arm.
|
||||
modeData, /// Content starting at this offset is data.
|
||||
|
||||
// Kinds found in mach-o .o files:
|
||||
thumb_b22, /// ex: bl _foo
|
||||
|
@ -143,6 +166,7 @@ ArchHandler_arm::~ArchHandler_arm() { }
|
|||
const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = {
|
||||
LLD_KIND_STRING_ENTRY(modeThumbCode),
|
||||
LLD_KIND_STRING_ENTRY(modeArmCode),
|
||||
LLD_KIND_STRING_ENTRY(modeData),
|
||||
LLD_KIND_STRING_ENTRY(thumb_b22),
|
||||
LLD_KIND_STRING_ENTRY(thumb_movw),
|
||||
LLD_KIND_STRING_ENTRY(thumb_movt),
|
||||
|
@ -735,6 +759,8 @@ void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *location,
|
|||
case modeArmCode:
|
||||
thumbMode = false;
|
||||
break;
|
||||
case modeData:
|
||||
break;
|
||||
case thumb_b22:
|
||||
assert(thumbMode);
|
||||
displacement = (targetAddress - (fixupAddress + 4)) + ref.addend();
|
||||
|
@ -868,6 +894,8 @@ void ArchHandler_arm::applyFixupRelocatable(const Reference &ref,
|
|||
case modeArmCode:
|
||||
thumbMode = false;
|
||||
break;
|
||||
case modeData:
|
||||
break;
|
||||
case thumb_b22:
|
||||
assert(thumbMode);
|
||||
if (useExternalReloc)
|
||||
|
@ -971,6 +999,8 @@ void ArchHandler_arm::appendSectionRelocations(
|
|||
switch (ref.kindValue()) {
|
||||
case modeThumbCode:
|
||||
case modeArmCode:
|
||||
case modeData:
|
||||
break;
|
||||
// Do nothing.
|
||||
break;
|
||||
case thumb_b22:
|
||||
|
@ -1174,7 +1204,7 @@ bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) {
|
|||
return false;
|
||||
if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
|
||||
continue;
|
||||
assert(ref->kindArch() == Reference::KindArch::ARM);
|
||||
assert(ref->kindArch() == Reference::KindArch::ARM);
|
||||
if (ref->kindValue() == modeThumbCode)
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -69,6 +69,27 @@ public:
|
|||
FindAddressForAtom addressForAtom,
|
||||
normalized::Relocations &relocs) override;
|
||||
|
||||
/// Returns true when \p refKind is one of the mode-marker kinds
/// (modeCode or modeData); false for every other kind.
bool isDataInCodeTransition(Reference::KindValue refKind) override {
  switch (refKind) {
  case modeCode:
  case modeData:
    return true;
  default:
    // No trailing 'break': it was unreachable after the return.
    return false;
  }
}
|
||||
|
||||
/// Kind used for the Reference that marks where a data-in-code run begins.
/// Always modeData, regardless of \p atom.
Reference::KindValue
dataInCodeTransitionStart(const MachODefinedAtom &atom) override {
  return modeData;
}
|
||||
|
||||
/// Kind used for the Reference that marks where a data-in-code run ends.
/// Always modeCode, regardless of \p atom.
Reference::KindValue
dataInCodeTransitionEnd(const MachODefinedAtom &atom) override {
  return modeCode;
}
|
||||
|
||||
private:
|
||||
static const Registry::KindStrings _sKindStrings[];
|
||||
static const StubInfo _sStubInfo;
|
||||
|
@ -76,6 +97,9 @@ private:
|
|||
enum : Reference::KindValue {
|
||||
invalid, /// for error condition
|
||||
|
||||
modeCode, /// Content starting at this offset is code.
|
||||
modeData, /// Content starting at this offset is data.
|
||||
|
||||
// Kinds found in mach-o .o files:
|
||||
branch32, /// ex: call _foo
|
||||
branch16, /// ex: callw _foo
|
||||
|
@ -115,6 +139,8 @@ ArchHandler_x86::~ArchHandler_x86() { }
|
|||
|
||||
const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = {
|
||||
LLD_KIND_STRING_ENTRY(invalid),
|
||||
LLD_KIND_STRING_ENTRY(modeCode),
|
||||
LLD_KIND_STRING_ENTRY(modeData),
|
||||
LLD_KIND_STRING_ENTRY(branch32),
|
||||
LLD_KIND_STRING_ENTRY(branch16),
|
||||
LLD_KIND_STRING_ENTRY(abs32),
|
||||
|
@ -390,6 +416,8 @@ void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *location,
|
|||
case negDelta32:
|
||||
write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend());
|
||||
break;
|
||||
case modeCode:
|
||||
case modeData:
|
||||
case lazyPointer:
|
||||
case lazyImmediateLocation:
|
||||
// do nothing
|
||||
|
@ -434,6 +462,8 @@ void ArchHandler_x86::applyFixupRelocatable(const Reference &ref,
|
|||
case negDelta32:
|
||||
write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend());
|
||||
break;
|
||||
case modeCode:
|
||||
case modeData:
|
||||
case lazyPointer:
|
||||
case lazyImmediateLocation:
|
||||
// do nothing
|
||||
|
@ -480,6 +510,9 @@ void ArchHandler_x86::appendSectionRelocations(
|
|||
uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
|
||||
bool useExternalReloc = useExternalRelocationTo(*ref.target());
|
||||
switch (ref.kindValue()) {
|
||||
case modeCode:
|
||||
case modeData:
|
||||
break;
|
||||
case branch32:
|
||||
if (useExternalReloc) {
|
||||
appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
|
||||
|
|
|
@ -56,6 +56,7 @@
|
|||
using llvm::BumpPtrAllocator;
|
||||
using llvm::yaml::Hex64;
|
||||
using llvm::yaml::Hex32;
|
||||
using llvm::yaml::Hex16;
|
||||
using llvm::yaml::Hex8;
|
||||
using llvm::yaml::SequenceTraits;
|
||||
using llvm::MachO::HeaderFileType;
|
||||
|
@ -66,6 +67,7 @@ using llvm::MachO::RelocationInfoType;
|
|||
using llvm::MachO::SectionType;
|
||||
using llvm::MachO::LoadCommandType;
|
||||
using llvm::MachO::ExportSymbolKind;
|
||||
using llvm::MachO::DataRegionType;
|
||||
|
||||
namespace lld {
|
||||
namespace mach_o {
|
||||
|
@ -191,10 +193,18 @@ struct Export {
|
|||
StringRef otherName;
|
||||
};
|
||||
|
||||
/// A normalized data-in-code entry.
|
||||
struct DataInCode {
|
||||
Hex32 offset;
|
||||
Hex16 length;
|
||||
DataRegionType kind;
|
||||
};
|
||||
|
||||
|
||||
/// A typedef so that YAML I/O can encode/decode mach_header.flags.
|
||||
LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
|
||||
|
||||
|
||||
///
|
||||
struct NormalizedFile {
|
||||
NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
|
||||
|
@ -231,12 +241,12 @@ struct NormalizedFile {
|
|||
std::vector<BindLocation> weakBindingInfo;
|
||||
std::vector<BindLocation> lazyBindingInfo;
|
||||
std::vector<Export> exportInfo;
|
||||
std::vector<DataInCode> dataInCode;
|
||||
|
||||
// TODO:
|
||||
// code-signature
|
||||
// split-seg-info
|
||||
// function-starts
|
||||
// data-in-code
|
||||
|
||||
// For any allocations in this struct which need to be owned by this struct.
|
||||
BumpPtrAllocator ownedAllocations;
|
||||
|
|
|
@ -229,6 +229,8 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb,
|
|||
return ec;
|
||||
|
||||
// Walk load commands looking for segments/sections and the symbol table.
|
||||
const data_in_code_entry *dataInCode = nullptr;
|
||||
uint32_t dataInCodeSize = 0;
|
||||
ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
|
||||
[&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
|
||||
if (is64) {
|
||||
|
@ -387,21 +389,32 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb,
|
|||
f->localSymbols.push_back(sout);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cmd == LC_ID_DYLIB) {
|
||||
} else if (cmd == LC_ID_DYLIB) {
|
||||
const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
|
||||
dylib_command tempDL;
|
||||
if (swap) {
|
||||
tempDL = *dl; swapStruct(tempDL); dl = &tempDL;
|
||||
}
|
||||
|
||||
f->installName = lc + dl->dylib.name;
|
||||
f->installName = lc + read32(swap, dl->dylib.name);
|
||||
} else if (cmd == LC_DATA_IN_CODE) {
|
||||
const linkedit_data_command *ldc =
|
||||
reinterpret_cast<const linkedit_data_command*>(lc);
|
||||
dataInCode = reinterpret_cast<const data_in_code_entry*>(
|
||||
start + read32(swap, ldc->dataoff));
|
||||
dataInCodeSize = read32(swap, ldc->datasize);
|
||||
}
|
||||
return false;
|
||||
});
|
||||
if (ec)
|
||||
return ec;
|
||||
|
||||
if (dataInCode) {
|
||||
// Convert on-disk data_in_code_entry array to DataInCode vector.
|
||||
for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
|
||||
DataInCode entry;
|
||||
entry.offset = read32(swap, dataInCode[i].offset);
|
||||
entry.length = read16(swap, dataInCode[i].length);
|
||||
entry.kind = (DataRegionType)read16(swap, dataInCode[i].kind);
|
||||
f->dataInCode.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
return std::move(f);
|
||||
}
|
||||
|
||||
|
|
|
@ -74,11 +74,13 @@ private:
|
|||
void writeRebaseInfo();
|
||||
void writeBindingInfo();
|
||||
void writeLazyBindingInfo();
|
||||
void writeDataInCodeInfo();
|
||||
void writeLinkEditContent();
|
||||
void buildLinkEditInfo();
|
||||
void buildRebaseInfo();
|
||||
void buildBindInfo();
|
||||
void buildLazyBindInfo();
|
||||
void computeDataInCodeSize();
|
||||
void computeSymbolTableSizes();
|
||||
void buildSectionRelocations();
|
||||
void appendSymbols(const std::vector<Symbol> &symbols,
|
||||
|
@ -162,6 +164,7 @@ private:
|
|||
uint32_t _countOfLoadCommands;
|
||||
uint32_t _endOfLoadCommands;
|
||||
uint32_t _startOfRelocations;
|
||||
uint32_t _startOfDataInCode;
|
||||
uint32_t _startOfSymbols;
|
||||
uint32_t _startOfIndirectSymbols;
|
||||
uint32_t _startOfSymbolStrings;
|
||||
|
@ -171,6 +174,7 @@ private:
|
|||
uint32_t _symbolTableUndefinesStartIndex;
|
||||
uint32_t _symbolStringPoolSize;
|
||||
uint32_t _symbolTableSize;
|
||||
uint32_t _dataInCodeSize;
|
||||
uint32_t _indirectSymbolTableCount;
|
||||
// Used in object file creation only
|
||||
uint32_t _startOfSectionsContent;
|
||||
|
@ -227,7 +231,10 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
|
|||
+ file.sections.size() * sectsSize
|
||||
+ sizeof(symtab_command);
|
||||
_countOfLoadCommands = 2;
|
||||
|
||||
if (!_file.dataInCode.empty()) {
|
||||
_endOfLoadCommands += sizeof(linkedit_data_command);
|
||||
_countOfLoadCommands++;
|
||||
}
|
||||
// Accumulate size of each section.
|
||||
_startOfSectionsContent = _endOfLoadCommands;
|
||||
_endOfSectionsContent = _startOfSectionsContent;
|
||||
|
@ -239,10 +246,12 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
|
|||
}
|
||||
|
||||
computeSymbolTableSizes();
|
||||
computeDataInCodeSize();
|
||||
|
||||
// Align start of relocations.
|
||||
_startOfRelocations = pointerAlign(_endOfSectionsContent);
|
||||
_startOfSymbols = _startOfRelocations + relocCount * 8;
|
||||
_startOfDataInCode = _startOfRelocations + relocCount * 8;
|
||||
_startOfSymbols = _startOfDataInCode + _dataInCodeSize;
|
||||
// Add Indirect symbol table.
|
||||
_startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
|
||||
// Align start of symbol table and symbol strings.
|
||||
|
@ -273,15 +282,15 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
|
|||
|
||||
// LINKEDIT of final linked images has in order:
|
||||
// rebase info, binding info, lazy binding info, weak binding info,
|
||||
// indirect symbol table, symbol table, symbol table strings.
|
||||
// data-in-code, symbol table, indirect symbol table, symbol table strings.
|
||||
_startOfRebaseInfo = _startOfLinkEdit;
|
||||
_endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
|
||||
_startOfBindingInfo = _endOfRebaseInfo;
|
||||
_endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
|
||||
_startOfLazyBindingInfo = _endOfBindingInfo;
|
||||
_endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
|
||||
|
||||
_startOfSymbols = _endOfLazyBindingInfo;
|
||||
_startOfDataInCode = _endOfLazyBindingInfo;
|
||||
_startOfSymbols = _startOfDataInCode + _dataInCodeSize;
|
||||
_startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
|
||||
_startOfSymbolStrings = _startOfIndirectSymbols
|
||||
+ pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
|
||||
|
@ -300,6 +309,7 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
|
|||
<< " endOfBindingInfo=" << _endOfBindingInfo << "\n"
|
||||
<< " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
|
||||
<< " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
|
||||
<< " startOfDataInCode=" << _startOfDataInCode << "\n"
|
||||
<< " startOfSymbols=" << _startOfSymbols << "\n"
|
||||
<< " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
|
||||
<< " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
|
||||
|
@ -620,6 +630,18 @@ std::error_code MachOFileLayout::writeLoadCommands() {
|
|||
st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
|
||||
if (_swap)
|
||||
swapStruct(*st);
|
||||
lc += sizeof(symtab_command);
|
||||
// Add LC_DATA_IN_CODE if needed.
|
||||
if (_dataInCodeSize != 0) {
|
||||
linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
|
||||
dl->cmd = LC_DATA_IN_CODE;
|
||||
dl->cmdsize = sizeof(linkedit_data_command);
|
||||
dl->dataoff = _startOfDataInCode;
|
||||
dl->datasize = _dataInCodeSize;
|
||||
if (_swap)
|
||||
swapStruct(*dl);
|
||||
lc += sizeof(linkedit_data_command);
|
||||
}
|
||||
} else {
|
||||
// Final linked images have sections under segments.
|
||||
if (_is64)
|
||||
|
@ -804,6 +826,20 @@ void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
|
|||
}
|
||||
}
|
||||
|
||||
void MachOFileLayout::writeDataInCodeInfo() {
|
||||
uint32_t offset = _startOfDataInCode;
|
||||
for (const DataInCode &entry : _file.dataInCode) {
|
||||
data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>(
|
||||
&_buffer[offset]);
|
||||
dst->offset = entry.offset;
|
||||
dst->length = entry.length;
|
||||
dst->kind = entry.kind;
|
||||
if (_swap)
|
||||
swapStruct(*dst);
|
||||
offset += sizeof(data_in_code_entry);
|
||||
}
|
||||
}
|
||||
|
||||
void MachOFileLayout::writeSymbolTable() {
|
||||
// Write symbol table and symbol strings in parallel.
|
||||
uint32_t symOffset = _startOfSymbols;
|
||||
|
@ -860,6 +896,7 @@ void MachOFileLayout::buildLinkEditInfo() {
|
|||
buildBindInfo();
|
||||
buildLazyBindInfo();
|
||||
computeSymbolTableSizes();
|
||||
computeDataInCodeSize();
|
||||
}
|
||||
|
||||
void MachOFileLayout::buildSectionRelocations() {
|
||||
|
@ -941,10 +978,14 @@ void MachOFileLayout::computeSymbolTableSizes() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Records in _dataInCodeSize the number of bytes the data-in-code table
/// occupies: one data_in_code_entry per element of _file.dataInCode.
void MachOFileLayout::computeDataInCodeSize() {
  const size_t entryCount = _file.dataInCode.size();
  _dataInCodeSize = entryCount * sizeof(data_in_code_entry);
}
|
||||
|
||||
void MachOFileLayout::writeLinkEditContent() {
|
||||
if (_file.fileType == llvm::MachO::MH_OBJECT) {
|
||||
writeRelocations();
|
||||
writeDataInCodeInfo();
|
||||
writeSymbolTable();
|
||||
} else {
|
||||
writeRebaseInfo();
|
||||
|
|
|
@ -102,6 +102,7 @@ public:
|
|||
void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
|
||||
void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
|
||||
void addSectionRelocs(const lld::File &, NormalizedFile &file);
|
||||
void buildDataInCodeArray(const lld::File &, NormalizedFile &file);
|
||||
void addDependentDylibs(const lld::File &, NormalizedFile &file);
|
||||
void copyEntryPointAddress(NormalizedFile &file);
|
||||
|
||||
|
@ -899,6 +900,46 @@ void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts the data-in-code "transition" References found on the atoms of
/// every section into mach-o style ranges and appends them to
/// \p file.dataInCode.
///
/// A transition Reference carries the DataRegionType of the content that
/// starts at its offset in its addend; an addend of 0 means "not data".
void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
  for (SectionInfo *si : _sectionInfos) {
    for (const AtomInfo &info : si->atomsAndOffsets) {
      // Atoms that contain data-in-code have "transition" references
      // which mark a point where the embedded data starts or ends.
      // This needs to be converted to the mach-o format which is an array
      // of data-in-code ranges.
      uint32_t startOffset = 0;
      DataRegionType mode = DataRegionType(0);

      // Appends the range [startOffset, endOffset) of the current atom,
      // tagged with the mode that was active over that range.
      auto appendRange = [&](uint64_t endOffset) {
        DataInCode entry;
        entry.offset = si->address + info.offsetInSection + startOffset;
        entry.length = endOffset - startOffset;
        entry.kind   = mode;
        file.dataInCode.push_back(entry);
      };

      for (const Reference *ref : *info.atom) {
        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
          continue;
        if (!_archHandler.isDataInCodeTransition(ref->kindValue()))
          continue;
        DataRegionType nextMode = (DataRegionType)ref->addend();
        // A marker that repeats the current mode does not end the run.
        // Leave startOffset alone so the bytes seen so far stay part of
        // the range; resetting it here would silently drop them.
        if (mode == nextMode)
          continue;
        if (mode != 0) {
          // Found end of data range, so make range entry.
          appendRange(ref->offsetInAtom());
        }
        mode = nextMode;
        startOffset = ref->offsetInAtom();
      }
      if (mode != 0) {
        // Function ends with data (no end transition).
        appendRange(info.atom->size());
      }
    }
  }
}
|
||||
|
||||
void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
|
||||
NormalizedFile &nFile) {
|
||||
if (_context.outputMachOType() == llvm::MachO::MH_OBJECT)
|
||||
|
@ -992,6 +1033,7 @@ normalizedFromAtoms(const lld::File &atomFile,
|
|||
util.addIndirectSymbols(atomFile, normFile);
|
||||
util.addRebaseAndBindingInfo(atomFile, normFile);
|
||||
util.addSectionRelocs(atomFile, normFile);
|
||||
util.buildDataInCodeArray(atomFile, normFile);
|
||||
util.copyEntryPointAddress(normFile);
|
||||
|
||||
return std::move(f);
|
||||
|
|
|
@ -426,6 +426,17 @@ std::error_code processSection(DefinedAtom::ContentType atomType,
|
|||
return std::error_code();
|
||||
}
|
||||
|
||||
/// Returns the first section of \p normalizedFile whose half-open range
/// [address, address + content.size()) contains \p address, or nullptr when
/// no section covers it.
const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
                                          uint64_t address) {
  for (const Section &sect : normalizedFile.sections) {
    const uint64_t sectStart = sect.address;
    const uint64_t sectEnd = sectStart + sect.content.size();
    // Skip sections that lie entirely before or after the address.
    if (address < sectStart || address >= sectEnd)
      continue;
    return &sect;
  }
  return nullptr;
}
|
||||
|
||||
// Walks all relocations for a section in a normalized .o file and
|
||||
// creates corresponding lld::Reference objects.
|
||||
std::error_code convertRelocs(const Section §ion,
|
||||
|
@ -441,17 +452,10 @@ std::error_code convertRelocs(const Section §ion,
|
|||
"index (") + Twine(sectIndex) + ")");
|
||||
const Section *sect = nullptr;
|
||||
if (sectIndex == 0) {
|
||||
for (const Section &s : normalizedFile.sections) {
|
||||
uint64_t sAddr = s.address;
|
||||
if ((sAddr <= addr) && (addr < sAddr+s.content.size())) {
|
||||
sect = &s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!sect) {
|
||||
sect = findSectionCoveringAddress(normalizedFile, addr);
|
||||
if (!sect)
|
||||
return make_dynamic_error_code(Twine("address (" + Twine(addr)
|
||||
+ ") is not in any section"));
|
||||
}
|
||||
+ ") is not in any section"));
|
||||
} else {
|
||||
sect = &normalizedFile.sections[sectIndex-1];
|
||||
}
|
||||
|
@ -612,6 +616,50 @@ normalizedObjectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
|
|||
handler->addAdditionalReferences(*atom);
|
||||
});
|
||||
|
||||
// Process mach-o data-in-code regions array. That information is encoded in
|
||||
// atoms as References at each transition point.
|
||||
unsigned nextIndex = 0;
|
||||
for (const DataInCode &entry : normalizedFile.dataInCode) {
|
||||
++nextIndex;
|
||||
const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
|
||||
if (!s) {
|
||||
return make_dynamic_error_code(Twine("LC_DATA_IN_CODE address ("
|
||||
+ Twine(entry.offset)
|
||||
+ ") is not in any section"));
|
||||
}
|
||||
uint64_t offsetInSect = entry.offset - s->address;
|
||||
uint32_t offsetInAtom;
|
||||
MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
|
||||
&offsetInAtom);
|
||||
if (offsetInAtom + entry.length > atom->size()) {
|
||||
return make_dynamic_error_code(Twine("LC_DATA_IN_CODE entry (offset="
|
||||
+ Twine(entry.offset)
|
||||
+ ", length="
|
||||
+ Twine(entry.length)
|
||||
+ ") crosses atom boundary."));
|
||||
}
|
||||
// Add reference that marks start of data-in-code.
|
||||
atom->addReference(offsetInAtom,
|
||||
handler->dataInCodeTransitionStart(*atom), atom,
|
||||
entry.kind, handler->kindArch());
|
||||
|
||||
// Peek at next entry, if it starts where this one ends, skip ending ref.
|
||||
if (nextIndex < normalizedFile.dataInCode.size()) {
|
||||
const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
|
||||
if (nextEntry.offset == (entry.offset + entry.length))
|
||||
continue;
|
||||
}
|
||||
|
||||
// If data goes to end of function, skip ending ref.
|
||||
if ((offsetInAtom + entry.length) == atom->size())
|
||||
continue;
|
||||
|
||||
// Add reference that marks end of data-in-code.
|
||||
atom->addReference(offsetInAtom+entry.length,
|
||||
handler->dataInCodeTransitionEnd(*atom), atom, 0,
|
||||
handler->kindArch());
|
||||
}
|
||||
|
||||
// Sort references in each atom to their canonical order.
|
||||
for (const DefinedAtom* defAtom : file->defined()) {
|
||||
reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
|
||||
|
|
|
@ -45,6 +45,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation)
|
|||
LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(Export)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode)
|
||||
|
||||
|
||||
// for compatibility with gcc-4.7 in C++11 mode, add extra namespace
|
||||
|
@ -596,6 +597,31 @@ struct MappingTraits<Export> {
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ScalarEnumerationTraits<DataRegionType> {
|
||||
static void enumeration(IO &io, DataRegionType &value) {
|
||||
io.enumCase(value, "DICE_KIND_DATA",
|
||||
llvm::MachO::DICE_KIND_DATA);
|
||||
io.enumCase(value, "DICE_KIND_JUMP_TABLE8",
|
||||
llvm::MachO::DICE_KIND_JUMP_TABLE8);
|
||||
io.enumCase(value, "DICE_KIND_JUMP_TABLE16",
|
||||
llvm::MachO::DICE_KIND_JUMP_TABLE16);
|
||||
io.enumCase(value, "DICE_KIND_JUMP_TABLE32",
|
||||
llvm::MachO::DICE_KIND_JUMP_TABLE32);
|
||||
io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32",
|
||||
llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<DataInCode> {
|
||||
static void mapping(IO &io, DataInCode &entry) {
|
||||
io.mapRequired("offset", entry.offset);
|
||||
io.mapRequired("length", entry.length);
|
||||
io.mapRequired("kind", entry.kind);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
struct MappingTraits<NormalizedFile> {
|
||||
|
@ -622,6 +648,7 @@ struct MappingTraits<NormalizedFile> {
|
|||
io.mapOptional("weak-bindings", file.weakBindingInfo);
|
||||
io.mapOptional("lazy-bindings", file.lazyBindingInfo);
|
||||
io.mapOptional("exports", file.exportInfo);
|
||||
io.mapOptional("dataInCode", file.dataInCode);
|
||||
}
|
||||
static StringRef validate(IO &io, NormalizedFile &file) {
|
||||
return StringRef();
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
# RUN: lld -flavor darwin -arch armv7 -r -print_atoms %s -o %t | FileCheck %s \
|
||||
# RUN: && lld -flavor darwin -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s
|
||||
#
|
||||
# Test parsing LC_DATA_IN_CODE
|
||||
#
|
||||
#
|
||||
|
||||
--- !mach-o
|
||||
arch: armv7
|
||||
file-type: MH_OBJECT
|
||||
flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
|
||||
sections:
|
||||
- segment: __TEXT
|
||||
section: __text
|
||||
type: S_REGULAR
|
||||
attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
|
||||
alignment: 2
|
||||
address: 0x0000000000000000
|
||||
content: [ 0x00, 0xBF, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0xBF,
|
||||
0x00, 0xF0, 0x20, 0xE3, 0x0A, 0x00, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3 ]
|
||||
local-symbols:
|
||||
- name: _foo_thumb
|
||||
type: N_SECT
|
||||
sect: 1
|
||||
desc: [ N_ARM_THUMB_DEF ]
|
||||
value: 0x0000000000000000
|
||||
- name: _foo_arm
|
||||
type: N_SECT
|
||||
sect: 1
|
||||
value: 0x0000000000000018
|
||||
dataInCode:
|
||||
- offset: 0x00000004
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_DATA
|
||||
- offset: 0x00000008
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE32
|
||||
- offset: 0x0000000C
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE16
|
||||
- offset: 0x00000010
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE8
|
||||
- offset: 0x0000001C
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_DATA
|
||||
- offset: 0x00000020
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE32
|
||||
- offset: 0x00000024
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE16
|
||||
- offset: 0x00000028
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE8
|
||||
...
|
||||
|
||||
|
||||
|
||||
# CHECK: defined-atoms:
|
||||
# CHECK: - name: _foo_thumb
|
||||
# CHECK: references:
|
||||
# CHECK: - kind: modeThumbCode
|
||||
# CHECK: offset: 0
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 4
|
||||
# CHECK: addend: 1
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 8
|
||||
# CHECK: addend: 4
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 12
|
||||
# CHECK: addend: 3
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 16
|
||||
# CHECK: addend: 2
|
||||
# CHECK: - kind: modeThumbCode
|
||||
# CHECK: offset: 20
|
||||
# CHECK: - name: _foo_arm
|
||||
# CHECK: references:
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 4
|
||||
# CHECK: addend: 1
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 8
|
||||
# CHECK: addend: 4
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 12
|
||||
# CHECK: addend: 3
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 16
|
||||
# CHECK: addend: 2
|
||||
# CHECK: - kind: modeArmCode
|
||||
# CHECK: offset: 20
|
||||
|
||||
|
||||
|
||||
# .code 16
|
||||
# .thumb_func _foo_thumb
|
||||
#_foo_thumb:
|
||||
# nop
|
||||
# nop
|
||||
#
|
||||
# .data_region
|
||||
# .long 0
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt32
|
||||
# .long 1
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt16
|
||||
# .long 2
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt8
|
||||
# .long 3
|
||||
# .end_data_region
|
||||
#
|
||||
# nop
|
||||
# nop
|
||||
#
|
||||
#
|
||||
#
|
||||
# .code 32
|
||||
# .align 2
|
||||
#_foo_arm:
|
||||
# nop
|
||||
#
|
||||
# .data_region
|
||||
# .long 10
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt32
|
||||
# .long 11
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt16
|
||||
# .long 12
|
||||
# .end_data_region
|
||||
#
|
||||
# .data_region jt8
|
||||
# .long 13
|
||||
# .end_data_region
|
||||
#
|
||||
# nop
|
||||
#
|
|
@ -0,0 +1,77 @@
|
|||
# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t | FileCheck %s \
|
||||
# RUN: && lld -flavor darwin -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s
|
||||
#
|
||||
# Test parsing LC_DATA_IN_CODE
|
||||
#
|
||||
#
|
||||
|
||||
--- !mach-o
|
||||
arch: x86
|
||||
file-type: MH_OBJECT
|
||||
flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
|
||||
sections:
|
||||
- segment: __TEXT
|
||||
section: __text
|
||||
type: S_REGULAR
|
||||
attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
|
||||
address: 0x0000000000000000
|
||||
content: [ 0x90, 0x90, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00,
|
||||
0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x03, 0x00,
|
||||
0x00, 0x00 ]
|
||||
local-symbols:
|
||||
- name: _func1
|
||||
type: N_SECT
|
||||
sect: 1
|
||||
value: 0x0000000000000000
|
||||
- name: _func2
|
||||
type: N_SECT
|
||||
sect: 1
|
||||
value: 0x000000000000000B
|
||||
dataInCode:
|
||||
- offset: 0x00000002
|
||||
length: 0x0008
|
||||
kind: DICE_KIND_JUMP_TABLE32
|
||||
- offset: 0x0000000E
|
||||
length: 0x0004
|
||||
kind: DICE_KIND_JUMP_TABLE32
|
||||
...
|
||||
|
||||
|
||||
|
||||
# CHECK: defined-atoms:
|
||||
# CHECK: - name: _func1
|
||||
# CHECK: references:
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 2
|
||||
# CHECK: addend: 4
|
||||
# CHECK: - kind: modeCode
|
||||
# CHECK: offset: 10
|
||||
# CHECK: - name: _func2
|
||||
# CHECK: references:
|
||||
# CHECK: - kind: modeData
|
||||
# CHECK: offset: 3
|
||||
# CHECK: addend: 4
|
||||
# CHECK-NOT: - kind: modeData
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
#_func1:
|
||||
# nop
|
||||
# nop
|
||||
# .data_region jt32
|
||||
# .long 1
|
||||
# .long 2
|
||||
# .end_data_region
|
||||
# nop
|
||||
#
|
||||
#
|
||||
# _func2:
|
||||
# nop
|
||||
# nop
|
||||
# nop
|
||||
# .data_region jt32
|
||||
# .long 3
|
||||
# .end_data_region
|
||||
#
|
Loading…
Reference in New Issue