forked from OSchip/llvm-project
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind
Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805
This commit is contained in:
parent
58ecbbcdcd
commit
2124ca1d5c
|
@ -2,8 +2,11 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
|
|||
tablegen(LLVM Options.inc -gen-opt-parser-defs)
|
||||
add_public_tablegen_target(MachOOptionsTableGen)
|
||||
|
||||
include_directories(${LLVM_MAIN_SRC_DIR}/../libunwind/include)
|
||||
|
||||
add_lld_library(lldMachO2
|
||||
Arch/X86_64.cpp
|
||||
UnwindInfoSection.cpp
|
||||
Driver.cpp
|
||||
DriverUtils.cpp
|
||||
ExportTrie.cpp
|
||||
|
|
|
@ -22,6 +22,7 @@ constexpr const char text[] = "__TEXT";
|
|||
constexpr const char data[] = "__DATA";
|
||||
constexpr const char linkEdit[] = "__LINKEDIT";
|
||||
constexpr const char dataConst[] = "__DATA_CONST";
|
||||
constexpr const char ld[] = "__LD"; // output only with -r
|
||||
|
||||
} // namespace segment_names
|
||||
|
||||
|
|
|
@ -35,6 +35,10 @@ constexpr const char symbolTable[] = "__symbol_table";
|
|||
constexpr const char stringTable[] = "__string_table";
|
||||
constexpr const char got[] = "__got";
|
||||
constexpr const char threadPtrs[] = "__thread_ptrs";
|
||||
constexpr const char unwindInfo[] = "__unwind_info";
|
||||
// these are not synthetic, but in service of synthetic __unwind_info
|
||||
constexpr const char compactUnwind[] = "__compact_unwind";
|
||||
constexpr const char ehFrame[] = "__eh_frame";
|
||||
|
||||
} // namespace section_names
|
||||
|
||||
|
|
|
@ -0,0 +1,284 @@
|
|||
//===- UnwindInfoSection.cpp ----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "UnwindInfoSection.h"
|
||||
#include "Config.h"
|
||||
#include "InputSection.h"
|
||||
#include "MergedOutputSection.h"
|
||||
#include "OutputSection.h"
|
||||
#include "OutputSegment.h"
|
||||
#include "Symbols.h"
|
||||
#include "SyntheticSections.h"
|
||||
#include "Target.h"
|
||||
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/BinaryFormat/MachO.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::MachO;
|
||||
using namespace lld;
|
||||
using namespace lld::macho;
|
||||
|
||||
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
|
||||
// optimizes space and exception-time lookup. Most DWARF unwind
|
||||
// entries can be replaced with Compact Unwind entries, but the ones
|
||||
// that cannot are retained in DWARF form.
|
||||
//
|
||||
// This comment will address macro-level organization of the pre-link
|
||||
// and post-link compact unwind tables. For micro-level organization
|
||||
// pertaining to the bitfield layout of the 32-bit compact unwind
|
||||
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
|
||||
//
|
||||
// Important clarifying factoids:
|
||||
//
|
||||
// * __LD,__compact_unwind is the compact unwind format for compiler
|
||||
// output and linker input. It is never a final output. It could be
|
||||
// an intermediate output with the `-r` option which retains relocs.
|
||||
//
|
||||
// * __TEXT,__unwind_info is the compact unwind format for final
|
||||
// linker output. It is never an input.
|
||||
//
|
||||
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
|
||||
//
|
||||
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
|
||||
// level) by ascending address, and the pages are referenced by an
|
||||
// index (1st level) in the section header.
|
||||
//
|
||||
// * Following the headers in __TEXT,__unwind_info, the bulk of the
|
||||
// section contains a vector of compact unwind entries
|
||||
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
|
||||
// Adjacent entries with the same encoding can be folded to great
|
||||
// advantage, achieving a 3-order-of-magnitude reduction in the
|
||||
// number of entries.
|
||||
//
|
||||
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
|
||||
// encodings for the space-efficient compressed format. In practice,
|
||||
// fewer than a dozen unique encodings are used by C++ programs of
|
||||
// all sizes. Therefore, we don't even bother implementing the regular
|
||||
// non-compressed format. Time will tell if anyone in the field ever
|
||||
// overflows the 127-encodings limit.
|
||||
|
||||
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info
|
||||
// TODO(gkm): how do we align the 2nd-level pages?
|
||||
|
||||
// Construct the synthetic __TEXT,__unwind_info output section. The input
// __LD,__compact_unwind section is attached later via
// setCompactUnwindSection().
UnwindInfoSection::UnwindInfoSection()
    : SyntheticSection(segment_names::text, section_names::unwindInfo) {}
bool UnwindInfoSection::isNeeded() const {
|
||||
return (compactUnwindSection != nullptr);
|
||||
}
|
||||
|
||||
// Scan the __LD,__compact_unwind entries and compute the space needs of
|
||||
// __TEXT,__unwind_info and __TEXT,__eh_frame
|
||||
|
||||
void UnwindInfoSection::finalize() {
|
||||
if (compactUnwindSection == nullptr)
|
||||
return;
|
||||
|
||||
// At this point, the address space for __TEXT,__text has been
|
||||
// assigned, so we can relocate the __LD,__compact_unwind entries
|
||||
// into a temporary buffer. Relocation is necessary in order to sort
|
||||
// the CU entries by function address. Sorting is necessary so that
|
||||
// we can fold adjacent CU entries with identical
|
||||
// encoding+personality+lsda. Folding is necessary because it reduces
|
||||
// the number of CU entries by as much as 3 orders of magnitude!
|
||||
compactUnwindSection->finalize();
|
||||
assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry64) == 0);
|
||||
size_t cuCount =
|
||||
compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
|
||||
cuVector.resize(cuCount);
|
||||
// Relocate all __LD,__compact_unwind entries
|
||||
compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data()));
|
||||
|
||||
// Rather than sort & fold the 32-byte entries directly, we create a
|
||||
// vector of pointers to entries and sort & fold that instead.
|
||||
cuPtrVector.reserve(cuCount);
|
||||
for (const auto &cuEntry : cuVector)
|
||||
cuPtrVector.emplace_back(&cuEntry);
|
||||
std::sort(cuPtrVector.begin(), cuPtrVector.end(),
|
||||
[](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
|
||||
return a->functionAddress < b->functionAddress;
|
||||
});
|
||||
|
||||
// Fold adjacent entries with matching encoding+personality+lsda
|
||||
// We use three iterators on the same cuPtrVector to fold in-situ:
|
||||
// (1) `foldBegin` is the first of a potential sequence of matching entries
|
||||
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
|
||||
// The semi-open interval [ foldBegin .. foldEnd ) contains a range
|
||||
// entries that can be folded into a single entry and written to ...
|
||||
// (3) `foldWrite`
|
||||
auto foldWrite = cuPtrVector.begin();
|
||||
for (auto foldBegin = cuPtrVector.begin(); foldBegin < cuPtrVector.end();) {
|
||||
auto foldEnd = foldBegin;
|
||||
while (++foldEnd < cuPtrVector.end() &&
|
||||
(*foldBegin)->encoding == (*foldEnd)->encoding &&
|
||||
(*foldBegin)->personality == (*foldEnd)->personality &&
|
||||
(*foldBegin)->lsda == (*foldEnd)->lsda)
|
||||
;
|
||||
*foldWrite++ = *foldBegin;
|
||||
foldBegin = foldEnd;
|
||||
}
|
||||
cuPtrVector.erase(foldWrite, cuPtrVector.end());
|
||||
|
||||
// Count frequencies of the folded encodings
|
||||
llvm::DenseMap<compact_unwind_encoding_t, size_t> encodingFrequencies;
|
||||
for (auto cuPtrEntry : cuPtrVector)
|
||||
encodingFrequencies[cuPtrEntry->encoding]++;
|
||||
if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
|
||||
error("TODO(gkm): handle common encodings table overflow");
|
||||
|
||||
// Make a table of encodings, sorted by descending frequency
|
||||
for (const auto &frequency : encodingFrequencies)
|
||||
commonEncodings.emplace_back(frequency);
|
||||
std::sort(commonEncodings.begin(), commonEncodings.end(),
|
||||
[](const std::pair<compact_unwind_encoding_t, size_t> &a,
|
||||
const std::pair<compact_unwind_encoding_t, size_t> &b) {
|
||||
if (a.second == b.second)
|
||||
// When frequencies match, secondarily sort on encoding
|
||||
// to maintain parity with validate-unwind-info.py
|
||||
return a.first > b.first;
|
||||
return a.second > b.second;
|
||||
});
|
||||
|
||||
// Split folded encodings into pages, limited by capacity of a page
|
||||
// and the 24-bit range of function offset
|
||||
//
|
||||
// Record the page splits as a vector of iterators on cuPtrVector
|
||||
// such that successive elements form a semi-open interval. E.g.,
|
||||
// page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
|
||||
//
|
||||
// Note that pageBounds.size() is one greater than the number of
|
||||
// pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
|
||||
pageBounds.push_back(cuPtrVector.cbegin());
|
||||
// TODO(gkm): cut 1st page entries short to accommodate section headers ???
|
||||
CompactUnwindEntry64 cuEntryKey;
|
||||
for (size_t i = 0;;) {
|
||||
// Limit the search to entries that can fit within a 4 KiB page.
|
||||
const auto pageBegin = pageBounds[0] + i;
|
||||
const auto pageMax =
|
||||
pageBounds[0] +
|
||||
std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX,
|
||||
cuPtrVector.size());
|
||||
// Exclude entries with functionOffset that would overflow 24 bits
|
||||
cuEntryKey.functionAddress = (*pageBegin)->functionAddress +
|
||||
UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
|
||||
const auto pageBreak = std::lower_bound(
|
||||
pageBegin, pageMax, &cuEntryKey,
|
||||
[](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
|
||||
return a->functionAddress < b->functionAddress;
|
||||
});
|
||||
pageBounds.push_back(pageBreak);
|
||||
if (pageBreak == cuPtrVector.cend())
|
||||
break;
|
||||
i = pageBreak - cuPtrVector.cbegin();
|
||||
}
|
||||
|
||||
// compute size of __TEXT,__unwind_info section
|
||||
level2PagesOffset =
|
||||
sizeof(unwind_info_section_header) +
|
||||
commonEncodings.size() * sizeof(uint32_t) +
|
||||
personalities.size() * sizeof(uint32_t) +
|
||||
pageBounds.size() * sizeof(unwind_info_section_header_index_entry) +
|
||||
lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
|
||||
unwindInfoSize = level2PagesOffset +
|
||||
(pageBounds.size() - 1) *
|
||||
sizeof(unwind_info_compressed_second_level_page_header) +
|
||||
cuPtrVector.size() * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
// All inputs are relocated and output adddresses are known, so write!
|
||||
|
||||
void UnwindInfoSection::writeTo(uint8_t *buf) const {
|
||||
// section header
|
||||
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
|
||||
uip->version = 1;
|
||||
uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
|
||||
uip->commonEncodingsArrayCount = commonEncodings.size();
|
||||
uip->personalityArraySectionOffset =
|
||||
uip->commonEncodingsArraySectionOffset +
|
||||
(uip->commonEncodingsArrayCount * sizeof(uint32_t));
|
||||
uip->personalityArrayCount = personalities.size();
|
||||
uip->indexSectionOffset = uip->personalityArraySectionOffset +
|
||||
(uip->personalityArrayCount * sizeof(uint32_t));
|
||||
uip->indexCount = pageBounds.size();
|
||||
|
||||
// Common encodings
|
||||
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
|
||||
for (const auto &encoding : commonEncodings)
|
||||
*i32p++ = encoding.first;
|
||||
|
||||
// Personalities
|
||||
for (const auto &personality : personalities)
|
||||
*i32p++ = personality;
|
||||
|
||||
// Level-1 index
|
||||
uint32_t lsdaOffset =
|
||||
uip->indexSectionOffset +
|
||||
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
|
||||
uint64_t l2PagesOffset = level2PagesOffset;
|
||||
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
|
||||
for (size_t i = 0; i < pageBounds.size() - 1; i++) {
|
||||
iep->functionOffset = (*pageBounds[i])->functionAddress;
|
||||
iep->secondLevelPagesSectionOffset = l2PagesOffset;
|
||||
iep->lsdaIndexArraySectionOffset = lsdaOffset;
|
||||
iep++;
|
||||
// TODO(gkm): pad to 4 KiB page boundary ???
|
||||
size_t entryCount = pageBounds[i + 1] - pageBounds[i];
|
||||
uint64_t pageSize = sizeof(unwind_info_section_header_index_entry) +
|
||||
entryCount * sizeof(uint32_t);
|
||||
l2PagesOffset += pageSize;
|
||||
}
|
||||
// Level-1 sentinel
|
||||
const CompactUnwindEntry64 &cuEnd = cuVector.back();
|
||||
iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength;
|
||||
iep->secondLevelPagesSectionOffset = 0;
|
||||
iep->lsdaIndexArraySectionOffset = lsdaOffset;
|
||||
iep++;
|
||||
|
||||
// LSDAs
|
||||
auto *lep =
|
||||
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
|
||||
for (const auto &lsda : lsdaEntries) {
|
||||
lep->functionOffset = lsda.functionOffset;
|
||||
lep->lsdaOffset = lsda.lsdaOffset;
|
||||
}
|
||||
|
||||
// create map from encoding to common-encoding-table index compact
|
||||
// encoding entries use 7 bits to index the common-encoding table
|
||||
size_t i = 0;
|
||||
llvm::DenseMap<compact_unwind_encoding_t, size_t> commonEncodingIndexes;
|
||||
for (const auto &encoding : commonEncodings)
|
||||
commonEncodingIndexes[encoding.first] = i++;
|
||||
|
||||
// Level-2 pages
|
||||
auto *p2p =
|
||||
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(lep);
|
||||
for (size_t i = 0; i < pageBounds.size() - 1; i++) {
|
||||
p2p->kind = UNWIND_SECOND_LEVEL_COMPRESSED;
|
||||
p2p->entryPageOffset =
|
||||
sizeof(unwind_info_compressed_second_level_page_header);
|
||||
p2p->entryCount = pageBounds[i + 1] - pageBounds[i];
|
||||
p2p->encodingsPageOffset =
|
||||
p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
|
||||
p2p->encodingsCount = 0;
|
||||
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
|
||||
auto cuPtrVectorIt = pageBounds[i];
|
||||
uintptr_t functionAddressBase = (*cuPtrVectorIt)->functionAddress;
|
||||
while (cuPtrVectorIt < pageBounds[i + 1]) {
|
||||
const CompactUnwindEntry64 *cuep = *cuPtrVectorIt++;
|
||||
size_t cueIndex = commonEncodingIndexes.lookup(cuep->encoding);
|
||||
*ep++ = ((cueIndex << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
|
||||
(cuep->functionAddress - functionAddressBase));
|
||||
}
|
||||
p2p =
|
||||
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(ep);
|
||||
}
|
||||
assert(getSize() ==
|
||||
static_cast<size_t>((reinterpret_cast<uint8_t *>(p2p) - buf)));
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
//===- UnwindInfoSection.h ------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLD_MACHO_UNWIND_INFO_H
#define LLD_MACHO_UNWIND_INFO_H

#include "MergedOutputSection.h"
#include "SyntheticSections.h"

#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/DenseMap.h"

#include <vector>

// One __LD,__compact_unwind entry as produced by the compiler.
// In 2020, we mostly care about 64-bit targets: x86_64 and arm64
struct CompactUnwindEntry64 {
  uint64_t functionAddress;
  uint32_t functionLength;
  compact_unwind_encoding_t encoding;
  uint64_t personality;
  uint64_t lsda;
};

// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm
struct CompactUnwindEntry32 {
  uint32_t functionAddress;
  uint32_t functionLength;
  compact_unwind_encoding_t encoding;
  uint32_t personality;
  uint32_t lsda;
};

namespace lld {
namespace macho {

// Synthesizes __TEXT,__unwind_info from the input __LD,__compact_unwind
// section, which must be attached via setCompactUnwindSection() before
// finalize() runs.
class UnwindInfoSection : public SyntheticSection {
public:
  UnwindInfoSection();
  uint64_t getSize() const override { return unwindInfoSize; }
  bool isNeeded() const override;
  void finalize() override;
  void writeTo(uint8_t *buf) const override;
  void setCompactUnwindSection(MergedOutputSection *cuSection) {
    compactUnwindSection = cuSection;
  }

private:
  // Unique encodings with their frequencies, sorted by descending frequency.
  std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
  std::vector<uint32_t> personalities;
  std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
  // Relocated copies of the input compact-unwind entries.
  std::vector<CompactUnwindEntry64> cuVector;
  // Pointers into cuVector, sorted by function address, then folded.
  std::vector<const CompactUnwindEntry64 *> cuPtrVector;
  // Semi-open page intervals over cuPtrVector; back() is the sentinel cend().
  std::vector<std::vector<const CompactUnwindEntry64 *>::const_iterator>
      pageBounds;
  MergedOutputSection *compactUnwindSection = nullptr;
  uint64_t level2PagesOffset = 0;
  uint64_t unwindInfoSize = 0;
};

#define UNWIND_INFO_COMMON_ENCODINGS_MAX 127

#define UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE 4096
#define UNWIND_INFO_REGULAR_SECOND_LEVEL_ENTRIES_MAX                           \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))
#define UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                        \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK                          \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)

} // namespace macho
} // namespace lld

#endif
|
|
@ -17,6 +17,7 @@
|
|||
#include "Symbols.h"
|
||||
#include "SyntheticSections.h"
|
||||
#include "Target.h"
|
||||
#include "UnwindInfoSection.h"
|
||||
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
|
@ -57,6 +58,7 @@ public:
|
|||
MachHeaderSection *header = nullptr;
|
||||
StringTableSection *stringTableSection = nullptr;
|
||||
SymtabSection *symtabSection = nullptr;
|
||||
UnwindInfoSection *unwindInfoSection = nullptr;
|
||||
};
|
||||
|
||||
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
|
||||
|
@ -414,8 +416,11 @@ static int sectionOrder(OutputSection *osec) {
|
|||
StringRef segname = osec->parent->name;
|
||||
// Sections are uniquely identified by their segment + section name.
|
||||
if (segname == segment_names::text) {
|
||||
if (osec->name == section_names::header)
|
||||
return -1;
|
||||
return StringSwitch<int>(osec->name)
|
||||
.Case(section_names::header, -1)
|
||||
.Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
|
||||
.Case(section_names::ehFrame, std::numeric_limits<int>::max())
|
||||
.Default(0);
|
||||
} else if (segname == segment_names::linkEdit) {
|
||||
return StringSwitch<int>(osec->name)
|
||||
.Case(section_names::binding, -6)
|
||||
|
@ -472,6 +477,7 @@ static void sortSegmentsAndSections() {
|
|||
void Writer::createOutputSections() {
|
||||
// First, create hidden sections
|
||||
stringTableSection = make<StringTableSection>();
|
||||
unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
|
||||
symtabSection = make<SymtabSection>(*stringTableSection);
|
||||
|
||||
switch (config->outputType) {
|
||||
|
@ -498,7 +504,11 @@ void Writer::createOutputSections() {
|
|||
for (const auto &it : mergedOutputSections) {
|
||||
StringRef segname = it.first.first;
|
||||
MergedOutputSection *osec = it.second;
|
||||
getOrCreateOutputSegment(segname)->addOutputSection(osec);
|
||||
if (unwindInfoSection && segname == segment_names::ld) {
|
||||
assert(osec->name == section_names::compactUnwind);
|
||||
unwindInfoSection->setCompactUnwindSection(osec);
|
||||
} else
|
||||
getOrCreateOutputSegment(segname)->addOutputSection(osec);
|
||||
}
|
||||
|
||||
for (SyntheticSection *ssec : syntheticSections) {
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# REQUIRES: x86
|
||||
|
||||
# FIXME(gkm): This test is fast on a Release tree, and slow (~10s) on
|
||||
# a Debug tree mostly because of llvm-mc. Is there a way to prefer the
|
||||
# fast installed llvm-mc rather than the slow one in our Debug tree?
|
||||
|
||||
# If headers and offsets are proper, then ...
|
||||
#
|
||||
# 1) llvm-objdump will not crash, and exit with good status
|
||||
#
|
||||
# 2) Summary encodings from the input object will match
|
||||
# those from the linked output
|
||||
#
|
||||
# 3) Encodings & symbols from the input object will match
|
||||
# those from the linked output
|
||||
|
||||
# RUN: %python %S/tools/generate-cfi-funcs.py --seed=johnnyapple >%t.s
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -o %t.o %t.s
|
||||
# RUN: lld -flavor darwinnew -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t %t.o
|
||||
# RUN: llvm-objdump --unwind-info --syms %t %t.o >%t.dump
|
||||
# RUN: %python %S/tools/validate-unwind-info.py %t.dump
|
|
@ -0,0 +1,135 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Generate skeletal functions with a variety .cfi_ directives.
|
||||
The purpose is to produce object-file test inputs to lld with a
|
||||
variety of compact unwind encodings.
|
||||
"""
|
||||
import random
|
||||
import argparse
|
||||
import string
|
||||
from math import factorial
|
||||
from itertools import permutations
|
||||
|
||||
# Mutable generation state and tuning knobs.
lsda_n = 0             # running count of emitted LSDA labels
lsda_odds = 0.0        # probability that a generated function gets an LSDA
func_size_low = 0x10   # lower bound for function size, in 16-byte units
func_size_high = 0x100 # upper bound for function size, in 16-byte units
saved_regs = ["%r15", "%r14", "%r13", "%r12", "%rbx"]
# saved_regs_combined[k] lists every ordered selection of k saved registers.
saved_regs_combined = [list(permutations(saved_regs, i)) for i in range(0, 6)]
||||
def print_function(name: str):
    """Print one skeletal function with random .cfi_ directives.

    Randomizes frame size, saved-register set, optional LSDA, and function
    size (which drifts upward via func_size_low/high so early output pages
    are full and later ones are not). Returns the function's size in bytes.
    """
    global lsda_odds
    have_lsda = (random.random() < lsda_odds)
    frame_size = random.randint(4, 64) * 16
    # BUG FIX: use integer division. frame_size / 16 yields a float, and
    # random.randint() requires integer bounds (a float raises on modern
    # Python 3).
    frame_offset = -random.randint(0, (frame_size // 16 - 4)) * 16
    reg_count = random.randint(0, 4)
    reg_combo = random.randint(0, factorial(reg_count) - 1)
    regs_saved = saved_regs_combined[reg_count][reg_combo]
    global func_size_low, func_size_high
    func_size = random.randint(func_size_low, func_size_high) * 0x10
    # Drift the size bounds upward so later functions tend to be larger.
    func_size_high += 1
    if func_size_high % 0x10 == 0:
        func_size_low += 1

    print(f"""\
### {name} regs={reg_count} frame={frame_size} lsda={have_lsda} size={func_size}
    .section __TEXT,__text,regular,pure_instructions
    .p2align 4, 0x90
    .globl {name}
{name}:
    .cfi_startproc""")
    if have_lsda:
        global lsda_n
        lsda_n += 1
        print(f"""\
    .cfi_personality 155, ___gxx_personality_v0
    .cfi_lsda 16, Lexception{lsda_n}""")
    print(f"""\
    pushq %rbp
    .cfi_def_cfa_offset {frame_size}
    .cfi_offset %rbp, {frame_offset+(6*8)}
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp""")
    for i in range(reg_count):
        print(f".cfi_offset {regs_saved[i]}, {frame_offset+(i*8)}")
    print(f"""\
    .fill {func_size - 6}
    popq %rbp
    retq
    .cfi_endproc
""")

    if have_lsda:
        print(f"""\
    .section __TEXT,__gcc_except_tab
    .p2align 2
Lexception{lsda_n}:
    .space 0x10
""")
    return func_size
def random_seed():
|
||||
"""Generate a seed that can easily be passsed back in via --seed=STRING"""
|
||||
return ''.join(random.choice(string.ascii_lowercase) for i in range(10))
|
||||
|
||||
def main():
    """Parse options, seed the RNG, and emit the generated assembly to stdout."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="""\
Function sizes begin small then monotonically increase. The goal is
to produce early pages that are full and later pages that are less
than full, in order to test handling for both cases. Full pages
contain the maximum of 1021 compact unwind entries for a total page
size = 4 KiB.

Use --pages=N or --functions=N to control the size of the output.
Default is --pages=2, meaning produce at least two full pages of
compact unwind entries, plus some more. The calculation is sloppy.
""")
    parser.add_argument('--seed', type=str, default=random_seed(),
                        help='Seed the random number generator')
    parser.add_argument('--pages', type=int, default=2,
                        help='Number of compact-unwind pages')
    parser.add_argument('--functions', type=int, default=None,
                        help='Number of functions to generate')
    # NOTE(review): --encodings is accepted but not yet consulted below.
    parser.add_argument('--encodings', type=int, default=127,
                        help='Maximum number of unique unwind encodings (default = 127)')
    # BUG FIX: help text said "(default = 10" (wrong value, unbalanced paren)
    # while the actual default is 0.
    parser.add_argument('--lsda', type=int, default=0,
                        help='Percentage of functions with personality & LSDA (default = 0)')
    args = parser.parse_args()
    random.seed(args.seed)
    p2align = 14
    global lsda_odds
    lsda_odds = args.lsda / 100.0

    print(f"""\
### seed={args.seed} lsda={lsda_odds} p2align={p2align}
    .section __TEXT,__text,regular,pure_instructions
    .p2align {p2align}, 0x90
""")

    size = 0
    base = (1 << p2align)
    if args.functions:
        for n in range(args.functions):
            size += print_function(f"x{size+base:08x}")
    else:
        # Each page covers at most a 24-bit function-offset range.
        while size < (args.pages << 24):
            size += print_function(f"x{size+base:08x}")

    print(f"""\
    .section __TEXT,__text,regular,pure_instructions
    .globl _main
    .p2align 4, 0x90
_main:
    retq

    .p2align 4, 0x90
___gxx_personality_v0:
    retq
""")


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,96 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Validate compact unwind info by cross checking the llvm-objdump
|
||||
reports of the input object file vs final linked output.
|
||||
"""
|
||||
import sys
|
||||
import argparse
|
||||
import re
|
||||
from pprint import pprint
|
||||
|
||||
def main():
    """Cross-check unwind info parsed from llvm-objdump output.

    Reads the concatenated (llvm-objdump --unwind-info --syms) dumps of the
    input object file(s) and the final linked program, then verifies that the
    program's folded encodings and common-encodings table match what the
    object files imply. Exits non-zero with a message on any mismatch.
    """
    # Raw string: "\d" in a plain literal is an invalid escape (warning on
    # modern Python). Also avoid shadowing the builtin hex().
    hexd = r"[a-f\d]"
    hex8 = hexd + "{8}"

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('files', metavar='FILES', nargs='*',
                        help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    # Gather all dump text, from the named files or from stdin.
    if args.files:
        objdump_string = ''.join([open(f).read() for f in args.files])
    else:
        objdump_string = sys.stdin.read()

    # (symbol, encoding, personality, lsda) per object-file CU entry.
    object_encodings_list = [(symbol, encoding, personality, lsda)
                             for symbol, encoding, personality, lsda in
                             re.findall(rf"start:\s+0x{hexd}+\s+(\w+)\s+" +
                                        rf"length:\s+0x{hexd}+\s+" +
                                        rf"compact encoding:\s+0x({hexd}+)(?:\s+" +
                                        rf"personality function:\s+0x({hexd}+)\s+\w+\s+" +
                                        rf"LSDA:\s+0x({hexd}+)\s+\w+(?: \+ 0x{hexd}+)?)?",
                                        objdump_string, re.DOTALL)]
    object_encodings_map = {symbol:encoding
                            for symbol, encoding, _, _ in object_encodings_list}
    if not object_encodings_map:
        sys.exit("no object encodings found in input")

    # Map each program function address to its symbol name.
    program_symbols_map = {address:symbol
                           for address, symbol in
                           re.findall(rf"^{hex8}({hex8}) g\s+F __TEXT,__text (x\1)$",
                                      objdump_string, re.MULTILINE)}
    if not program_symbols_map:
        sys.exit("no program symbols found in input")

    program_common_encodings = (
        re.findall(rf"^\s+encoding\[\d+\]: 0x({hexd}+)$",
                   objdump_string, re.MULTILINE))
    if not program_common_encodings:
        sys.exit("no common encodings found in input")

    program_encodings_map = {program_symbols_map[address]:encoding
                             for address, encoding in
                             re.findall(rf"^\s+\[\d+\]: function offset=0x({hexd}+), " +
                                        rf"encoding\[\d+\]=0x({hexd}+)$",
                                        objdump_string, re.MULTILINE)}
    # BUG FIX: this previously re-tested object_encodings_map, so an empty
    # program_encodings_map was never detected.
    if not program_encodings_map:
        sys.exit("no program encodings found in input")

    # Fold adjacent entries from the object file that have matching encodings
    # TODO(gkm) add check for personality+lsda
    encoding0 = 0
    for symbol in sorted(object_encodings_map):
        encoding = object_encodings_map[symbol]
        fold = (encoding == encoding0)
        if fold:
            del object_encodings_map[symbol]
        if args.debug:
            print(f"{'delete' if fold else 'retain'} {symbol} with {encoding}")
        encoding0 = encoding

    if program_encodings_map != object_encodings_map:
        if args.debug:
            pprint(f"program encodings map:\n{program_encodings_map}")
            pprint(f"object encodings map:\n{object_encodings_map}")
        sys.exit("encoding maps differ")

    # Count frequency of object-file folded encodings
    # and compare with the program-file common encodings table
    encoding_frequency_map = {}
    for _, encoding in object_encodings_map.items():
        encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
    encoding_frequencies = [x for x in
                            sorted(encoding_frequency_map,
                                   key=lambda x: (encoding_frequency_map.get(x), x),
                                   reverse=True)]

    if program_common_encodings != encoding_frequencies:
        if args.debug:
            pprint(f"program common encodings:\n{program_common_encodings}")
            pprint(f"object encoding frequencies:\n{encoding_frequencies}")
        sys.exit("encoding frequencies differ")


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue