[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind

Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality.

Successor commits will add handling for ...
* `__TEXT,__eh_frame`
* personalities & LSDA
* `-r` pass-through

Differential Revision: https://reviews.llvm.org/D86805
This commit is contained in:
Greg McGary 2020-08-20 13:05:13 -07:00
parent 58ecbbcdcd
commit 2124ca1d5c
9 changed files with 641 additions and 3 deletions

View File

@ -2,8 +2,11 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(MachOOptionsTableGen)
include_directories(${LLVM_MAIN_SRC_DIR}/../libunwind/include)
add_lld_library(lldMachO2
Arch/X86_64.cpp
UnwindInfoSection.cpp
Driver.cpp
DriverUtils.cpp
ExportTrie.cpp

View File

@ -22,6 +22,7 @@ constexpr const char text[] = "__TEXT";
constexpr const char data[] = "__DATA";
constexpr const char linkEdit[] = "__LINKEDIT";
constexpr const char dataConst[] = "__DATA_CONST";
constexpr const char ld[] = "__LD"; // output only with -r
} // namespace segment_names

View File

@ -35,6 +35,10 @@ constexpr const char symbolTable[] = "__symbol_table";
constexpr const char stringTable[] = "__string_table";
constexpr const char got[] = "__got";
constexpr const char threadPtrs[] = "__thread_ptrs";
constexpr const char unwindInfo[] = "__unwind_info";
// these are not synthetic, but in service of synthetic __unwind_info
constexpr const char compactUnwind[] = "__compact_unwind";
constexpr const char ehFrame[] = "__eh_frame";
} // namespace section_names

View File

@ -0,0 +1,284 @@
//===- UnwindInfoSection.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "UnwindInfoSection.h"
#include "Config.h"
#include "InputSection.h"
#include "MergedOutputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/MachO.h"
using namespace llvm;
using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
// encodings for the space-efficient compressed format. In practice,
// fewer than a dozen unique encodings are used by C++ programs of
// all sizes. Therefore, we don't even bother implementing the regular
// non-compressed format. Time will tell if anyone in the field ever
// overflows the 127-encodings limit.
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info
// TODO(gkm): how do we align the 2nd-level pages?
// The synthetic __unwind_info section lives in the __TEXT segment.
UnwindInfoSection::UnwindInfoSection()
    : SyntheticSection(segment_names::text, section_names::unwindInfo) {}
// Emit __unwind_info only when the Writer handed us a merged
// __LD,__compact_unwind input section to digest.
bool UnwindInfoSection::isNeeded() const {
  return compactUnwindSection != nullptr;
}
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame
void UnwindInfoSection::finalize() {
  if (compactUnwindSection == nullptr)
    return;

  // At this point, the address space for __TEXT,__text has been
  // assigned, so we can relocate the __LD,__compact_unwind entries
  // into a temporary buffer. Relocation is necessary in order to sort
  // the CU entries by function address. Sorting is necessary so that
  // we can fold adjacent CU entries with identical
  // encoding+personality+lsda. Folding is necessary because it reduces
  // the number of CU entries by as much as 3 orders of magnitude!
  compactUnwindSection->finalize();
  assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry64) == 0);
  size_t cuCount =
      compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
  cuVector.resize(cuCount);
  // Relocate all __LD,__compact_unwind entries
  compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data()));

  // Rather than sort & fold the 32-byte entries directly, we create a
  // vector of pointers to entries and sort & fold that instead.
  cuPtrVector.reserve(cuCount);
  for (const auto &cuEntry : cuVector)
    cuPtrVector.emplace_back(&cuEntry);
  // NOTE(review): std::sort is not stable; if two entries ever share a
  // functionAddress their relative order is unspecified — confirm that
  // cannot happen, or switch to std::stable_sort.
  std::sort(cuPtrVector.begin(), cuPtrVector.end(),
            [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
              return a->functionAddress < b->functionAddress;
            });

  // Fold adjacent entries with matching encoding+personality+lsda
  // We use three iterators on the same cuPtrVector to fold in-situ:
  // (1) `foldBegin` is the first of a potential sequence of matching entries
  // (2) `foldEnd` is the first non-matching entry after `foldBegin`.
  // The semi-open interval [ foldBegin .. foldEnd ) contains a range
  // entries that can be folded into a single entry and written to ...
  // (3) `foldWrite`
  auto foldWrite = cuPtrVector.begin();
  for (auto foldBegin = cuPtrVector.begin(); foldBegin < cuPtrVector.end();) {
    auto foldEnd = foldBegin;
    // Advance foldEnd past every entry matching *foldBegin; the loop
    // body is intentionally empty.
    while (++foldEnd < cuPtrVector.end() &&
           (*foldBegin)->encoding == (*foldEnd)->encoding &&
           (*foldBegin)->personality == (*foldEnd)->personality &&
           (*foldBegin)->lsda == (*foldEnd)->lsda)
      ;
    *foldWrite++ = *foldBegin;
    foldBegin = foldEnd;
  }
  cuPtrVector.erase(foldWrite, cuPtrVector.end());

  // Count frequencies of the folded encodings
  llvm::DenseMap<compact_unwind_encoding_t, size_t> encodingFrequencies;
  for (auto cuPtrEntry : cuPtrVector)
    encodingFrequencies[cuPtrEntry->encoding]++;
  // The compressed format can index at most 127 common encodings (7 bits).
  if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
    error("TODO(gkm): handle common encodings table overflow");

  // Make a table of encodings, sorted by descending frequency
  for (const auto &frequency : encodingFrequencies)
    commonEncodings.emplace_back(frequency);
  std::sort(commonEncodings.begin(), commonEncodings.end(),
            [](const std::pair<compact_unwind_encoding_t, size_t> &a,
               const std::pair<compact_unwind_encoding_t, size_t> &b) {
              if (a.second == b.second)
                // When frequencies match, secondarily sort on encoding
                // to maintain parity with validate-unwind-info.py
                return a.first > b.first;
              return a.second > b.second;
            });

  // Split folded encodings into pages, limited by capacity of a page
  // and the 24-bit range of function offset
  //
  // Record the page splits as a vector of iterators on cuPtrVector
  // such that successive elements form a semi-open interval. E.g.,
  // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
  //
  // Note that pageBounds.size() is one greater than the number of
  // pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
  pageBounds.push_back(cuPtrVector.cbegin());
  // TODO(gkm): cut 1st page entries short to accommodate section headers ???
  CompactUnwindEntry64 cuEntryKey;
  for (size_t i = 0;;) {
    // Limit the search to entries that can fit within a 4 KiB page.
    const auto pageBegin = pageBounds[0] + i;
    const auto pageMax =
        pageBounds[0] +
        std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX,
                 cuPtrVector.size());
    // Exclude entries with functionOffset that would overflow 24 bits
    cuEntryKey.functionAddress = (*pageBegin)->functionAddress +
                                 UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
    // lower_bound yields the first entry at-or-beyond the 24-bit reach
    // of this page's base address; that entry starts the next page.
    const auto pageBreak = std::lower_bound(
        pageBegin, pageMax, &cuEntryKey,
        [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
          return a->functionAddress < b->functionAddress;
        });
    pageBounds.push_back(pageBreak);
    if (pageBreak == cuPtrVector.cend())
      break;
    i = pageBreak - cuPtrVector.cbegin();
  }

  // compute size of __TEXT,__unwind_info section
  level2PagesOffset =
      sizeof(unwind_info_section_header) +
      commonEncodings.size() * sizeof(uint32_t) +
      personalities.size() * sizeof(uint32_t) +
      pageBounds.size() * sizeof(unwind_info_section_header_index_entry) +
      lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
  unwindInfoSize = level2PagesOffset +
                   (pageBounds.size() - 1) *
                       sizeof(unwind_info_compressed_second_level_page_header) +
                   cuPtrVector.size() * sizeof(uint32_t);
}
// All inputs are relocated and output adddresses are known, so write!
void UnwindInfoSection::writeTo(uint8_t *buf) const {
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
uip->version = 1;
uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
uip->commonEncodingsArrayCount = commonEncodings.size();
uip->personalityArraySectionOffset =
uip->commonEncodingsArraySectionOffset +
(uip->commonEncodingsArrayCount * sizeof(uint32_t));
uip->personalityArrayCount = personalities.size();
uip->indexSectionOffset = uip->personalityArraySectionOffset +
(uip->personalityArrayCount * sizeof(uint32_t));
uip->indexCount = pageBounds.size();
// Common encodings
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
for (const auto &encoding : commonEncodings)
*i32p++ = encoding.first;
// Personalities
for (const auto &personality : personalities)
*i32p++ = personality;
// Level-1 index
uint32_t lsdaOffset =
uip->indexSectionOffset +
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
for (size_t i = 0; i < pageBounds.size() - 1; i++) {
iep->functionOffset = (*pageBounds[i])->functionAddress;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset = lsdaOffset;
iep++;
// TODO(gkm): pad to 4 KiB page boundary ???
size_t entryCount = pageBounds[i + 1] - pageBounds[i];
uint64_t pageSize = sizeof(unwind_info_section_header_index_entry) +
entryCount * sizeof(uint32_t);
l2PagesOffset += pageSize;
}
// Level-1 sentinel
const CompactUnwindEntry64 &cuEnd = cuVector.back();
iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
iep->lsdaIndexArraySectionOffset = lsdaOffset;
iep++;
// LSDAs
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
for (const auto &lsda : lsdaEntries) {
lep->functionOffset = lsda.functionOffset;
lep->lsdaOffset = lsda.lsdaOffset;
}
// create map from encoding to common-encoding-table index compact
// encoding entries use 7 bits to index the common-encoding table
size_t i = 0;
llvm::DenseMap<compact_unwind_encoding_t, size_t> commonEncodingIndexes;
for (const auto &encoding : commonEncodings)
commonEncodingIndexes[encoding.first] = i++;
// Level-2 pages
auto *p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(lep);
for (size_t i = 0; i < pageBounds.size() - 1; i++) {
p2p->kind = UNWIND_SECOND_LEVEL_COMPRESSED;
p2p->entryPageOffset =
sizeof(unwind_info_compressed_second_level_page_header);
p2p->entryCount = pageBounds[i + 1] - pageBounds[i];
p2p->encodingsPageOffset =
p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
p2p->encodingsCount = 0;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
auto cuPtrVectorIt = pageBounds[i];
uintptr_t functionAddressBase = (*cuPtrVectorIt)->functionAddress;
while (cuPtrVectorIt < pageBounds[i + 1]) {
const CompactUnwindEntry64 *cuep = *cuPtrVectorIt++;
size_t cueIndex = commonEncodingIndexes.lookup(cuep->encoding);
*ep++ = ((cueIndex << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
(cuep->functionAddress - functionAddressBase));
}
p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(ep);
}
assert(getSize() ==
static_cast<size_t>((reinterpret_cast<uint8_t *>(p2p) - buf)));
}

View File

@ -0,0 +1,84 @@
//===- UnwindInfoSection.h ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_UNWIND_INFO_H
#define LLD_MACHO_UNWIND_INFO_H
#include "MergedOutputSection.h"
#include "SyntheticSections.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/DenseMap.h"
#include <vector>
// In 2020, we mostly care about 64-bit targets: x86_64 and arm64
// Mirrors the 32-byte layout of one __LD,__compact_unwind entry as
// emitted by the compiler for 64-bit targets (see
// libunwind/include/mach-o/compact_unwind_encoding.h).
struct CompactUnwindEntry64 {
  uint64_t functionAddress; // start address of the covered function
  uint32_t functionLength;  // byte length of the covered range
  compact_unwind_encoding_t encoding; // 32-bit compact unwind encoding
  uint64_t personality;     // personality routine address (0 if none)
  uint64_t lsda;            // LSDA address (0 if none)
};
// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm
// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm
// Same layout as CompactUnwindEntry64 but with 32-bit addresses;
// currently unused.
struct CompactUnwindEntry32 {
  uint32_t functionAddress;
  uint32_t functionLength;
  compact_unwind_encoding_t encoding;
  uint32_t personality;
  uint32_t lsda;
};
namespace lld {
namespace macho {
// Synthesizes the __TEXT,__unwind_info output section from the merged
// __LD,__compact_unwind input. finalize() digests the input and sizes
// the output; writeTo() emits the final bytes.
class UnwindInfoSection : public SyntheticSection {
public:
  UnwindInfoSection();
  uint64_t getSize() const override { return unwindInfoSize; }
  bool isNeeded() const override;
  void finalize() override;
  void writeTo(uint8_t *buf) const override;
  // Called by the Writer to hand over the merged __LD,__compact_unwind
  // section, which this class consumes instead of emitting it.
  void setCompactUnwindSection(MergedOutputSection *cuSection) {
    compactUnwindSection = cuSection;
  }

private:
  // Encodings sorted by descending frequency; filled in finalize().
  std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
  // Personality-function addresses (empty in this initial commit).
  std::vector<uint32_t> personalities;
  // LSDA index entries (empty in this initial commit).
  std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
  // Relocated copies of the input compact-unwind entries.
  std::vector<CompactUnwindEntry64> cuVector;
  // Pointers into cuVector, sorted by functionAddress then folded.
  std::vector<const CompactUnwindEntry64 *> cuPtrVector;
  // Page split points: pageBounds[X]..pageBounds[X+1] is page X;
  // back() holds the sentinel cuPtrVector.cend().
  std::vector<std::vector<const CompactUnwindEntry64 *>::const_iterator>
      pageBounds;
  MergedOutputSection *compactUnwindSection = nullptr;
  // Byte offset of the first level-2 page within the output section.
  uint64_t level2PagesOffset = 0;
  // Total output section size, computed by finalize().
  uint64_t unwindInfoSize = 0;
};
// The compressed level-2 format indexes the common-encoding table with
// 7 bits, so at most 127 distinct encodings are representable.
#define UNWIND_INFO_COMMON_ENCODINGS_MAX 127

// Each level-2 page occupies 4 KiB.
#define UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE 4096
// Max entries per regular (non-compressed) level-2 page.
#define UNWIND_INFO_REGULAR_SECOND_LEVEL_ENTRIES_MAX                           \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))
// Max entries per compressed level-2 page (each entry is one uint32_t).
#define UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                        \
  ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE -                                       \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// Compressed entries pack a 24-bit function offset in the low bits.
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK                          \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
} // namespace macho
} // namespace lld
#endif

View File

@ -17,6 +17,7 @@
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
@ -57,6 +58,7 @@ public:
MachHeaderSection *header = nullptr;
StringTableSection *stringTableSection = nullptr;
SymtabSection *symtabSection = nullptr;
UnwindInfoSection *unwindInfoSection = nullptr;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@ -414,8 +416,11 @@ static int sectionOrder(OutputSection *osec) {
StringRef segname = osec->parent->name;
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
if (osec->name == section_names::header)
return -1;
return StringSwitch<int>(osec->name)
.Case(section_names::header, -1)
.Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
.Case(section_names::ehFrame, std::numeric_limits<int>::max())
.Default(0);
} else if (segname == segment_names::linkEdit) {
return StringSwitch<int>(osec->name)
.Case(section_names::binding, -6)
@ -472,6 +477,7 @@ static void sortSegmentsAndSections() {
void Writer::createOutputSections() {
// First, create hidden sections
stringTableSection = make<StringTableSection>();
unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
symtabSection = make<SymtabSection>(*stringTableSection);
switch (config->outputType) {
@ -498,7 +504,11 @@ void Writer::createOutputSections() {
for (const auto &it : mergedOutputSections) {
StringRef segname = it.first.first;
MergedOutputSection *osec = it.second;
getOrCreateOutputSegment(segname)->addOutputSection(osec);
if (unwindInfoSection && segname == segment_names::ld) {
assert(osec->name == section_names::compactUnwind);
unwindInfoSection->setCompactUnwindSection(osec);
} else
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
for (SyntheticSection *ssec : syntheticSections) {

View File

@ -0,0 +1,21 @@
# REQUIRES: x86
# FIXME(gkm): This test is fast on a Release tree, and slow (~10s) on
# a Debug tree mostly because of llvm-mc. Is there a way to prefer the
# fast installed llvm-mc rather than the slow one in our Debug tree?
# If headers and offsets are proper, then ...
#
# 1) llvm-objdump will not crash, and exit with good status
#
# 2) Summary encodings from the input object will match
# those from the linked output
#
# 3) Encodings & symbols from the input object will match
# those from the linked output
# RUN: %python %S/tools/generate-cfi-funcs.py --seed=johnnyapple >%t.s
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -o %t.o %t.s
# RUN: lld -flavor darwinnew -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t %t.o
# RUN: llvm-objdump --unwind-info --syms %t %t.o >%t.dump
# RUN: %python %S/tools/validate-unwind-info.py %t.dump

View File

@ -0,0 +1,135 @@
#!/usr/bin/env python
"""Generate skeletal functions with a variety .cfi_ directives.
The purpose is to produce object-file test inputs to lld with a
variety of compact unwind encodings.
"""
import random
import argparse
import string
from math import factorial
from itertools import permutations
# Mutable module state shared between main() and print_function().
lsda_n = 0  # count of LSDA labels emitted so far (names Lexception{n})
lsda_odds = 0.0  # probability a generated function gets an LSDA; set in main()
func_size_low = 0x10  # lower bound (16-byte units) for random function sizes
func_size_high = 0x100  # upper bound; both bounds creep up per function
saved_regs = ["%r15", "%r14", "%r13", "%r12", "%rbx"]
# saved_regs_combined[k] holds every k-register permutation of saved_regs
saved_regs_combined = list(list(permutations(saved_regs, i))
                           for i in range(0,6))
def print_function(name: str):
    """Emit one skeletal function (asm on stdout) with randomized CFI
    directives, and return its size in bytes.

    Frame size, saved registers, function size, and LSDA presence are
    all randomized; function sizes creep upward so that early unwind
    pages fill completely and later ones do not.
    """
    global lsda_odds
    have_lsda = (random.random() < lsda_odds)
    frame_size = random.randint(4, 64) * 16
    # FIX: use integer division; `frame_size/16` is a float in Python 3,
    # and random.randint() with float bounds is deprecated (TypeError
    # as of Python 3.12).
    frame_offset = -random.randint(0, (frame_size//16 - 4)) * 16
    reg_count = random.randint(0, 4)
    # Pick one of the reg_count-register permutations. Note: indexing by
    # factorial(reg_count) spans only a prefix of the P(5, reg_count)
    # available permutations, biasing the choice, but never overflows.
    reg_combo = random.randint(0, factorial(reg_count) - 1)
    regs_saved = saved_regs_combined[reg_count][reg_combo]
    global func_size_low, func_size_high
    func_size = random.randint(func_size_low, func_size_high) * 0x10
    func_size_high += 1
    if func_size_high % 0x10 == 0:
        func_size_low += 1
    print(f"""\
### {name} regs={reg_count} frame={frame_size} lsda={have_lsda} size={func_size}
    .section __TEXT,__text,regular,pure_instructions
    .p2align 4, 0x90
    .globl {name}
{name}:
    .cfi_startproc""")
    if have_lsda:
        global lsda_n
        lsda_n += 1
        print(f"""\
    .cfi_personality 155, ___gxx_personality_v0
    .cfi_lsda 16, Lexception{lsda_n}""")
    print(f"""\
    pushq %rbp
    .cfi_def_cfa_offset {frame_size}
    .cfi_offset %rbp, {frame_offset+(6*8)}
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp""")
    for i in range(reg_count):
        print(f".cfi_offset {regs_saved[i]}, {frame_offset+(i*8)}")
    # Pad the body so the function occupies exactly func_size bytes.
    print(f"""\
    .fill {func_size - 6}
    popq %rbp
    retq
    .cfi_endproc
""")
    if have_lsda:
        print(f"""\
    .section __TEXT,__gcc_except_tab
    .p2align 2
Lexception{lsda_n}:
    .space 0x10
""")
    return func_size
def random_seed():
    """Generate a seed that can easily be passed back in via --seed=STRING"""
    letters = []
    for _ in range(10):
        letters.append(random.choice(string.ascii_lowercase))
    return ''.join(letters)
def main():
    """Parse options, seed the RNG, and emit the generated assembly plus
    a trailing _main and personality stub on stdout."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="""\
Function sizes begin small then monotonically increase. The goal is
to produce early pages that are full and later pages that are less
than full, in order to test handling for both cases. Full pages
contain the maximum of 1021 compact unwind entries for a total page
size = 4 KiB.

Use --pages=N or --functions=N to control the size of the output.
Default is --pages=2, meaning produce at least two full pages of
compact unwind entries, plus some more. The calculation is sloppy.
""")
    parser.add_argument('--seed', type=str, default=random_seed(),
                        help='Seed the random number generator')
    parser.add_argument('--pages', type=int, default=2,
                        help='Number of compact-unwind pages')
    parser.add_argument('--functions', type=int, default=None,
                        help='Number of functions to generate')
    # TODO: --encodings is accepted but not yet consulted by the generator.
    parser.add_argument('--encodings', type=int, default=127,
                        help='Maximum number of unique unwind encodings (default = 127)')
    # FIX: help text said "default = 10" (and lacked a closing paren)
    # while the actual default is 0.
    parser.add_argument('--lsda', type=int, default=0,
                        help='Percentage of functions with personality & LSDA (default = 0)')
    args = parser.parse_args()
    random.seed(args.seed)
    p2align = 14
    global lsda_odds
    lsda_odds = args.lsda / 100.0

    print(f"""\
### seed={args.seed} lsda={lsda_odds} p2align={p2align}
    .section __TEXT,__text,regular,pure_instructions
    .p2align {p2align}, 0x90
""")
    size = 0
    base = (1 << p2align)
    if args.functions:
        # Fixed function count requested.
        for n in range(args.functions):
            size += print_function(f"x{size+base:08x}")
    else:
        # Keep generating until we have filled the requested number of
        # compact-unwind pages (sloppy upper-bound calculation).
        while size < (args.pages << 24):
            size += print_function(f"x{size+base:08x}")
    print(f"""\
    .section __TEXT,__text,regular,pure_instructions
    .globl _main
    .p2align 4, 0x90
_main:
    retq

    .p2align 4, 0x90
___gxx_personality_v0:
    retq
""")

if __name__ == '__main__':
    main()

View File

@ -0,0 +1,96 @@
#!/usr/bin/env python
"""Validate compact unwind info by cross checking the llvm-objdump
reports of the input object file vs final linked output.
"""
import sys
import argparse
import re
from pprint import pprint
def main():
    """Parse llvm-objdump output (from argv files or stdin) and exit
    nonzero if the linked program's unwind encodings disagree with the
    object file's (after folding), or if the common-encodings table
    does not match the folded-encoding frequencies."""
    # FIX: use raw strings for the regex fragments ('\d' in a plain
    # string is an invalid escape, an error in recent Python), and stop
    # shadowing the builtin hex().
    hex_pat = r"[a-f\d]"
    hex8_pat = hex_pat + "{8}"

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('files', metavar='FILES', nargs='*',
                        help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    if args.files:
        objdump_string = ''.join([open(f).read() for f in args.files])
    else:
        objdump_string = sys.stdin.read()

    # Per-function (symbol, encoding, personality, lsda) from the
    # object file's __compact_unwind dump.
    object_encodings_list = [(symbol, encoding, personality, lsda)
                             for symbol, encoding, personality, lsda in
                             re.findall(rf"start:\s+0x{hex_pat}+\s+(\w+)\s+" +
                                        rf"length:\s+0x{hex_pat}+\s+" +
                                        rf"compact encoding:\s+0x({hex_pat}+)(?:\s+" +
                                        rf"personality function:\s+0x({hex_pat}+)\s+\w+\s+" +
                                        rf"LSDA:\s+0x({hex_pat}+)\s+\w+(?: \+ 0x{hex_pat}+)?)?",
                                        objdump_string, re.DOTALL)]
    object_encodings_map = {symbol:encoding
                            for symbol, encoding, _, _ in object_encodings_list}
    if not object_encodings_map:
        sys.exit("no object encodings found in input")

    # Map each generated function's address to its symbol; generated
    # symbols are named x<address>, hence the \1 backreference.
    program_symbols_map = {address:symbol
                           for address, symbol in
                           re.findall(rf"^{hex8_pat}({hex8_pat}) g\s+F __TEXT,__text (x\1)$",
                                      objdump_string, re.MULTILINE)}
    if not program_symbols_map:
        sys.exit("no program symbols found in input")

    program_common_encodings = (
        re.findall(rf"^\s+encoding\[\d+\]: 0x({hex_pat}+)$",
                   objdump_string, re.MULTILINE))
    if not program_common_encodings:
        sys.exit("no common encodings found in input")

    program_encodings_map = {program_symbols_map[address]:encoding
                             for address, encoding in
                             re.findall(rf"^\s+\[\d+\]: function offset=0x({hex_pat}+), " +
                                        rf"encoding\[\d+\]=0x({hex_pat}+)$",
                                        objdump_string, re.MULTILINE)}
    # FIX: this previously re-tested object_encodings_map, so the
    # "no program encodings" failure could never trigger.
    if not program_encodings_map:
        sys.exit("no program encodings found in input")

    # Fold adjacent entries from the object file that have matching encodings
    # TODO(gkm) add check for personality+lsda
    # (sorted() materializes the keys, so deleting while looping is safe)
    encoding0 = 0
    for symbol in sorted(object_encodings_map):
        encoding = object_encodings_map[symbol]
        fold = (encoding == encoding0)
        if fold:
            del object_encodings_map[symbol]
        if args.debug:
            print(f"{'delete' if fold else 'retain'} {symbol} with {encoding}")
        encoding0 = encoding

    if program_encodings_map != object_encodings_map:
        if args.debug:
            pprint(f"program encodings map:\n{program_encodings_map}")
            pprint(f"object encodings map:\n{object_encodings_map}")
        sys.exit("encoding maps differ")

    # Count frequency of object-file folded encodings
    # and compare with the program-file common encodings table
    encoding_frequency_map = {}
    for _, encoding in object_encodings_map.items():
        encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
    # Sort by descending frequency, ties broken by descending encoding,
    # mirroring UnwindInfoSection::finalize().
    encoding_frequencies = [x for x in
                            sorted(encoding_frequency_map,
                                   key=lambda x: (encoding_frequency_map.get(x), x),
                                   reverse=True)]
    if program_common_encodings != encoding_frequencies:
        if args.debug:
            pprint(f"program common encodings:\n{program_common_encodings}")
            pprint(f"object encoding frequencies:\n{encoding_frequencies}")
        sys.exit("encoding frequencies differ")

if __name__ == '__main__':
    main()