[lld-macho] Associate compact unwind entries with function symbols

Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.

In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in those sections have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.

The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.

Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D109944
This commit is contained in:
Jez Ng 2021-10-26 16:04:04 -04:00
parent 4faf88cc14
commit 002eda7056
18 changed files with 331 additions and 211 deletions

View File

@ -330,6 +330,7 @@ void ConcatOutputSection::finalize() {
/*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
thunkInfo.sym->used = true;
target->populateThunk(thunkInfo.isec, funcSym);
finalizeOne(thunkInfo.isec);
thunks.push_back(thunkInfo.isec);

View File

@ -1006,7 +1006,6 @@ static void gatherInputSections() {
continue;
if (isec->getSegName() == segment_names::ld) {
assert(isec->getName() == section_names::compactUnwind);
in.unwindInfo->addInput(isec);
continue;
}
isec->outSecOff = inputOrder++;

View File

@ -309,22 +309,6 @@ void ICF::segregate(
}
}
template <class Ptr>
DenseSet<const InputSection *> findFunctionsWithUnwindInfo() {
DenseSet<const InputSection *> result;
for (ConcatInputSection *isec : in.unwindInfo->getInputs()) {
for (size_t i = 0; i < isec->relocs.size(); ++i) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, functionAddress))
continue;
result.insert(r.referent.get<InputSection *>());
}
}
return result;
}
void macho::foldIdenticalSections() {
TimeTraceScope timeScope("Fold Identical Code Sections");
// The ICF equivalence-class segregation algorithm relies on pre-computed
@ -334,11 +318,6 @@ void macho::foldIdenticalSections() {
// parallelization. Therefore, we hash every InputSection here where we have
// them all accessible as simple vectors.
// ICF can't fold functions with unwind info
DenseSet<const InputSection *> functionsWithUnwindInfo =
target->wordSize == 8 ? findFunctionsWithUnwindInfo<uint64_t>()
: findFunctionsWithUnwindInfo<uint32_t>();
// If an InputSection is ineligible for ICF, we give it a unique ID to force
// it into an unfoldable singleton equivalence class. Begin the unique-ID
// space at inputSections.size(), so that it will never intersect with
@ -351,9 +330,15 @@ void macho::foldIdenticalSections() {
for (ConcatInputSection *isec : inputSections) {
// FIXME: consider non-code __text sections as hashable?
bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
!isec->shouldOmitFromOutput() &&
!functionsWithUnwindInfo.contains(isec) &&
isec->isHashableForICF();
!isec->shouldOmitFromOutput() && isec->isHashableForICF();
// ICF can't fold functions with unwind info
if (isHashable)
for (Defined *d : isec->symbols)
if (d->compactUnwind) {
isHashable = false;
break;
}
if (isHashable)
hashable.push_back(isec);
else

View File

@ -235,9 +235,13 @@ InputFile::InputFile(Kind kind, const InterfaceFile &interface)
// Note that "record" is a term I came up with. In contrast, "literal" is a term
// used by the Mach-O format.
static Optional<size_t> getRecordSize(StringRef segname, StringRef name) {
if (name == section_names::cfString)
if (name == section_names::cfString) {
if (config->icfLevel != ICFLevel::none && segname == segment_names::data)
return target->wordSize == 8 ? 32 : 16;
} else if (name == section_names::compactUnwind) {
if (segname == segment_names::ld)
return target->wordSize == 8 ? 32 : 20;
}
return {};
}
@ -562,7 +566,6 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
}
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
return make<Defined>(
@ -662,11 +665,11 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
InputSection *isec = subsecMap.back().isec;
// __cfstring has already been split into subsections during
InputSection *lastIsec = subsecMap.back().isec;
// Record-based sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
if (config->icfLevel != ICFLevel::none && isCfStringSection(isec)) {
if (getRecordSize(lastIsec->getSegName(), lastIsec->getName())) {
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
@ -674,7 +677,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
uint64_t symbolOffset = sym.n_value - sectionAddr;
InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset);
if (symbolOffset != 0) {
error(toString(this) + ": __cfstring contains symbol " + name +
error(toString(lastIsec) + ": symbol " + name +
" at misaligned offset");
continue;
}
@ -719,7 +722,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
auto *concatIsec = cast<ConcatInputSection>(isec);
auto *nextIsec = make<ConcatInputSection>(*concatIsec);
nextIsec->numRefs = 0;
nextIsec->wasCoalesced = false;
if (isZeroFill(isec->getFlags())) {
// Zero-fill sections have NULL data.data() but non-zero data.size()
@ -830,6 +832,7 @@ template <class LP> void ObjFile::parse() {
parseDebugInfo();
if (config->emitDataInCodeInfo)
parseDataInCode();
registerCompactUnwind();
}
void ObjFile::parseDebugInfo() {
@ -870,6 +873,52 @@ void ObjFile::parseDataInCode() {
}));
}
// Create pointers from symbols to their associated compact unwind entries.
void ObjFile::registerCompactUnwind() {
// First, locate the __compact_unwind section.
SubsectionMap *cuSubsecMap = nullptr;
for (SubsectionMap &map : subsections) {
if (map.empty())
continue;
if (map[0].isec->getSegName() != segment_names::ld)
continue;
cuSubsecMap = &map;
break;
}
if (!cuSubsecMap)
return;
for (SubsectionEntry &entry : *cuSubsecMap) {
ConcatInputSection *isec = cast<ConcatInputSection>(entry.isec);
ConcatInputSection *referentIsec;
for (const Reloc &r : isec->relocs) {
if (r.offset != 0)
continue;
uint64_t add = r.addend;
if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
add += sym->value;
referentIsec = cast<ConcatInputSection>(sym->isec);
} else {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
}
// The functionAddress relocations are typically section relocations.
// However, unwind info operates on a per-symbol basis, so we search for
// the function symbol here.
auto it = llvm::lower_bound(
referentIsec->symbols, add,
[](Defined *d, uint64_t add) { return d->value < add; });
// The relocation should point at the exact address of a symbol (with no
// addend).
if (it == referentIsec->symbols.end() || (*it)->value != add) {
assert(referentIsec->wasCoalesced);
continue;
}
(*it)->compactUnwind = isec;
}
}
}
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);

View File

@ -119,6 +119,7 @@ private:
SubsectionMap &);
void parseDebugInfo();
void parseDataInCode();
void registerCompactUnwind();
};
// command-line -sectcreate file

View File

@ -93,9 +93,33 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
align = std::max(align, copy->align);
copy->live = false;
copy->wasCoalesced = true;
numRefs += copy->numRefs;
copy->numRefs = 0;
copy->replacement = this;
// Merge the sorted vectors of symbols together.
auto it = symbols.begin();
for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) {
if (it == symbols.end()) {
symbols.push_back(*copyIt++);
it = symbols.end();
} else if ((*it)->value > (*copyIt)->value) {
std::swap(*it++, *copyIt);
} else {
++it;
}
}
copy->symbols.clear();
// Remove duplicate compact unwind info for symbols at the same address.
if (symbols.size() == 0)
return;
it = symbols.begin();
uint64_t v = (*it)->value;
for (++it; it != symbols.end(); ++it) {
if ((*it)->value == v)
(*it)->compactUnwind = nullptr;
else
v = (*it)->value;
}
}
void ConcatInputSection::writeTo(uint8_t *buf) {

View File

@ -11,12 +11,14 @@
#include "Config.h"
#include "Relocations.h"
#include "Symbols.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/BinaryFormat/MachO.h"
namespace lld {
@ -24,7 +26,6 @@ namespace macho {
class InputFile;
class OutputSection;
class Defined;
class InputSection {
public:
@ -61,6 +62,9 @@ public:
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
// The symbols that belong to this InputSection, sorted by value. With
// .subsections_via_symbols, there is typically only one element here.
llvm::TinyPtrVector<Defined *> symbols;
protected:
// The fields in this struct are immutable. Since we create a lot of
@ -84,6 +88,10 @@ protected:
: align(align), callSiteCount(0), isFinal(false), data(data),
shared(make<Shared>(file, name, segname, flags, kind)) {}
InputSection(const InputSection &rhs)
: align(rhs.align), callSiteCount(0), isFinal(false), data(rhs.data),
shared(rhs.shared) {}
const Shared *const shared;
};
@ -107,7 +115,7 @@ public:
// ConcatInputSections are entirely live or dead, so the offset is irrelevant.
bool isLive(uint64_t off) const override { return live; }
void markLive(uint64_t off) override { live = true; }
bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
bool isCoalescedWeak() const { return wasCoalesced && symbols.size() == 0; }
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
bool isHashableForICF() const;
void hashForICF();
@ -134,8 +142,6 @@ public:
// first and not copied to the output.
bool wasCoalesced = false;
bool live = !config->deadStrip;
// How many symbols refer to this InputSection.
uint32_t numRefs = 0;
// This variable has two usages. Initially, it represents the input order.
// After assignAddresses is called, it represents the offset from the
// beginning of the output section this section was assigned to.

View File

@ -45,10 +45,15 @@ void markLive() {
};
auto addSym = [&](Symbol *s) {
if (s->used)
return;
s->used = true;
if (auto *d = dyn_cast<Defined>(s))
if (auto *d = dyn_cast<Defined>(s)) {
if (d->isec)
enqueue(d->isec, d->value);
if (d->compactUnwind)
enqueue(d->compactUnwind, 0);
}
};
// Add GC roots.
@ -118,31 +123,6 @@ void markLive() {
}
}
// Dead strip runs before UnwindInfoSection handling so we need to keep
// __LD,__compact_unwind alive here.
// But that section contains absolute references to __TEXT,__text and
// keeps most code alive due to that. So we can't just enqueue() the
// section: We must skip the relocations for the functionAddress
// in each CompactUnwindEntry.
// See also scanEhFrameSection() in lld/ELF/MarkLive.cpp.
for (ConcatInputSection *isec : in.unwindInfo->getInputs()) {
isec->live = true;
const int compactUnwindEntrySize =
target->wordSize == 8 ? sizeof(CompactUnwindEntry<uint64_t>)
: sizeof(CompactUnwindEntry<uint32_t>);
for (const Reloc &r : isec->relocs) {
// This is the relocation for the address of the function itself.
// Ignore it, else these would keep everything alive.
if (r.offset % compactUnwindEntrySize == 0)
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>())
addSym(s);
else
enqueue(r.referent.get<InputSection *>(), r.addend);
}
}
do {
// Mark things reachable from GC roots as live.
while (!worklist.empty()) {
@ -156,6 +136,8 @@ void markLive() {
else
enqueue(r.referent.get<InputSection *>(), r.addend);
}
for (Defined *d : s->symbols)
addSym(d);
}
// S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section.

View File

@ -10,6 +10,7 @@
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "lld/Common/ErrorHandler.h"

View File

@ -31,26 +31,33 @@ uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
bool Symbol::isLive() const {
if (isa<DylibSymbol>(this) || isa<Undefined>(this))
return used;
if (auto *d = dyn_cast<Defined>(this)) {
// Non-absolute symbols might be alive because their section is
// no_dead_strip or live_support. In that case, the section will know
// that it's live but `used` might be false. Non-absolute symbols always
// have to use the section's `live` bit as source of truth.
if (d->isAbsolute())
return used;
return d->isec->canonical()->isLive(d->value);
// Construct a defined symbol. If it belongs to a section, also register it in
// that section's `symbols` list, keeping the list ordered by symbol value.
Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
                 uint64_t value, uint64_t size, bool isWeakDef, bool isExternal,
                 bool isPrivateExtern, bool isThumb,
                 bool isReferencedDynamically, bool noDeadStrip)
    : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
      overridesWeakDef(false), privateExtern(isPrivateExtern),
      includeInSymtab(true), thumb(isThumb),
      referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
      weakDef(isWeakDef), external(isExternal) {
  // Absolute symbols have no section to register with.
  if (!isec)
    return;

  // Append, then walk backwards from the new element, swapping it with its
  // predecessor for as long as the predecessor has a strictly larger value.
  isec->symbols.push_back(this);
  auto cur = isec->symbols.rbegin();
  const auto rend = isec->symbols.rend();
  for (auto prev = std::next(cur); prev != rend; ++cur, ++prev) {
    if (!((*cur)->value < (*prev)->value))
      break;
    std::swap(*prev, *cur);
  }
}
assert(!isa<CommonSymbol>(this) &&
"replaceCommonSymbols() runs before dead code stripping, and isLive() "
"should only be called after dead code stripping");
// Assume any other kind of symbol is live.
return true;
// A symbol is a thread-local variable when it is not absolute and its
// section's flags mark it as holding thread-local variables.
bool Defined::isTlv() const {
  if (isAbsolute())
    return false;
  return isThreadLocalVariables(isec->getFlags());
}
uint64_t Defined::getVA() const {

View File

@ -9,8 +9,8 @@
#ifndef LLD_MACHO_SYMBOLS_H
#define LLD_MACHO_SYMBOLS_H
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
@ -20,7 +20,6 @@
namespace lld {
namespace macho {
class InputSection;
class MachHeaderSection;
struct StringRefZ {
@ -51,7 +50,7 @@ public:
return {nameData, nameSize};
}
bool isLive() const;
bool isLive() const { return used; }
virtual uint64_t getVA() const { return 0; }
@ -114,23 +113,13 @@ class Defined : public Symbol {
public:
Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
: Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
overridesWeakDef(false), privateExtern(isPrivateExtern),
includeInSymtab(true), thumb(isThumb),
referencedDynamically(isReferencedDynamically),
noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
concatIsec->numRefs++;
}
bool isThumb, bool isReferencedDynamically, bool noDeadStrip);
bool isWeakDef() const override { return weakDef; }
bool isExternalWeakDef() const {
return isWeakDef() && isExternal() && !privateExtern;
}
bool isTlv() const override {
return !isAbsolute() && isThreadLocalVariables(isec->getFlags());
}
bool isTlv() const override;
bool isExternal() const { return external; }
bool isAbsolute() const { return isec == nullptr; }
@ -145,6 +134,7 @@ public:
uint64_t value;
// size is only calculated for regular (non-bitcode) symbols.
uint64_t size;
ConcatInputSection *compactUnwind = nullptr;
bool overridesWeakDef : 1;
// Whether this symbol should appear in the output binary's export trie.

View File

@ -621,14 +621,14 @@ void StubHelperSection::setup() {
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
// setting `live` to true on the backing InputSection.
// argument doesn't matter.
dyldPrivate =
make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
/*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
dyldPrivate->used = true;
}
LazyPointerSection::LazyPointerSection()

View File

@ -107,7 +107,7 @@ template <class Ptr>
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
void prepareRelocations(ConcatInputSection *) override;
void addInput(ConcatInputSection *) override;
void addSymbol(const Defined *) override;
void finalize() override;
void writeTo(uint8_t *buf) const override;
@ -141,11 +141,11 @@ void UnwindInfoSection::prepareRelocations() {
}
template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
assert(isec->getSegName() == segment_names::ld &&
isec->getName() == section_names::compactUnwind);
isec->parent = compactUnwindSection;
compactUnwindSection->addInput(isec);
void UnwindInfoSectionImpl<Ptr>::addSymbol(const Defined *d) {
if (d->compactUnwind) {
d->compactUnwind->parent = compactUnwindSection;
compactUnwindSection->addInput(d->compactUnwind);
}
}
// Compact unwind relocations have different semantics, so we handle them in a
@ -255,9 +255,6 @@ static ConcatInputSection *checkTextSegment(InputSection *isec) {
return cast<ConcatInputSection>(isec);
}
template <class Ptr>
constexpr Ptr TombstoneValue = std::numeric_limits<Ptr>::max();
// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
@ -274,7 +271,7 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
memcpy(buf, isec->data.data(), isec->data.size());
for (const Reloc &r : isec->relocs) {
uint64_t referentVA = TombstoneValue<Ptr>;
uint64_t referentVA = 0;
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
if (!isa<Undefined>(referentSym)) {
if (auto *defined = dyn_cast<Defined>(referentSym))
@ -333,8 +330,7 @@ static void addEntriesForFunctionsWithoutUnwindInfo(
std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
DenseSet<Ptr> hasUnwindInfo;
for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
if (cuEntry.functionAddress != TombstoneValue<Ptr>)
hasUnwindInfo.insert(cuEntry.functionAddress);
hasUnwindInfo.insert(cuEntry.functionAddress);
// Add explicit "has no unwind info" entries for all global and local symbols
// without unwind info.
@ -414,28 +410,6 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
return a->functionAddress < b->functionAddress;
});
// Dead-stripped functions get a functionAddress of TombstoneValue in
// relocateCompactUnwind(). Filter them out here.
// FIXME: This doesn't yet collect associated data like LSDAs kept
// alive only by a now-removed CompactUnwindEntry or other comdat-like
// data (`kindNoneGroupSubordinate*` in ld64).
CompactUnwindEntry<Ptr> tombstone;
tombstone.functionAddress = TombstoneValue<Ptr>;
cuPtrVector.erase(
std::lower_bound(cuPtrVector.begin(), cuPtrVector.end(), &tombstone,
[](const CompactUnwindEntry<Ptr> *a,
const CompactUnwindEntry<Ptr> *b) {
return a->functionAddress < b->functionAddress;
}),
cuPtrVector.end());
// If there are no entries left after adding explicit "no unwind info"
// entries and removing entries for dead-stripped functions, don't write
// an __unwind_info section at all.
assert(allEntriesAreOmitted == cuPtrVector.empty());
if (cuPtrVector.empty())
return;
// Fold adjacent entries with matching encoding+personality+lsda
// We use three iterators on the same cuPtrVector to fold in-situ:
// (1) `foldBegin` is the first of a potential sequence of matching entries
@ -608,7 +582,6 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
}
// Level-1 sentinel
const CompactUnwindEntry<Ptr> &cuEnd = *cuPtrVector.back();
assert(cuEnd.functionAddress != TombstoneValue<Ptr>);
iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;

View File

@ -31,7 +31,7 @@ public:
return !compactUnwindSection->inputs.empty() && !allEntriesAreOmitted;
}
uint64_t getSize() const override { return unwindInfoSize; }
virtual void addInput(ConcatInputSection *) = 0;
virtual void addSymbol(const Defined *) = 0;
std::vector<ConcatInputSection *> getInputs() {
return compactUnwindSection->inputs;
}

View File

@ -673,8 +673,11 @@ void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols");
for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) {
if (defined->overridesWeakDef && defined->isLive())
if (!defined->isLive())
continue;
if (defined->overridesWeakDef)
in.weakBinding->addNonWeakDefinition(defined);
in.unwindInfo->addSymbol(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
@ -683,6 +686,15 @@ void Writer::scanSymbols() {
std::max(dysym->getFile()->refState, dysym->getRefState());
}
}
for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file))
for (Symbol *sym : objFile->symbols) {
if (auto *defined = dyn_cast_or_null<Defined>(sym))
if (!defined->isExternal() && defined->isLive())
in.unwindInfo->addSymbol(defined);
}
}
}
// TODO: ld64 enforces the old load commands in a few other cases.
@ -1101,6 +1113,7 @@ template <class LP> void Writer::run() {
treatSpecialUndefineds();
if (config->entry && !isa<Undefined>(config->entry))
prepareBranchTarget(config->entry);
scanSymbols();
scanRelocations();
// Do not proceed if there was an undefined symbol.
@ -1109,7 +1122,6 @@ template <class LP> void Writer::run() {
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
scanSymbols();
createOutputSections<LP>();
// After this point, we create no new segments; HOWEVER, we might

View File

@ -28,21 +28,21 @@
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --bind %t/a.out | FileCheck %s --check-prefixes=A,CHECK -D#%x,OFF=0x100000000
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --bind %t/b.out | FileCheck %s --check-prefixes=BC,CHECK -D#%x,OFF=0x100000000
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --bind %t/c.out | FileCheck %s --check-prefixes=BC,CHECK -D#%x,OFF=0
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --bind %t/c.out | FileCheck %s --check-prefixes=BC,C,CHECK -D#%x,OFF=0
# A: Indirect symbols for (__DATA_CONST,__got)
# A-NEXT: address index name
# A-DAG: 0x[[#%x,GXX_PERSONALITY_LO:]] [[#]] ___gxx_personality_v0
# A-DAG: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0
# A-DAG: 0x[[#%x,PERSONALITY_1:]] LOCAL
# A-DAG: 0x[[#%x,PERSONALITY_2:]] LOCAL
# A: 0x[[#%x,GXX_PERSONALITY_LO:]] [[#]] ___gxx_personality_v0
# A: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0
# A: 0x[[#%x,PERSONALITY_1:]] LOCAL
# A: 0x[[#%x,PERSONALITY_2:]] LOCAL
# BC: Indirect symbols for (__DATA_CONST,__got)
# BC-NEXT: address index name
# BC-DAG: 0x[[#%x,GXX_PERSONALITY_HI:]] LOCAL
# BC-DAG: 0x[[#%x,GXX_PERSONALITY_LO:]] LOCAL
# BC-DAG: 0x[[#%x,PERSONALITY_1:]] LOCAL
# BC-DAG: 0x[[#%x,PERSONALITY_2:]] LOCAL
# C: 0x[[#%x,GXX_PERSONALITY_HI:]] LOCAL
# BC: 0x[[#%x,GXX_PERSONALITY_LO:]] LOCAL
# BC: 0x[[#%x,PERSONALITY_1:]] LOCAL
# BC: 0x[[#%x,PERSONALITY_2:]] LOCAL
# CHECK: Personality functions: (count = 3)
# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY_LO-OFF]]

View File

@ -248,6 +248,7 @@
# UNWIND-NEXT: *UND* ___gxx_personality_v0
# UNWIND-NEXT: *UND* ___cxa_begin_catch
# UNWIND-NEXT: *UND* dyld_stub_binder
# UNWIND-NOT: GCC_except_table0
## If a dead stripped function has a strong ref to a dylib symbol but
## a live function only a weak ref, the dylib is still not a WEAK_DYLIB.
@ -636,7 +637,13 @@ _baz_refd:
#--- unwind.s
## This is the output of `clang -O2 -S throw.cc` where throw.cc
## looks like this:
## void unref() {}
## int unref() {
## try {
## throw 0;
## } catch (int i) {
## return i + 1;
## }
## }
## int main() {
## try {
## throw 0;
@ -645,92 +652,175 @@ _baz_refd:
## }
## }
.section __TEXT,__text,regular,pure_instructions
.globl __Z5unrefv
.globl __Z5unrefv ## -- Begin function _Z5unrefv
.p2align 4, 0x90
__Z5unrefv:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main:
__Z5unrefv: ## @_Z5unrefv
Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception0
pushq %rbp
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
.cfi_offset %rbx, -24
movl $4, %edi
callq ___cxa_allocate_exception
movl $0, (%rax)
subq $16, %rsp
movl $4, %edi
callq ___cxa_allocate_exception
movl $0, (%rax)
Ltmp0:
movq __ZTIi@GOTPCREL(%rip), %rsi
movq %rax, %rdi
xorl %edx, %edx
callq ___cxa_throw
movq __ZTIi@GOTPCREL(%rip), %rsi
movq %rax, %rdi
xorl %edx, %edx
callq ___cxa_throw
Ltmp1:
## %bb.1:
ud2
LBB1_2:
LBB0_2:
Ltmp2:
movq %rax, %rdi
callq ___cxa_begin_catch
movl (%rax), %ebx
callq ___cxa_end_catch
movl %ebx, %eax
addq $8, %rsp
popq %rbx
popq %rbp
leaq -4(%rbp), %rcx
movq %rax, %rdi
movl %edx, %esi
movq %rcx, %rdx
callq __Z5unrefv.cold.1
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table1:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table0:
Lexception0:
.byte 255 ## @LPStart Encoding = omit
.byte 155 ## @TType Encoding = indirect pcrel sdata4
.byte 255 ## @LPStart Encoding = omit
.byte 155 ## @TType Encoding = indirect pcrel sdata4
.uleb128 Lttbase0-Lttbaseref0
Lttbaseref0:
.byte 1 ## Call site Encoding = uleb128
.byte 1 ## Call site Encoding = uleb128
.uleb128 Lcst_end0-Lcst_begin0
Lcst_begin0:
.uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 <<
.uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
.uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 <<
.uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1
.uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2
.byte 1 ## On action: 1
.uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 <<
.uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
.uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 <<
.uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
.uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 <<
.uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1
.uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2
.byte 1 ## On action: 1
.uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 <<
.uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
Lcst_end0:
.byte 1 ## >> Action Record 1 <<
## Catch TypeInfo 1
.byte 0 ## No further actions
.byte 1 ## >> Action Record 1 <<
## Catch TypeInfo 1
.byte 0 ## No further actions
.p2align 2
## >> Catch TypeInfos <<
.long __ZTIi@GOTPCREL+4 ## TypeInfo 1
## >> Catch TypeInfos <<
.long __ZTIi@GOTPCREL+4 ## TypeInfo 1
Lttbase0:
.p2align 2
## -- End function
## -- End function
.section __TEXT,__text,regular,pure_instructions
.globl _main ## -- Begin function main
.p2align 4, 0x90
_main: ## @main
Lfunc_begin1:
.cfi_startproc
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception1
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
.cfi_offset %rbx, -24
movl $4, %edi
callq ___cxa_allocate_exception
movl $0, (%rax)
Ltmp3:
movq __ZTIi@GOTPCREL(%rip), %rsi
movq %rax, %rdi
xorl %edx, %edx
callq ___cxa_throw
Ltmp4:
## %bb.1:
ud2
LBB1_2:
Ltmp5:
movq %rax, %rdi
callq ___cxa_begin_catch
movl (%rax), %ebx
callq ___cxa_end_catch
movl %ebx, %eax
addq $8, %rsp
popq %rbx
popq %rbp
retq
Lfunc_end1:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table1:
Lexception1:
.byte 255 ## @LPStart Encoding = omit
.byte 155 ## @TType Encoding = indirect pcrel sdata4
.uleb128 Lttbase1-Lttbaseref1
Lttbaseref1:
.byte 1 ## Call site Encoding = uleb128
.uleb128 Lcst_end1-Lcst_begin1
Lcst_begin1:
.uleb128 Lfunc_begin1-Lfunc_begin1 ## >> Call Site 1 <<
.uleb128 Ltmp3-Lfunc_begin1 ## Call between Lfunc_begin1 and Ltmp3
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
.uleb128 Ltmp3-Lfunc_begin1 ## >> Call Site 2 <<
.uleb128 Ltmp4-Ltmp3 ## Call between Ltmp3 and Ltmp4
.uleb128 Ltmp5-Lfunc_begin1 ## jumps to Ltmp5
.byte 1 ## On action: 1
.uleb128 Ltmp4-Lfunc_begin1 ## >> Call Site 3 <<
.uleb128 Lfunc_end1-Ltmp4 ## Call between Ltmp4 and Lfunc_end1
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
Lcst_end1:
.byte 1 ## >> Action Record 1 <<
## Catch TypeInfo 1
.byte 0 ## No further actions
.p2align 2
## >> Catch TypeInfos <<
.long __ZTIi@GOTPCREL+4 ## TypeInfo 1
Lttbase1:
.p2align 2
## -- End function
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90 ## -- Begin function _Z5unrefv.cold.1
__Z5unrefv.cold.1: ## @_Z5unrefv.cold.1
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
.cfi_offset %rbx, -24
movq %rdx, %rbx
callq ___cxa_begin_catch
movl (%rax), %eax
incl %eax
movl %eax, (%rbx)
addq $8, %rsp
popq %rbx
popq %rbp
jmp ___cxa_end_catch ## TAILCALL
.cfi_endproc
## -- End function
.subsections_via_symbols
#--- weak-ref.s

View File

@ -2,7 +2,7 @@
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
# RUN: not %lld -dylib -framework CoreFoundation --icf=all %t/test.o 2>&1 | FileCheck %s
# CHECK: error: {{.*}}test.o: __cfstring contains symbol _uh_oh at misaligned offset
# CHECK: error: {{.*}}test.o:(__cfstring): symbol _uh_oh at misaligned offset
.cstring
L_.str: