[lld-macho] Refactor segment/section creation, sorting, and merging

Summary:
There were a few issues with the previous setup:

1. The section sorting comparator used a declarative map of section names to
  determine the correct order, but it turns out we need to match on more than
  just names -- in particular, an upcoming diff will sort based on whether the
  S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
  flexible form.

2. We were sorting OutputSections stored in a MapVector, which left the
  MapVector in an inconsistent state -- the wrong keys map to the wrong values!
  In practice, we weren't doing key lookups (only container iteration) after the
  sort, so this was fine, but it was still a dubious state of affairs. This diff
  copies the OutputSections to a vector before sorting them.

3. We were adding unneeded OutputSections to OutputSegments and then filtering
  them out later, which meant that we had to remember whether an OutputSegment
  was in a pre- or post-filtered state. This diff only adds the sections to the
  segments if they are needed.

In addition to those major changes, two minor ones worth noting:

1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
  Previously we were using some inconsistent combination of `osec`, `os`, and
  `section`.

2. I added a check (and a test) for InputSections with names that clashed with
  those of our synthetic OutputSections.

Reviewers: #lld-macho

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D81887
This commit is contained in:
Jez Ng 2020-06-15 00:03:24 -07:00
parent 90406d62e5
commit 3646ee503d
9 changed files with 131 additions and 220 deletions

View File

@ -12,6 +12,7 @@
#include "InputSection.h"
#include "OutputSection.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/MapVector.h"
namespace lld {
namespace macho {
@ -31,7 +32,7 @@ public:
uint64_t getSize() const override { return size; }
uint64_t getFileSize() const override { return fileSize; }
void mergeInput(InputSection *input) override;
void mergeInput(InputSection *input);
void finalize() override;
void writeTo(uint8_t *buf) const override;

View File

@ -8,7 +8,6 @@
#include "OutputSection.h"
#include "OutputSegment.h"
#include "lld/Common/ErrorHandler.h"
using namespace llvm;
using namespace lld;
@ -17,7 +16,3 @@ using namespace lld::macho;
uint64_t OutputSection::getSegmentOffset() const {
return addr - parent->firstSection()->addr;
}
void OutputSection::mergeInput(InputSection *input) {
llvm_unreachable("Cannot merge input section into unmergable output section");
}

View File

@ -47,9 +47,6 @@ public:
// Unneeded sections are omitted entirely (header and body).
virtual bool isNeeded() const { return true; }
// Some sections may allow coalescing other raw input sections.
virtual void mergeInput(InputSection *input);
// Specifically finalizes addresses and section size, not content.
virtual void finalize() {
// TODO investigate refactoring synthetic section finalization logic into
@ -71,38 +68,6 @@ private:
Kind sectionKind;
};
class OutputSectionComparator {
public:
OutputSectionComparator(uint32_t segmentOrder,
const std::vector<StringRef> &sectOrdering)
: segmentOrder(segmentOrder) {
for (uint32_t j = 0, m = sectOrdering.size(); j < m; ++j)
sectionOrdering[sectOrdering[j]] = j;
}
uint32_t sectionOrder(StringRef secname) {
auto sectIt = sectionOrdering.find(secname);
if (sectIt != sectionOrdering.end())
return sectIt->second;
return sectionOrdering.size();
}
// Sort sections within a common segment, which stores them in
// a MapVector of section name -> section
bool operator()(const std::pair<StringRef, OutputSection *> &a,
const std::pair<StringRef, OutputSection *> &b) {
return sectionOrder(a.first) < sectionOrder(b.first);
}
bool operator<(const OutputSectionComparator &b) {
return segmentOrder < b.segmentOrder;
}
private:
uint32_t segmentOrder;
llvm::DenseMap<StringRef, uint32_t> sectionOrdering;
};
} // namespace macho
} // namespace lld

View File

@ -38,84 +38,20 @@ static uint32_t maxProt(StringRef name) {
size_t OutputSegment::numNonHiddenSections() const {
size_t count = 0;
for (const OutputSegment::SectionMapEntry &i : sections) {
OutputSection *os = i.second;
count += (!os->isHidden() ? 1 : 0);
for (const OutputSection *osec : sections) {
count += (!osec->isHidden() ? 1 : 0);
}
return count;
}
void OutputSegment::addOutputSection(OutputSection *os) {
os->parent = this;
std::pair<SectionMap::iterator, bool> result =
sections.insert(SectionMapEntry(os->name, os));
if (!result.second) {
llvm_unreachable("Attempted to set section, but a section with the same "
"name already exists");
}
}
OutputSection *OutputSegment::getOrCreateOutputSection(StringRef name) {
OutputSegment::SectionMap::iterator i = sections.find(name);
if (i != sections.end()) {
return i->second;
}
auto *os = make<MergedOutputSection>(name);
addOutputSection(os);
return os;
}
void OutputSegment::sortOutputSections(OutputSegmentComparator *comparator) {
llvm::stable_sort(sections, *comparator->sectionComparator(this));
}
void OutputSegment::removeUnneededSections() {
sections.remove_if([](const std::pair<StringRef, OutputSection *> &p) {
return !p.second->isNeeded();
});
}
OutputSegmentComparator::OutputSegmentComparator() {
// This defines the order of segments and the sections within each segment.
// Segments that are not mentioned here will end up at defaultPosition;
// sections that are not mentioned will end up at the end of the section
// list for their given segment.
std::vector<std::pair<StringRef, std::vector<StringRef>>> ordering{
{segment_names::pageZero, {}},
{segment_names::text, {section_names::header}},
{defaultPosition, {}},
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
// sections must be ordered after other sections).
{segment_names::linkEdit,
{
section_names::binding,
section_names::export_,
section_names::symbolTable,
section_names::stringTable,
}},
};
for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
auto &p = ordering[i];
StringRef segname = p.first;
const std::vector<StringRef> &sectOrdering = p.second;
orderMap.insert(std::pair<StringRef, OutputSectionComparator>(
segname, OutputSectionComparator(i, sectOrdering)));
}
// Cache the position for the default comparator since this is the likely
// scenario.
defaultPositionComparator = &orderMap.find(defaultPosition)->second;
void OutputSegment::addOutputSection(OutputSection *osec) {
osec->parent = this;
sections.push_back(osec);
}
static llvm::DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
std::vector<OutputSegment *> macho::outputSegments;
OutputSegment *macho::getOutputSegment(StringRef name) {
return nameToOutputSegment.lookup(name);
}
OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
OutputSegment *&segRef = nameToOutputSegment[name];
if (segRef != nullptr)

View File

@ -11,50 +11,35 @@
#include "OutputSection.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/MapVector.h"
namespace lld {
namespace macho {
namespace segment_names {
constexpr const char *pageZero = "__PAGEZERO";
constexpr const char *text = "__TEXT";
constexpr const char *data = "__DATA";
constexpr const char *linkEdit = "__LINKEDIT";
constexpr const char *dataConst = "__DATA_CONST";
constexpr const char pageZero[] = "__PAGEZERO";
constexpr const char text[] = "__TEXT";
constexpr const char data[] = "__DATA";
constexpr const char linkEdit[] = "__LINKEDIT";
constexpr const char dataConst[] = "__DATA_CONST";
} // namespace segment_names
class OutputSection;
class OutputSegmentComparator;
class InputSection;
class OutputSegment {
public:
using SectionMap = typename llvm::MapVector<StringRef, OutputSection *>;
using SectionMapEntry = typename std::pair<StringRef, OutputSection *>;
const OutputSection *firstSection() const { return sections.front(); }
const OutputSection *lastSection() const { return sections.back(); }
const OutputSection *firstSection() const { return sections.front().second; }
const OutputSection *lastSection() const { return sections.back().second; }
bool isNeeded() const {
if (name == segment_names::linkEdit)
return true;
for (const SectionMapEntry &i : sections) {
OutputSection *os = i.second;
if (os->isNeeded())
return true;
}
return false;
void addOutputSection(OutputSection *os);
void sortOutputSections(
llvm::function_ref<bool(OutputSection *, OutputSection *)> comparator) {
llvm::stable_sort(sections, comparator);
}
OutputSection *getOrCreateOutputSection(StringRef name);
void addOutputSection(OutputSection *os);
void sortOutputSections(OutputSegmentComparator *comparator);
void removeUnneededSections();
const SectionMap &getSections() const { return sections; }
const std::vector<OutputSection *> &getSections() const { return sections; }
size_t numNonHiddenSections() const;
uint64_t fileOff = 0;
@ -64,34 +49,11 @@ public:
uint8_t index;
private:
SectionMap sections;
};
class OutputSegmentComparator {
public:
OutputSegmentComparator();
OutputSectionComparator *sectionComparator(const OutputSegment *os) {
auto it = orderMap.find(os->name);
if (it == orderMap.end()) {
return defaultPositionComparator;
}
return &it->second;
}
bool operator()(const OutputSegment *a, const OutputSegment *b) {
return *sectionComparator(a) < *sectionComparator(b);
}
private:
const StringRef defaultPosition = StringRef();
llvm::DenseMap<StringRef, OutputSectionComparator> orderMap;
OutputSectionComparator *defaultPositionComparator;
std::vector<OutputSection *> sections;
};
extern std::vector<OutputSegment *> outputSegments;
OutputSegment *getOutputSegment(StringRef name);
OutputSegment *getOrCreateOutputSegment(StringRef name);
} // namespace macho

View File

@ -30,10 +30,8 @@ namespace lld {
namespace macho {
SyntheticSection::SyntheticSection(const char *segname, const char *name)
: OutputSection(SyntheticKind, name) {
// Synthetic sections always know which segment they belong to so hook
// them up when they're made
getOrCreateOutputSegment(segname)->addOutputSection(this);
: OutputSection(SyntheticKind, name), segname(segname) {
syntheticSections.push_back(this);
}
// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
@ -349,6 +347,7 @@ void StringTableSection::writeTo(uint8_t *buf) const {
}
InStruct in;
std::vector<SyntheticSection *> syntheticSections;
} // namespace macho
} // namespace lld

View File

@ -23,14 +23,14 @@ namespace macho {
namespace section_names {
constexpr const char *pageZero = "__pagezero";
constexpr const char *header = "__mach_header";
constexpr const char *binding = "__binding";
constexpr const char *lazyBinding = "__lazy_binding";
constexpr const char *export_ = "__export";
constexpr const char *symbolTable = "__symbol_table";
constexpr const char *stringTable = "__string_table";
constexpr const char *got = "__got";
constexpr const char pageZero[] = "__pagezero";
constexpr const char header[] = "__mach_header";
constexpr const char binding[] = "__binding";
constexpr const char lazyBinding[] = "__lazy_binding";
constexpr const char export_[] = "__export";
constexpr const char symbolTable[] = "__symbol_table";
constexpr const char stringTable[] = "__string_table";
constexpr const char got[] = "__got";
} // namespace section_names
@ -45,6 +45,8 @@ public:
static bool classof(const OutputSection *sec) {
return sec->kind() == SyntheticKind;
}
const StringRef segname;
};
// The header of the Mach-O file, which must have a file offset of zero.
@ -262,6 +264,7 @@ struct InStruct {
};
extern InStruct in;
extern std::vector<SyntheticSection *> syntheticSections;
} // namespace macho
} // namespace lld

View File

@ -134,25 +134,23 @@ public:
seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr;
c->nsects = seg->numNonHiddenSections();
for (auto &p : seg->getSections()) {
StringRef s = p.first;
OutputSection *section = p.second;
c->filesize += section->getFileSize();
for (OutputSection *osec : seg->getSections()) {
c->filesize += osec->getFileSize();
if (section->isHidden())
if (osec->isHidden())
continue;
auto *sectHdr = reinterpret_cast<section_64 *>(buf);
buf += sizeof(section_64);
memcpy(sectHdr->sectname, s.data(), s.size());
memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
memcpy(sectHdr->segname, name.data(), name.size());
sectHdr->addr = section->addr;
sectHdr->offset = section->fileOff;
sectHdr->align = Log2_32(section->align);
sectHdr->flags = section->flags;
sectHdr->size = section->getSize();
sectHdr->addr = osec->addr;
sectHdr->offset = osec->fileOff;
sectHdr->align = Log2_32(osec->align);
sectHdr->flags = osec->flags;
sectHdr->size = osec->getSize();
}
}
@ -336,28 +334,59 @@ static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
return sectionPriorities;
}
static int segmentOrder(OutputSegment *seg) {
return StringSwitch<int>(seg->name)
.Case(segment_names::pageZero, -2)
.Case(segment_names::text, -1)
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
// sections must be ordered after other sections).
.Case(segment_names::linkEdit, std::numeric_limits<int>::max())
.Default(0);
}
static int sectionOrder(OutputSection *osec) {
StringRef segname = osec->parent->name;
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
if (osec->name == section_names::header)
return -1;
} else if (segname == segment_names::linkEdit) {
return StringSwitch<int>(osec->name)
.Case(section_names::binding, -4)
.Case(section_names::export_, -3)
.Case(section_names::symbolTable, -2)
.Case(section_names::stringTable, -1)
.Default(0);
}
return 0;
}
template <typename T, typename F>
static std::function<bool(T, T)> compareByOrder(F ord) {
return [=](T a, T b) { return ord(a) < ord(b); };
}
// Sorting only can happen once all outputs have been collected. Here we sort
// segments, output sections within each segment, and input sections within each
// output segment.
static void sortSegmentsAndSections() {
auto comparator = OutputSegmentComparator();
llvm::stable_sort(outputSegments, comparator);
llvm::stable_sort(outputSegments,
compareByOrder<OutputSegment *>(segmentOrder));
DenseMap<const InputSection *, size_t> isecPriorities =
buildInputSectionPriorities();
uint32_t sectionIndex = 0;
for (OutputSegment *seg : outputSegments) {
seg->sortOutputSections(&comparator);
for (auto &p : seg->getSections()) {
OutputSection *section = p.second;
seg->sortOutputSections(compareByOrder<OutputSection *>(sectionOrder));
for (auto *osec : seg->getSections()) {
// Now that the output sections are sorted, assign the final
// output section indices.
if (!section->isHidden())
section->index = ++sectionIndex;
if (!osec->isHidden())
osec->index = ++sectionIndex;
if (!isecPriorities.empty()) {
if (auto *merged = dyn_cast<MergedOutputSection>(section)) {
if (auto *merged = dyn_cast<MergedOutputSection>(osec)) {
llvm::stable_sort(merged->inputs,
[&](InputSection *a, InputSection *b) {
return isecPriorities[a] > isecPriorities[b];
@ -387,22 +416,33 @@ void Writer::createOutputSections() {
llvm_unreachable("unhandled output file type");
}
// Then merge input sections into output sections/segments.
// Then merge input sections into output sections.
MapVector<std::pair<StringRef, StringRef>, MergedOutputSection *>
mergedOutputSections;
for (InputSection *isec : inputSections) {
getOrCreateOutputSegment(isec->segname)
->getOrCreateOutputSection(isec->name)
->mergeInput(isec);
MergedOutputSection *&osec =
mergedOutputSections[{isec->segname, isec->name}];
if (osec == nullptr)
osec = make<MergedOutputSection>(isec->name);
osec->mergeInput(isec);
}
// Remove unneeded segments and sections.
// TODO: Avoid creating unneeded segments in the first place
for (auto it = outputSegments.begin(); it != outputSegments.end();) {
OutputSegment *seg = *it;
seg->removeUnneededSections();
if (!seg->isNeeded())
it = outputSegments.erase(it);
else
++it;
for (const auto &it : mergedOutputSections) {
StringRef segname = it.first.first;
MergedOutputSection *osec = it.second;
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
for (SyntheticSection *ssec : syntheticSections) {
auto it = mergedOutputSections.find({ssec->segname, ssec->name});
if (it == mergedOutputSections.end()) {
if (ssec->isNeeded())
getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
} else {
error("section from " + it->second->firstSection()->file->getName() +
" conflicts with synthetic section " + ssec->segname + "," +
ssec->name);
}
}
}
@ -411,16 +451,15 @@ void Writer::assignAddresses(OutputSegment *seg) {
fileOff = alignTo(fileOff, PageSize);
seg->fileOff = fileOff;
for (auto &p : seg->getSections()) {
OutputSection *section = p.second;
addr = alignTo(addr, section->align);
fileOff = alignTo(fileOff, section->align);
section->addr = addr;
section->fileOff = isZeroFill(section->flags) ? 0 : fileOff;
section->finalize();
for (auto *osec : seg->getSections()) {
addr = alignTo(addr, osec->align);
fileOff = alignTo(fileOff, osec->align);
osec->addr = addr;
osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
osec->finalize();
addr += section->getSize();
fileOff += section->getFileSize();
addr += osec->getSize();
fileOff += osec->getFileSize();
}
}
@ -438,12 +477,9 @@ void Writer::openFile() {
void Writer::writeSections() {
uint8_t *buf = buffer->getBufferStart();
for (OutputSegment *seg : outputSegments) {
for (auto &p : seg->getSections()) {
OutputSection *section = p.second;
section->writeTo(buf + section->fileOff);
}
}
for (OutputSegment *seg : outputSegments)
for (OutputSection *osec : seg->getSections())
osec->writeTo(buf + osec->fileOff);
}
void Writer::run() {

View File

@ -0,0 +1,14 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not lld -flavor darwinnew -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o
# CHECK: error: section from [[FILE]] conflicts with synthetic section __DATA_CONST,__got
.globl _main
.section __DATA_CONST,__got
.space 1
.text
_main:
mov $0, %rax
ret