[lld-macho] Have ICF operate on all sections at once

ICF previously operated only within a given OutputSection. We would
merge all CFStrings first, then merge all regular code sections in a
second phase. This worked fine since CFStrings would never reference
regular `__text` sections. However, I would like to expand ICF to merge
functions that reference unwind info. Unwind info references the LSDA
section, which can in turn reference the `__text` section, so we cannot
perform ICF in phases.

In order to have ICF operate on InputSections spanning multiple
OutputSections, we need a way to distinguish InputSections that are
destined for different OutputSections, so that we don't fold across
section boundaries. We achieve this by creating OutputSections early,
and setting `InputSection::parent` to point to them. This is what
LLD-ELF does. (This change should also make it easier to implement the
`section$start$` symbols.)

This diff also folds InputSections w/o checking their flags, which I
think is the right behavior -- if they are destined for the same
OutputSection, they will have the same flags in the output (even if
their input flags differ). I.e. the `parent` pointer check subsumes the
`flags` check. In practice this has nearly no effect (ICF did not become
any more effective on chromium_framework).

I've also updated ICF.cpp's block comment to better reflect its current
status.

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D105641
This commit is contained in:
Jez Ng 2021-07-17 13:42:26 -04:00
parent 182ba8ab1b
commit 428a7c1b38
11 changed files with 99 additions and 56 deletions

View File

@ -24,7 +24,10 @@ using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
void ConcatOutputSection::addInput(ConcatInputSection *input) {
assert(input->parent == this);
if (inputs.empty()) {
align = input->align;
flags = input->getFlags();
@ -33,7 +36,6 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
finalizeFlags(input);
}
inputs.push_back(input);
input->parent = this;
}
// Branch-range extension can be implemented in two ways, either through ...
@ -357,3 +359,22 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) {
break;
}
}
ConcatOutputSection *
ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
ConcatOutputSection *&osec = concatOutputSections[names];
if (!osec)
osec = make<ConcatOutputSection>(names.second);
return osec;
}
NamePair macho::maybeRenameSection(NamePair key) {
auto newNames = config->sectionRenameMap.find(key);
if (newNames != config->sectionRenameMap.end())
return newNames->second;
auto newName = config->segmentRenameMap.find(key.first);
if (newName != config->segmentRenameMap.end())
return std::make_pair(newName->second, key.second);
return key;
}

View File

@ -31,6 +31,7 @@ public:
const ConcatInputSection *firstSection() const { return inputs.front(); }
const ConcatInputSection *lastSection() const { return inputs.back(); }
bool isNeeded() const override { return !inputs.empty(); }
// These accessors will only be valid after finalizing the section
uint64_t getSize() const override { return size; }
@ -50,6 +51,8 @@ public:
return sec->kind() == ConcatKind;
}
static ConcatOutputSection *getOrCreateForInput(const InputSection *);
private:
void finalizeFlags(InputSection *input);
@ -79,6 +82,12 @@ struct ThunkInfo {
uint8_t sequence = 0; // how many thunks created so-far?
};
NamePair maybeRenameSection(NamePair key);
// Output sections are added to output segments in iteration order
// of ConcatOutputSection, so must have deterministic iteration order.
extern llvm::MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;
} // namespace macho

View File

@ -577,6 +577,7 @@ static void compileBitcodeFiles() {
// any CommonSymbols.
static void replaceCommonSymbols() {
TimeTraceScope timeScope("Replace common symbols");
ConcatOutputSection *osec = nullptr;
for (Symbol *sym : symtab->getSymbols()) {
auto *common = dyn_cast<CommonSymbol>(sym);
if (common == nullptr)
@ -589,6 +590,9 @@ static void replaceCommonSymbols() {
auto *isec = make<ConcatInputSection>(
segment_names::data, section_names::common, common->getFile(), data,
common->align, S_ZEROFILL);
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
@ -1037,6 +1041,7 @@ static void gatherInputSections() {
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const SubsectionMap &map : file->subsections) {
ConcatOutputSection *osec = nullptr;
for (const SubsectionEntry &entry : map) {
if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
if (isec->isCoalescedWeak())
@ -1047,6 +1052,9 @@ static void gatherInputSections() {
continue;
}
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
} else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)

View File

@ -52,22 +52,18 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
//
// Summary of segments & sections:
//
// Since folding never occurs across output-section boundaries,
// ConcatOutputSection is the natural input for ICF.
//
// The __TEXT segment is readonly at the MMU. Some sections are already
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
// synthetic and inherently free of duplicates (__TEXT,__stubs &
// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
// could work on __TEXT,__concat, but doing so induces many test failures.
// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
// because doing so induces many test failures.
//
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
// thus ineligible for ICF.
//
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
// the application after dyld applies fixups to pointer data. Some sections are
// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
// (__DATA_CONST,__got). There are no ICF opportunities here.
// the application after dyld applies fixups to pointer data. We currently
// fold only the __DATA_CONST,__cfstring section.
//
// The __DATA segment is read/write at the MMU, and as application-writeable
// data, none of its sections are eligible for ICF.
@ -84,12 +80,13 @@ static std::atomic<bool> icfRepeat{false};
// Compare everything except the relocation referents
static bool equalsConstant(const ConcatInputSection *ia,
const ConcatInputSection *ib) {
// We can only fold within the same OutputSection.
if (ia->parent != ib->parent)
return false;
if (ia->data.size() != ib->data.size())
return false;
if (ia->data != ib->data)
return false;
if (ia->getFlags() != ib->getFlags())
return false;
if (ia->relocs.size() != ib->relocs.size())
return false;
auto f = [&](const Reloc &ra, const Reloc &rb) {
@ -323,8 +320,6 @@ void macho::foldIdenticalSections() {
// relocs to find every referenced InputSection, but that precludes easy
// parallelization. Therefore, we hash every InputSection here where we have
// them all accessible as simple vectors.
std::vector<ConcatInputSection *> codeSections;
std::vector<ConcatInputSection *> cfStringSections;
// ICF can't fold functions with unwind info
DenseSet<const InputSection *> functionsWithUnwindInfo =
@ -338,32 +333,22 @@ void macho::foldIdenticalSections() {
// coexist with equivalence-class IDs, this is not necessary, but might help
// someone keep the numbers straight in case we ever need to debug the
// ICF::segregate()
std::vector<ConcatInputSection *> hashable;
uint64_t icfUniqueID = inputSections.size();
for (ConcatInputSection *isec : inputSections) {
// FIXME: consider non-code __text sections as hashable?
bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
!isec->shouldOmitFromOutput() &&
!functionsWithUnwindInfo.contains(isec) &&
isec->isHashableForICF();
if (isHashable) {
if (isCodeSection(isec))
codeSections.push_back(isec);
else {
assert(isCfStringSection(isec));
cfStringSections.push_back(isec);
}
} else {
if (isHashable)
hashable.push_back(isec);
else
isec->icfEqClass[0] = ++icfUniqueID;
}
}
std::vector<ConcatInputSection *> hashable(codeSections);
hashable.insert(hashable.end(), cfStringSections.begin(),
cfStringSections.end());
parallelForEach(hashable,
[](ConcatInputSection *isec) { isec->hashForICF(); });
// Now that every input section is either hashed or marked as unique, run the
// segregation algorithm to detect foldable subsections.
// We dedup cfStringSections first since code sections may refer to them, but
// not vice-versa.
ICF(cfStringSections).run();
ICF(codeSections).run();
ICF(hashable).run();
}

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "InputSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"

View File

@ -15,7 +15,6 @@
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
@ -596,6 +595,8 @@ void StubHelperSection::setup() {
in.got->addEntry(stubBinder);
in.imageLoaderCache->parent =
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter. It's kept alive by ImageLoaderCacheSection()

View File

@ -15,6 +15,7 @@
#include "OutputSection.h"
#include "OutputSegment.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"

View File

@ -144,6 +144,7 @@ template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
assert(isec->getSegName() == segment_names::ld &&
isec->getName() == section_names::compactUnwind);
isec->parent = compactUnwindSection;
compactUnwindSection->addInput(isec);
}

View File

@ -76,10 +76,6 @@ public:
LCUuid *uuidCommand = nullptr;
OutputSegment *linkEditSegment = nullptr;
// Output sections are added to output segments in iteration order
// of ConcatOutputSection, so must have deterministic iteration order.
MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@ -879,16 +875,6 @@ static void sortSegmentsAndSections() {
}
}
NamePair macho::maybeRenameSection(NamePair key) {
auto newNames = config->sectionRenameMap.find(key);
if (newNames != config->sectionRenameMap.end())
return newNames->second;
auto newName = config->segmentRenameMap.find(key.first);
if (newName != config->segmentRenameMap.end())
return std::make_pair(newName->second, key.second);
return key;
}
template <class LP> void Writer::createOutputSections() {
TimeTraceScope timeScope("Create output sections");
// First, create hidden sections
@ -919,10 +905,7 @@ template <class LP> void Writer::createOutputSections() {
for (ConcatInputSection *isec : inputSections) {
if (isec->shouldOmitFromOutput())
continue;
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
ConcatOutputSection *&osec = concatOutputSections[names];
if (!osec)
osec = make<ConcatOutputSection>(names.second);
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
osec->addInput(isec);
osec->inputOrder =
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
@ -934,7 +917,8 @@ template <class LP> void Writer::createOutputSections() {
StringRef segname = it.first.first;
ConcatOutputSection *osec = it.second;
assert(segname != segment_names::ld);
getOrCreateOutputSegment(segname)->addOutputSection(osec);
if (osec->isNeeded())
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
for (SyntheticSection *ssec : syntheticSections) {

View File

@ -9,8 +9,6 @@
#ifndef LLD_MACHO_WRITER_H
#define LLD_MACHO_WRITER_H
#include "Config.h"
#include <cstdint>
namespace lld {
@ -29,8 +27,6 @@ public:
template <class LP> void writeResult();
NamePair maybeRenameSection(NamePair key);
void createSyntheticSections();
// Add bindings for symbols that need weak or non-lazy bindings.

View File

@ -25,15 +25,17 @@
# CHECK: [[#%x,SR]] g F __TEXT,__text _sr2
# CHECK: [[#%x,MR:]] g F __TEXT,__text _mr1
# CHECK: [[#%x,MR]] g F __TEXT,__text _mr2
# CHECK: [[#%x,K1:]] g O __TEXT,__foo _k1
# CHECK: [[#%x,A:]] g F __TEXT,__text _k2
### FIXME: Mutually-recursive functions with identical bodies (see below)
# COM: [[#%x,XR:]] g F __TEXT,__text _xr1
# COM: [[#%x,XR]] g F __TEXT,__text _xr2
# CHECK-LABEL: Disassembly of section __TEXT,__text:
# CHECK: [[#%x,MAIN]] <_main>:
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
# CHECK-NEXT: callq 0x[[#%x,B]] <_b>
# CHECK-NEXT: callq 0x[[#%x,B2]] <_b2>
# CHECK-NEXT: callq 0x[[#%x,C]] <_c>
@ -44,6 +46,8 @@
# CHECK-NEXT: callq 0x[[#%x,H]] <_h>
# CHECK-NEXT: callq 0x[[#%x,I]] <_i>
# CHECK-NEXT: callq 0x[[#%x,J]] <_j>
# CHECK-NEXT: callq 0x[[#%x,K1]] <_k1>
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
# CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2>
@ -232,8 +236,38 @@ _j:
ret
.cfi_endproc
### No fold: _k1 is in a different section from _a1
.section __TEXT,__foo
.globl _k1
.p2align 2
_k1:
callq _d
mov ___nan@GOTPCREL(%rip), %rax
callq ___isnan
movabs $_abs1a, %rdx
movl $0, %eax
ret
nopl (%rax)
### Fold: _k2 is in a section that gets renamed and output as __text
.section __TEXT,__StaticInit
.globl _k2
.p2align 2
_k2:
callq _d
mov ___nan@GOTPCREL(%rip), %rax
callq ___isnan
movabs $_abs1a, %rdx
movl $0, %eax
ret
## For some reason, llvm-mc generates different nop encodings when adding
## padding for __StaticInit vs __text functions. So we explicitly specify the
## nop here to make sure this function can be folded with _a1.
nopl (%rax)
### Fold: Simple recursion
.text
.globl _sr1
.p2align 2
_sr1:
@ -311,6 +345,8 @@ _main:
callq _h
callq _i
callq _j
callq _k1
callq _k2
callq _sr1
callq _sr2
callq _mr1