forked from OSchip/llvm-project
[lld-macho] Have ICF operate on all sections at once
ICF previously operated only within a given OutputSection. We would merge all CFStrings first, then merge all regular code sections in a second phase. This worked fine since CFStrings would never reference regular `__text` sections. However, I would like to expand ICF to merge functions that reference unwind info. Unwind info references the LSDA section, which can in turn reference the `__text` section, so we cannot perform ICF in phases. In order to have ICF operate on InputSections spanning multiple OutputSections, we need a way to distinguish InputSections that are destined for different OutputSections, so that we don't fold across section boundaries. We achieve this by creating OutputSections early, and setting `InputSection::parent` to point to them. This is what LLD-ELF does. (This change should also make it easier to implement the `section$start$` symbols.) This diff also folds InputSections w/o checking their flags, which I think is the right behavior -- if they are destined for the same OutputSection, they will have the same flags in the output (even if their input flags differ). I.e. the `parent` pointer check subsumes the `flags` check. In practice this has nearly no effect (ICF did not become any more effective on chromium_framework). I've also updated ICF.cpp's block comment to better reflect its current status. Reviewed By: #lld-macho, smeenai Differential Revision: https://reviews.llvm.org/D105641
This commit is contained in:
parent
182ba8ab1b
commit
428a7c1b38
|
@ -24,7 +24,10 @@ using namespace llvm::MachO;
|
|||
using namespace lld;
|
||||
using namespace lld::macho;
|
||||
|
||||
MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
|
||||
|
||||
void ConcatOutputSection::addInput(ConcatInputSection *input) {
|
||||
assert(input->parent == this);
|
||||
if (inputs.empty()) {
|
||||
align = input->align;
|
||||
flags = input->getFlags();
|
||||
|
@ -33,7 +36,6 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
|
|||
finalizeFlags(input);
|
||||
}
|
||||
inputs.push_back(input);
|
||||
input->parent = this;
|
||||
}
|
||||
|
||||
// Branch-range extension can be implemented in two ways, either through ...
|
||||
|
@ -357,3 +359,22 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ConcatOutputSection *
|
||||
ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
|
||||
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
|
||||
ConcatOutputSection *&osec = concatOutputSections[names];
|
||||
if (!osec)
|
||||
osec = make<ConcatOutputSection>(names.second);
|
||||
return osec;
|
||||
}
|
||||
|
||||
NamePair macho::maybeRenameSection(NamePair key) {
|
||||
auto newNames = config->sectionRenameMap.find(key);
|
||||
if (newNames != config->sectionRenameMap.end())
|
||||
return newNames->second;
|
||||
auto newName = config->segmentRenameMap.find(key.first);
|
||||
if (newName != config->segmentRenameMap.end())
|
||||
return std::make_pair(newName->second, key.second);
|
||||
return key;
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ public:
|
|||
|
||||
const ConcatInputSection *firstSection() const { return inputs.front(); }
|
||||
const ConcatInputSection *lastSection() const { return inputs.back(); }
|
||||
bool isNeeded() const override { return !inputs.empty(); }
|
||||
|
||||
// These accessors will only be valid after finalizing the section
|
||||
uint64_t getSize() const override { return size; }
|
||||
|
@ -50,6 +51,8 @@ public:
|
|||
return sec->kind() == ConcatKind;
|
||||
}
|
||||
|
||||
static ConcatOutputSection *getOrCreateForInput(const InputSection *);
|
||||
|
||||
private:
|
||||
void finalizeFlags(InputSection *input);
|
||||
|
||||
|
@ -79,6 +82,12 @@ struct ThunkInfo {
|
|||
uint8_t sequence = 0; // how many thunks created so-far?
|
||||
};
|
||||
|
||||
NamePair maybeRenameSection(NamePair key);
|
||||
|
||||
// Output sections are added to output segments in iteration order
|
||||
// of ConcatOutputSection, so must have deterministic iteration order.
|
||||
extern llvm::MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
|
||||
|
||||
extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;
|
||||
|
||||
} // namespace macho
|
||||
|
|
|
@ -577,6 +577,7 @@ static void compileBitcodeFiles() {
|
|||
// any CommonSymbols.
|
||||
static void replaceCommonSymbols() {
|
||||
TimeTraceScope timeScope("Replace common symbols");
|
||||
ConcatOutputSection *osec = nullptr;
|
||||
for (Symbol *sym : symtab->getSymbols()) {
|
||||
auto *common = dyn_cast<CommonSymbol>(sym);
|
||||
if (common == nullptr)
|
||||
|
@ -589,6 +590,9 @@ static void replaceCommonSymbols() {
|
|||
auto *isec = make<ConcatInputSection>(
|
||||
segment_names::data, section_names::common, common->getFile(), data,
|
||||
common->align, S_ZEROFILL);
|
||||
if (!osec)
|
||||
osec = ConcatOutputSection::getOrCreateForInput(isec);
|
||||
isec->parent = osec;
|
||||
inputSections.push_back(isec);
|
||||
|
||||
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
|
||||
|
@ -1037,6 +1041,7 @@ static void gatherInputSections() {
|
|||
int inputOrder = 0;
|
||||
for (const InputFile *file : inputFiles) {
|
||||
for (const SubsectionMap &map : file->subsections) {
|
||||
ConcatOutputSection *osec = nullptr;
|
||||
for (const SubsectionEntry &entry : map) {
|
||||
if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
|
||||
if (isec->isCoalescedWeak())
|
||||
|
@ -1047,6 +1052,9 @@ static void gatherInputSections() {
|
|||
continue;
|
||||
}
|
||||
isec->outSecOff = inputOrder++;
|
||||
if (!osec)
|
||||
osec = ConcatOutputSection::getOrCreateForInput(isec);
|
||||
isec->parent = osec;
|
||||
inputSections.push_back(isec);
|
||||
} else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
|
||||
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
|
||||
|
|
|
@ -52,22 +52,18 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
|
|||
//
|
||||
// Summary of segments & sections:
|
||||
//
|
||||
// Since folding never occurs across output-section boundaries,
|
||||
// ConcatOutputSection is the natural input for ICF.
|
||||
//
|
||||
// The __TEXT segment is readonly at the MMU. Some sections are already
|
||||
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
|
||||
// synthetic and inherently free of duplicates (__TEXT,__stubs &
|
||||
// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
|
||||
// could work on __TEXT,__concat, but doing so induces many test failures.
|
||||
// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
|
||||
// because doing so induces many test failures.
|
||||
//
|
||||
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
|
||||
// thus ineligible for ICF.
|
||||
//
|
||||
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
|
||||
// the application after dyld applies fixups to pointer data. Some sections are
|
||||
// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
|
||||
// (__DATA_CONST,__got). There are no ICF opportunities here.
|
||||
// the application after dyld applies fixups to pointer data. We currently
|
||||
// fold only the __DATA_CONST,__cfstring section.
|
||||
//
|
||||
// The __DATA segment is read/write at the MMU, and as application-writeable
|
||||
// data, none of its sections are eligible for ICF.
|
||||
|
@ -84,12 +80,13 @@ static std::atomic<bool> icfRepeat{false};
|
|||
// Compare everything except the relocation referents
|
||||
static bool equalsConstant(const ConcatInputSection *ia,
|
||||
const ConcatInputSection *ib) {
|
||||
// We can only fold within the same OutputSection.
|
||||
if (ia->parent != ib->parent)
|
||||
return false;
|
||||
if (ia->data.size() != ib->data.size())
|
||||
return false;
|
||||
if (ia->data != ib->data)
|
||||
return false;
|
||||
if (ia->getFlags() != ib->getFlags())
|
||||
return false;
|
||||
if (ia->relocs.size() != ib->relocs.size())
|
||||
return false;
|
||||
auto f = [&](const Reloc &ra, const Reloc &rb) {
|
||||
|
@ -323,8 +320,6 @@ void macho::foldIdenticalSections() {
|
|||
// relocs to find every referenced InputSection, but that precludes easy
|
||||
// parallelization. Therefore, we hash every InputSection here where we have
|
||||
// them all accessible as simple vectors.
|
||||
std::vector<ConcatInputSection *> codeSections;
|
||||
std::vector<ConcatInputSection *> cfStringSections;
|
||||
|
||||
// ICF can't fold functions with unwind info
|
||||
DenseSet<const InputSection *> functionsWithUnwindInfo =
|
||||
|
@ -338,32 +333,22 @@ void macho::foldIdenticalSections() {
|
|||
// coexist with equivalence-class IDs, this is not necessary, but might help
|
||||
// someone keep the numbers straight in case we ever need to debug the
|
||||
// ICF::segregate()
|
||||
std::vector<ConcatInputSection *> hashable;
|
||||
uint64_t icfUniqueID = inputSections.size();
|
||||
for (ConcatInputSection *isec : inputSections) {
|
||||
// FIXME: consider non-code __text sections as hashable?
|
||||
bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
|
||||
!isec->shouldOmitFromOutput() &&
|
||||
!functionsWithUnwindInfo.contains(isec) &&
|
||||
isec->isHashableForICF();
|
||||
if (isHashable) {
|
||||
if (isCodeSection(isec))
|
||||
codeSections.push_back(isec);
|
||||
else {
|
||||
assert(isCfStringSection(isec));
|
||||
cfStringSections.push_back(isec);
|
||||
}
|
||||
} else {
|
||||
if (isHashable)
|
||||
hashable.push_back(isec);
|
||||
else
|
||||
isec->icfEqClass[0] = ++icfUniqueID;
|
||||
}
|
||||
}
|
||||
std::vector<ConcatInputSection *> hashable(codeSections);
|
||||
hashable.insert(hashable.end(), cfStringSections.begin(),
|
||||
cfStringSections.end());
|
||||
parallelForEach(hashable,
|
||||
[](ConcatInputSection *isec) { isec->hashForICF(); });
|
||||
// Now that every input section is either hashed or marked as unique, run the
|
||||
// segregation algorithm to detect foldable subsections.
|
||||
// We dedup cfStringSections first since code sections may refer to them, but
|
||||
// not vice-versa.
|
||||
ICF(cfStringSections).run();
|
||||
ICF(codeSections).run();
|
||||
ICF(hashable).run();
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "InputSection.h"
|
||||
#include "ConcatOutputSection.h"
|
||||
#include "Config.h"
|
||||
#include "InputFiles.h"
|
||||
#include "OutputSegment.h"
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#include "OutputSegment.h"
|
||||
#include "SymbolTable.h"
|
||||
#include "Symbols.h"
|
||||
#include "Writer.h"
|
||||
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
|
@ -596,6 +595,8 @@ void StubHelperSection::setup() {
|
|||
|
||||
in.got->addEntry(stubBinder);
|
||||
|
||||
in.imageLoaderCache->parent =
|
||||
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
|
||||
inputSections.push_back(in.imageLoaderCache);
|
||||
// Since this isn't in the symbol table or in any input file, the noDeadStrip
|
||||
// argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "OutputSection.h"
|
||||
#include "OutputSegment.h"
|
||||
#include "Target.h"
|
||||
#include "Writer.h"
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
|
|
|
@ -144,6 +144,7 @@ template <class Ptr>
|
|||
void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
|
||||
assert(isec->getSegName() == segment_names::ld &&
|
||||
isec->getName() == section_names::compactUnwind);
|
||||
isec->parent = compactUnwindSection;
|
||||
compactUnwindSection->addInput(isec);
|
||||
}
|
||||
|
||||
|
|
|
@ -76,10 +76,6 @@ public:
|
|||
|
||||
LCUuid *uuidCommand = nullptr;
|
||||
OutputSegment *linkEditSegment = nullptr;
|
||||
|
||||
// Output sections are added to output segments in iteration order
|
||||
// of ConcatOutputSection, so must have deterministic iteration order.
|
||||
MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
|
||||
};
|
||||
|
||||
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
|
||||
|
@ -879,16 +875,6 @@ static void sortSegmentsAndSections() {
|
|||
}
|
||||
}
|
||||
|
||||
NamePair macho::maybeRenameSection(NamePair key) {
|
||||
auto newNames = config->sectionRenameMap.find(key);
|
||||
if (newNames != config->sectionRenameMap.end())
|
||||
return newNames->second;
|
||||
auto newName = config->segmentRenameMap.find(key.first);
|
||||
if (newName != config->segmentRenameMap.end())
|
||||
return std::make_pair(newName->second, key.second);
|
||||
return key;
|
||||
}
|
||||
|
||||
template <class LP> void Writer::createOutputSections() {
|
||||
TimeTraceScope timeScope("Create output sections");
|
||||
// First, create hidden sections
|
||||
|
@ -919,10 +905,7 @@ template <class LP> void Writer::createOutputSections() {
|
|||
for (ConcatInputSection *isec : inputSections) {
|
||||
if (isec->shouldOmitFromOutput())
|
||||
continue;
|
||||
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
|
||||
ConcatOutputSection *&osec = concatOutputSections[names];
|
||||
if (!osec)
|
||||
osec = make<ConcatOutputSection>(names.second);
|
||||
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
|
||||
osec->addInput(isec);
|
||||
osec->inputOrder =
|
||||
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
|
||||
|
@ -934,7 +917,8 @@ template <class LP> void Writer::createOutputSections() {
|
|||
StringRef segname = it.first.first;
|
||||
ConcatOutputSection *osec = it.second;
|
||||
assert(segname != segment_names::ld);
|
||||
getOrCreateOutputSegment(segname)->addOutputSection(osec);
|
||||
if (osec->isNeeded())
|
||||
getOrCreateOutputSegment(segname)->addOutputSection(osec);
|
||||
}
|
||||
|
||||
for (SyntheticSection *ssec : syntheticSections) {
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
#ifndef LLD_MACHO_WRITER_H
|
||||
#define LLD_MACHO_WRITER_H
|
||||
|
||||
#include "Config.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace lld {
|
||||
|
@ -29,8 +27,6 @@ public:
|
|||
|
||||
template <class LP> void writeResult();
|
||||
|
||||
NamePair maybeRenameSection(NamePair key);
|
||||
|
||||
void createSyntheticSections();
|
||||
|
||||
// Add bindings for symbols that need weak or non-lazy bindings.
|
||||
|
|
|
@ -25,15 +25,17 @@
|
|||
# CHECK: [[#%x,SR]] g F __TEXT,__text _sr2
|
||||
# CHECK: [[#%x,MR:]] g F __TEXT,__text _mr1
|
||||
# CHECK: [[#%x,MR]] g F __TEXT,__text _mr2
|
||||
# CHECK: [[#%x,K1:]] g O __TEXT,__foo _k1
|
||||
# CHECK: [[#%x,A:]] g F __TEXT,__text _k2
|
||||
### FIXME: Mutually-recursive functions with identical bodies (see below)
|
||||
# COM: [[#%x,XR:]] g F __TEXT,__text _xr1
|
||||
# COM: [[#%x,XR]] g F __TEXT,__text _xr2
|
||||
|
||||
# CHECK-LABEL: Disassembly of section __TEXT,__text:
|
||||
# CHECK: [[#%x,MAIN]] <_main>:
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,B]] <_b>
|
||||
# CHECK-NEXT: callq 0x[[#%x,B2]] <_b2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,C]] <_c>
|
||||
|
@ -44,6 +46,8 @@
|
|||
# CHECK-NEXT: callq 0x[[#%x,H]] <_h>
|
||||
# CHECK-NEXT: callq 0x[[#%x,I]] <_i>
|
||||
# CHECK-NEXT: callq 0x[[#%x,J]] <_j>
|
||||
# CHECK-NEXT: callq 0x[[#%x,K1]] <_k1>
|
||||
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
|
||||
# CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2>
|
||||
|
@ -232,8 +236,38 @@ _j:
|
|||
ret
|
||||
.cfi_endproc
|
||||
|
||||
### No fold: _k1 is in a different section from _a1
|
||||
.section __TEXT,__foo
|
||||
.globl _k1
|
||||
.p2align 2
|
||||
_k1:
|
||||
callq _d
|
||||
mov ___nan@GOTPCREL(%rip), %rax
|
||||
callq ___isnan
|
||||
movabs $_abs1a, %rdx
|
||||
movl $0, %eax
|
||||
ret
|
||||
nopl (%rax)
|
||||
|
||||
### Fold: _k2 is in a section that gets renamed and output as __text
|
||||
.section __TEXT,__StaticInit
|
||||
.globl _k2
|
||||
.p2align 2
|
||||
_k2:
|
||||
callq _d
|
||||
mov ___nan@GOTPCREL(%rip), %rax
|
||||
callq ___isnan
|
||||
movabs $_abs1a, %rdx
|
||||
movl $0, %eax
|
||||
ret
|
||||
## For some reason, llvm-mc generates different nop encodings when adding
|
||||
## padding for __StaticInit vs __text functions. So we explicitly specify the
|
||||
## nop here to make sure this function can be folded with _a1.
|
||||
nopl (%rax)
|
||||
|
||||
### Fold: Simple recursion
|
||||
|
||||
.text
|
||||
.globl _sr1
|
||||
.p2align 2
|
||||
_sr1:
|
||||
|
@ -311,6 +345,8 @@ _main:
|
|||
callq _h
|
||||
callq _i
|
||||
callq _j
|
||||
callq _k1
|
||||
callq _k2
|
||||
callq _sr1
|
||||
callq _sr2
|
||||
callq _mr1
|
||||
|
|
Loading…
Reference in New Issue