2020-04-03 02:54:05 +08:00
|
|
|
//===- InputSection.cpp ---------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "InputSection.h"
|
2020-08-13 10:50:09 +08:00
|
|
|
#include "InputFiles.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "OutputSegment.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "Symbols.h"
|
2021-01-19 23:44:42 +08:00
|
|
|
#include "SyntheticSections.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "Target.h"
|
2021-01-09 07:47:40 +08:00
|
|
|
#include "Writer.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
|
2020-05-19 06:46:33 +08:00
|
|
|
using namespace llvm;
|
2020-04-03 02:54:05 +08:00
|
|
|
using namespace llvm::MachO;
|
|
|
|
using namespace llvm::support;
|
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
|
|
|
std::vector<InputSection *> macho::inputSections;
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
uint64_t InputSection::getFileOffset() const {
|
2020-05-02 07:29:06 +08:00
|
|
|
return parent->fileOff + outSecFileOff;
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
|
|
|
|
2020-12-02 11:57:37 +08:00
|
|
|
uint64_t InputSection::getFileSize() const {
|
|
|
|
return isZeroFill(flags) ? 0 : getSize();
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
uint64_t InputSection::getVA() const { return parent->addr + outSecOff; }
|
|
|
|
|
2021-03-30 08:19:29 +08:00
|
|
|
static uint64_t resolveSymbolVA(uint8_t *loc, const Symbol &sym, uint8_t type) {
|
2021-03-12 02:28:09 +08:00
|
|
|
const RelocAttrs &relocAttrs = target->getRelocAttrs(type);
|
2021-01-19 23:44:42 +08:00
|
|
|
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
|
|
|
|
if (sym.isInStubs())
|
|
|
|
return in.stubs->addr + sym.stubsIndex * target->stubSize;
|
2021-02-24 10:41:54 +08:00
|
|
|
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
|
2021-01-19 23:44:42 +08:00
|
|
|
if (sym.isInGot())
|
2021-04-03 06:46:18 +08:00
|
|
|
return in.got->addr + sym.gotIndex * target->wordSize;
|
2021-02-24 10:41:54 +08:00
|
|
|
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
|
2021-01-19 23:44:42 +08:00
|
|
|
if (sym.isInGot())
|
2021-04-03 06:46:18 +08:00
|
|
|
return in.tlvPointers->addr + sym.gotIndex * target->wordSize;
|
2021-01-19 23:44:42 +08:00
|
|
|
assert(isa<Defined>(&sym));
|
|
|
|
}
|
|
|
|
return sym.getVA();
|
|
|
|
}
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
void InputSection::writeTo(uint8_t *buf) {
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
assert(!shouldOmitFromOutput());
|
|
|
|
|
[lld-macho] Ensure __bss sections we output have file offset of zero
Summary:
llvm-mc emits `__bss` sections with an offset of zero, but we weren't expecting
that in our input, so we were copying non-zero data from the start of the file and
putting it in `__bss`, with obviously undesirable runtime results. (It appears that
the kernel will copy those nonzero bytes as long as the offset is nonzero, regardless
of whether S_ZERO_FILL is set.)
I debated on whether to make a special ZeroFillSection -- separate from a
regular InputSection -- but it seemed like too much work for now. But I'm happy
to refactor if anyone feels strongly about having it as a separate class.
Depends on D80857.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80859
2020-06-14 11:00:36 +08:00
|
|
|
if (getFileSize() == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
memcpy(buf, data.data(), data.size());
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2021-01-19 23:44:42 +08:00
|
|
|
for (size_t i = 0; i < relocs.size(); i++) {
|
2020-09-27 04:00:22 +08:00
|
|
|
const Reloc &r = relocs[i];
|
|
|
|
uint8_t *loc = buf + r.offset;
|
2020-09-13 11:45:00 +08:00
|
|
|
uint64_t referentVA = 0;
|
2021-03-12 02:28:13 +08:00
|
|
|
if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
|
|
|
|
const Symbol *fromSym = r.referent.get<Symbol *>();
|
2021-04-21 04:58:06 +08:00
|
|
|
const Reloc &minuend = relocs[++i];
|
|
|
|
uint64_t minuendVA;
|
|
|
|
if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>())
|
|
|
|
minuendVA = toSym->getVA();
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
else {
|
|
|
|
auto *referentIsec = minuend.referent.get<InputSection *>();
|
|
|
|
assert(!referentIsec->shouldOmitFromOutput());
|
|
|
|
minuendVA = referentIsec->getVA();
|
|
|
|
}
|
2021-04-21 04:58:06 +08:00
|
|
|
referentVA = minuendVA - fromSym->getVA() + minuend.addend;
|
2021-01-19 23:44:42 +08:00
|
|
|
} else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
|
|
|
|
if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&
|
|
|
|
!referentSym->isInGot())
|
|
|
|
target->relaxGotLoad(loc, r.type);
|
|
|
|
referentVA = resolveSymbolVA(loc, *referentSym, r.type);
|
2020-08-08 02:04:52 +08:00
|
|
|
|
|
|
|
if (isThreadLocalVariables(flags)) {
|
2021-01-09 07:47:40 +08:00
|
|
|
// References from thread-local variable sections are treated as offsets
|
|
|
|
// relative to the start of the thread-local data memory area, which
|
|
|
|
// is initialized via copying all the TLV data sections (which are all
|
|
|
|
// contiguous).
|
2021-01-14 05:32:40 +08:00
|
|
|
if (isa<Defined>(referentSym))
|
2021-01-09 07:47:40 +08:00
|
|
|
referentVA -= firstTLVDataSection->addr;
|
2020-08-08 02:04:52 +08:00
|
|
|
}
|
2020-09-13 11:45:00 +08:00
|
|
|
} else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
assert(!referentIsec->shouldOmitFromOutput());
|
2020-09-13 11:45:00 +08:00
|
|
|
referentVA = referentIsec->getVA();
|
2020-08-08 02:04:52 +08:00
|
|
|
}
|
2021-04-21 04:58:06 +08:00
|
|
|
target->relocateOne(loc, r, referentVA + r.addend, getVA() + r.offset);
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
}
|
2020-08-13 10:50:09 +08:00
|
|
|
|
2020-12-02 06:45:13 +08:00
|
|
|
// Reports whether `isec` is treated as a code section.
//
// Only S_REGULAR and S_COALESCED sections qualify. Among those, a section is
// code when its user attribute bits are exactly S_ATTR_PURE_INSTRUCTIONS, or
// when it belongs to the text segment and is named textCoalNt or staticInit.
bool macho::isCodeSection(InputSection *isec) {
  const uint32_t secType = isec->flags & SECTION_TYPE;
  const bool typeMayHoldCode = secType == S_REGULAR || secType == S_COALESCED;
  if (!typeMayHoldCode)
    return false;

  const uint32_t userAttrs = isec->flags & SECTION_ATTRIBUTES_USR;
  if (userAttrs == S_ATTR_PURE_INSTRUCTIONS)
    return true;

  // Outside the text segment there is no name-based fallback.
  if (isec->segname != segment_names::text)
    return false;

  return StringSwitch<bool>(isec->name)
      .Cases(section_names::textCoalNt, section_names::staticInit, true)
      .Default(false);
}
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
std::string lld::toString(const InputSection *isec) {
|
|
|
|
return (toString(isec->file) + ":(" + isec->name + ")").str();
|
|
|
|
}
|