2021-03-18 22:38:30 +08:00
|
|
|
//===- MapFile.cpp --------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements the -map option. It lists, in order and
// hierarchically, the output file, architecture, input files, output
// sections, and symbols:
|
|
|
|
//
|
|
|
|
// # Path: test
|
|
|
|
// # Arch: x86_64
|
|
|
|
// # Object files:
|
|
|
|
// [ 0] linker synthesized
|
|
|
|
// [ 1] a.o
|
|
|
|
// # Sections:
|
|
|
|
// # Address Size Segment Section
|
|
|
|
// 0x1000005C0 0x0000004C __TEXT __text
|
|
|
|
// # Symbols:
|
|
|
|
// # Address File Name
|
|
|
|
// 0x1000005C0 [ 1] _main
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "MapFile.h"
|
|
|
|
#include "Config.h"
|
|
|
|
#include "InputFiles.h"
|
|
|
|
#include "InputSection.h"
|
|
|
|
#include "OutputSection.h"
|
|
|
|
#include "OutputSegment.h"
|
|
|
|
#include "Symbols.h"
|
|
|
|
#include "Target.h"
|
|
|
|
#include "llvm/Support/Parallel.h"
|
2021-03-26 02:39:44 +08:00
|
|
|
#include "llvm/Support/TimeProfiler.h"
|
2021-03-18 22:38:30 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::sys;
|
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
|
|
|
// Associates an input section with the Defined symbols whose `isec` refers
// to it.
using SymbolMapTy = DenseMap<const InputSection *, SmallVector<Defined *, 4>>;
|
|
|
|
|
|
|
|
// Returns a map from sections to their symbols.
|
|
|
|
static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
|
|
|
|
SymbolMapTy ret;
|
|
|
|
for (Defined *dr : syms)
|
|
|
|
ret[dr->isec].push_back(dr);
|
|
|
|
|
|
|
|
// Sort symbols by address. We want to print out symbols in the
|
|
|
|
// order in the output file rather than the order they appeared
|
|
|
|
// in the input files.
|
|
|
|
for (auto &it : ret)
|
|
|
|
llvm::stable_sort(it.second, [](Defined *a, Defined *b) {
|
|
|
|
return a->getVA() < b->getVA();
|
|
|
|
});
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns a list of all symbols that we want to print out.
|
|
|
|
static std::vector<Defined *> getSymbols() {
|
|
|
|
std::vector<Defined *> v;
|
|
|
|
for (InputFile *file : inputFiles)
|
|
|
|
if (isa<ObjFile>(file))
|
|
|
|
for (Symbol *sym : file->symbols) {
|
|
|
|
if (sym == nullptr)
|
|
|
|
continue;
|
|
|
|
if (auto *d = dyn_cast<Defined>(sym))
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
if (d->isec && d->getFile() == file) {
|
|
|
|
assert(!d->isec->shouldOmitFromOutput() &&
|
|
|
|
"file->symbols should store resolved symbols");
|
2021-03-18 22:38:30 +08:00
|
|
|
v.push_back(d);
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
}
|
2021-03-18 22:38:30 +08:00
|
|
|
}
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct a map from symbols to their stringified representations.
|
|
|
|
// Demangling symbols (which is what toString() does) is slow, so
|
|
|
|
// we do that in batch using parallel-for.
|
2021-03-30 08:19:29 +08:00
|
|
|
static DenseMap<Symbol *, std::string>
|
2021-03-18 22:38:30 +08:00
|
|
|
getSymbolStrings(ArrayRef<Defined *> syms) {
|
|
|
|
std::vector<std::string> str(syms.size());
|
|
|
|
parallelForEachN(0, syms.size(), [&](size_t i) {
|
|
|
|
raw_string_ostream os(str[i]);
|
|
|
|
os << toString(*syms[i]);
|
|
|
|
});
|
|
|
|
|
2021-03-30 08:19:29 +08:00
|
|
|
DenseMap<Symbol *, std::string> ret;
|
2021-03-18 22:38:30 +08:00
|
|
|
for (size_t i = 0, e = syms.size(); i < e; ++i)
|
|
|
|
ret[syms[i]] = std::move(str[i]);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void macho::writeMapFile() {
|
|
|
|
if (config->mapFile.empty())
|
|
|
|
return;
|
|
|
|
|
2021-03-26 02:39:44 +08:00
|
|
|
TimeTraceScope timeScope("Write map file");
|
|
|
|
|
2021-03-18 22:38:30 +08:00
|
|
|
// Open a map file for writing.
|
|
|
|
std::error_code ec;
|
|
|
|
raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
|
|
|
|
if (ec) {
|
|
|
|
error("cannot open " + config->mapFile + ": " + ec.message());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-03-30 02:35:57 +08:00
|
|
|
// Dump output path.
|
2021-03-18 22:38:30 +08:00
|
|
|
os << format("# Path: %s\n", config->outputFile.str().c_str());
|
|
|
|
|
2021-03-30 02:35:57 +08:00
|
|
|
// Dump output architecture.
|
2021-04-22 03:43:38 +08:00
|
|
|
os << format("# Arch: %s\n",
|
|
|
|
getArchitectureName(config->arch()).str().c_str());
|
2021-03-18 22:38:30 +08:00
|
|
|
|
2021-03-30 02:35:57 +08:00
|
|
|
// Dump table of object files.
|
2021-03-18 22:38:30 +08:00
|
|
|
os << "# Object files:\n";
|
|
|
|
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
|
|
|
|
uint32_t fileIndex = 1;
|
|
|
|
DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
|
|
|
|
for (InputFile *file : inputFiles) {
|
|
|
|
if (isa<ObjFile>(file)) {
|
|
|
|
os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
|
|
|
|
readerToFileOrdinal[file] = fileIndex++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect symbol info that we want to print out.
|
|
|
|
std::vector<Defined *> syms = getSymbols();
|
|
|
|
SymbolMapTy sectionSyms = getSectionSyms(syms);
|
2021-03-30 08:19:29 +08:00
|
|
|
DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms);
|
2021-03-18 22:38:30 +08:00
|
|
|
|
|
|
|
// Dump table of sections
|
|
|
|
os << "# Sections:\n";
|
|
|
|
os << "# Address\tSize \tSegment\tSection\n";
|
|
|
|
for (OutputSegment *seg : outputSegments)
|
|
|
|
for (OutputSection *osec : seg->getSections()) {
|
|
|
|
if (osec->isHidden())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(),
|
|
|
|
seg->name.str().c_str(), osec->name.str().c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dump table of symbols
|
|
|
|
os << "# Symbols:\n";
|
|
|
|
os << "# Address\t File Name\n";
|
|
|
|
for (InputSection *isec : inputSections) {
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
auto symsIt = sectionSyms.find(isec);
|
|
|
|
assert(!isec->shouldOmitFromOutput() || (symsIt == sectionSyms.end()));
|
|
|
|
if (symsIt == sectionSyms.end())
|
|
|
|
continue;
|
|
|
|
for (Symbol *sym : symsIt->second) {
|
2021-03-18 22:38:30 +08:00
|
|
|
os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
|
|
|
|
readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: when we implement -dead_strip, we should dump dead stripped symbols
|
|
|
|
}
|