llvm-project/lld/MachO/MapFile.cpp

//===- MapFile.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the -map option. It lists, in order and
// hierarchically, the output file, architecture, input files, output
// sections and symbols:
//
// # Path: test
// # Arch: x86_64
// # Object files:
// [  0] linker synthesized
// [  1] a.o
// # Sections:
// # Address  Size      Segment  Section
// 0x1000005C0  0x0000004C  __TEXT  __text
// # Symbols:
// # Address  File  Name
// 0x1000005C0  [  1] _main
//
//===----------------------------------------------------------------------===//

#include "MapFile.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "Target.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"

using namespace llvm;
using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;

// Maps an input section to the defined symbols grouped under it. Each bucket
// is a SmallVector with inline storage for four symbols.
using SymbolMapTy = DenseMap<const InputSection *, SmallVector<Defined *, 4>>;

// Groups the given symbols by the input section they refer to (Defined::isec)
// and returns the resulting section -> symbols map. Within each section the
// symbols are ordered by ascending virtual address.
static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
  SymbolMapTy symsBySection;
  for (Defined *sym : syms)
    symsBySection[sym->isec].push_back(sym);

  // The map file lists symbols in output-file order rather than in the order
  // they appeared in the input files, so arrange every bucket by address.
  // The sort is stable, so symbols sharing an address keep their relative
  // order.
  const auto byAddress = [](Defined *lhs, Defined *rhs) {
    return lhs->getVA() < rhs->getVA();
  };
  for (auto &entry : symsBySection)
    llvm::stable_sort(entry.second, byAddress);
  return symsBySection;
}

// Collects every symbol that should appear in the map file: for each ObjFile
// among the input files, the Defined symbols that have an input section and
// whose owning file is that same ObjFile.
static std::vector<Defined *> getSymbols() {
  std::vector<Defined *> collected;
  for (InputFile *file : inputFiles) {
    // Only object files contribute symbols to the map.
    if (!isa<ObjFile>(file))
      continue;

    for (Symbol *sym : file->symbols) {
      // The symbol table of a file may contain null slots; skip them.
      if (sym == nullptr)
        continue;

      auto *defined = dyn_cast<Defined>(sym);
      if (!defined)
        continue;
      // Skip symbols without a section and symbols owned by another file.
      if (!defined->isec || defined->getFile() != file)
        continue;

      assert(!defined->isec->shouldOmitFromOutput() &&
             "file->symbols should store resolved symbols");
      collected.push_back(defined);
    }
  }
  return collected;
}

// Builds a lookup table from each symbol to its printable name.
// toString() demangles, which is slow, so all names are computed up front
// in one parallel pass and then gathered into the map sequentially.
static DenseMap<Symbol *, std::string>
getSymbolStrings(ArrayRef<Defined *> syms) {
  const size_t count = syms.size();

  // Each parallel task writes only to its own slot, so no locking is needed.
  std::vector<std::string> names(count);
  parallelForEachN(0, count,
                   [&](size_t idx) { names[idx] = toString(*syms[idx]); });

  DenseMap<Symbol *, std::string> nameOf;
  for (size_t idx = 0; idx != count; ++idx)
    nameOf[syms[idx]] = std::move(names[idx]);
  return nameOf;
}

void macho::writeMapFile() {
  if (config->mapFile.empty())
    return;

  TimeTraceScope timeScope("Write map file");

  // Open a map file for writing.
  std::error_code ec;
  raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
  if (ec) {
    error("cannot open " + config->mapFile + ": " + ec.message());
    return;
  }

  // Dump output path.
  os << format("# Path: %s\n", config->outputFile.str().c_str());

  // Dump output architecture.
  os << format("# Arch: %s\n",
               getArchitectureName(config->arch()).str().c_str());

  // Dump table of object files.
  os << "# Object files:\n";
  os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
  uint32_t fileIndex = 1;
  DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
  for (InputFile *file : inputFiles) {
    if (isa<ObjFile>(file)) {
      os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
      readerToFileOrdinal[file] = fileIndex++;
    }
  }

  // Collect symbol info that we want to print out.
  std::vector<Defined *> syms = getSymbols();
  SymbolMapTy sectionSyms = getSectionSyms(syms);
  DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms);

  // Dump table of sections
  os << "# Sections:\n";
  os << "# Address\tSize    \tSegment\tSection\n";
  for (OutputSegment *seg : outputSegments)
    for (OutputSection *osec : seg->getSections()) {
      if (osec->isHidden())
        continue;

      os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(),
                   seg->name.str().c_str(), osec->name.str().c_str());
    }

  // Dump table of symbols
  os << "# Symbols:\n";
  os << "# Address\t    File  Name\n";
  for (InputSection *isec : inputSections) {
    auto symsIt = sectionSyms.find(isec);
    assert(!isec->shouldOmitFromOutput() || (symsIt == sectionSyms.end()));
    if (symsIt == sectionSyms.end())
      continue;
    for (Symbol *sym : symsIt->second) {
      os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
                   readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
    }
  }

  // TODO: when we implement -dead_strip, we should dump dead stripped symbols
}
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`//===- MapFile.cpp --------------------------------------------------------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file implements the -map option. It shows lists in order and`
			`// hierarchically the outputFile, arch, input files, output sections and`
			`// symbol:`
			`//`
			`// # Path: test`
			`// # Arch: x86_84`
			`// # Object files:`
			`// [ 0] linker synthesized`
			`// [ 1] a.o`
			`// # Sections:`
			`// # Address Size Segment Section`
			`// 0x1000005C0 0x0000004C __TEXT __text`
			`// # Symbols:`
			`// # Address File Name`
			`// 0x1000005C0 [ 1] _main`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "MapFile.h"`
			`#include "Config.h"`
			`#include "InputFiles.h"`
			`#include "InputSection.h"`
			`#include "OutputSection.h"`
			`#include "OutputSegment.h"`
			`#include "Symbols.h"`
			`#include "Target.h"`
			`#include "llvm/Support/Parallel.h"`
[lld-macho] Add more TimeTraceScopes I added just enough to allow us to see a top-level breakdown of time taken. This is the result of loading the time-trace output into `chrome:://tracing`: https://gist.githubusercontent.com/int3/236c723cbb4b6fa3b2d340bb6395c797/raw/ef5e8234f3fdf609bf93b50f54f4e0d9bd439403/tracing.png Reviewed By: oontvoo Differential Revision: https://reviews.llvm.org/D99311 2021-03-26 02:39:44 +08:00			`#include "llvm/Support/TimeProfiler.h"`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00
			`using namespace llvm;`
			`using namespace llvm::sys;`
			`using namespace lld;`
			`using namespace lld::macho;`

			`using SymbolMapTy = DenseMap<const InputSection , SmallVector<Defined , 4>>;`

			`// Returns a map from sections to their symbols.`
			`static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {`
			`SymbolMapTy ret;`
			`for (Defined *dr : syms)`
			`ret[dr->isec].push_back(dr);`

			`// Sort symbols by address. We want to print out symbols in the`
			`// order in the output file rather than the order they appeared`
			`// in the input files.`
			`for (auto &it : ret)`
			`llvm::stable_sort(it.second, [](Defined a, Defined b) {`
			`return a->getVA() < b->getVA();`
			`});`
			`return ret;`
			`}`

			`// Returns a list of all symbols that we want to print out.`
			`static std::vector<Defined *> getSymbols() {`
			`std::vector<Defined *> v;`
			`for (InputFile *file : inputFiles)`
			`if (isa<ObjFile>(file))`
			`for (Symbol *sym : file->symbols) {`
			`if (sym == nullptr)`
			`continue;`
			`if (auto *d = dyn_cast<Defined>(sym))`
[lld/mac] Write every weak symbol only once in the output Before this, if an inline function was defined in several input files, lld would write each copy of the inline function the output. With this patch, it only writes one copy. Reduces the size of Chromium Framework from 378MB to 345MB (compared to 290MB linked with ld64, which also does dead-stripping, which we don't do yet), and makes linking it faster: N Min Max Median Avg Stddev x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097 + 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012 Difference at 95.0% confidence -0.172162 +/- 0.083847 -4.14165% +/- 2.01709% (Student's t, pooled s = 0.0892373) Implementation-wise, when merging two weak symbols, this sets a "canOmitFromOutput" on the InputSection belonging to the weak symbol not put in the symbol table. We then don't write InputSections that have this set, as long as they are not referenced from other symbols. (This happens e.g. for object files that don't set .subsections_via_symbols or that use .alt_entry.) Some restrictions: - not yet done for bitcode inputs - no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) -- Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs) (that is, catch block unwind information) and Personality Routines associated with weak functions still not stripped. This is wasteful, but harmless. - However, this does strip weaks from __unwind_info (which is needed for correctness and not just for size) - This nopes out on InputSections that are referenced form more than one symbol (eg from .alt_entry) for now Things that work based on symbols Just Work: - map files (change in MapFile.cpp is no-op and not needed; I just found it a bit more explicit) - exports Things that work with inputSections need to explicitly check if an inputSection is written (e.g. unwind info). This patch is useful in itself, but it's also likely also a useful foundation for dead_strip. 
I used to have a "canoncialRepresentative" pointer on InputSection instead of just the bool, which would be handy for ICF too. But I ended up not needing it for this patch, so I removed that again for now. Differential Revision: https://reviews.llvm.org/D102076 2021-05-07 02:47:57 +08:00			`if (d->isec && d->getFile() == file) {`
			`assert(!d->isec->shouldOmitFromOutput() &&`
			`"file->symbols should store resolved symbols");`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`v.push_back(d);`
[lld/mac] Write every weak symbol only once in the output Before this, if an inline function was defined in several input files, lld would write each copy of the inline function the output. With this patch, it only writes one copy. Reduces the size of Chromium Framework from 378MB to 345MB (compared to 290MB linked with ld64, which also does dead-stripping, which we don't do yet), and makes linking it faster: N Min Max Median Avg Stddev x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097 + 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012 Difference at 95.0% confidence -0.172162 +/- 0.083847 -4.14165% +/- 2.01709% (Student's t, pooled s = 0.0892373) Implementation-wise, when merging two weak symbols, this sets a "canOmitFromOutput" on the InputSection belonging to the weak symbol not put in the symbol table. We then don't write InputSections that have this set, as long as they are not referenced from other symbols. (This happens e.g. for object files that don't set .subsections_via_symbols or that use .alt_entry.) Some restrictions: - not yet done for bitcode inputs - no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) -- Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs) (that is, catch block unwind information) and Personality Routines associated with weak functions still not stripped. This is wasteful, but harmless. - However, this does strip weaks from __unwind_info (which is needed for correctness and not just for size) - This nopes out on InputSections that are referenced form more than one symbol (eg from .alt_entry) for now Things that work based on symbols Just Work: - map files (change in MapFile.cpp is no-op and not needed; I just found it a bit more explicit) - exports Things that work with inputSections need to explicitly check if an inputSection is written (e.g. unwind info). This patch is useful in itself, but it's also likely also a useful foundation for dead_strip. 
I used to have a "canoncialRepresentative" pointer on InputSection instead of just the bool, which would be handy for ICF too. But I ended up not needing it for this patch, so I removed that again for now. Differential Revision: https://reviews.llvm.org/D102076 2021-05-07 02:47:57 +08:00			`}`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`}`
			`return v;`
			`}`

			`// Construct a map from symbols to their stringified representations.`
			`// Demangling symbols (which is what toString() does) is slow, so`
			`// we do that in batch using parallel-for.`
[lld-macho][NFC] Drop unnecessary macho:: namespace prefix on unambiguous references to Symbol Within `lld/macho/`, only `InputFiles.cpp` and `Symbols.h` require the `macho::` namespace qualifier to disambiguate references to `class Symbol`. Add braces to outer `for` of a 5-level single-line `if`/`for` nest. Differential Revision: https://reviews.llvm.org/D99555 2021-03-30 08:19:29 +08:00			`static DenseMap<Symbol *, std::string>`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`getSymbolStrings(ArrayRef<Defined *> syms) {`
			`std::vector<std::string> str(syms.size());`
			`parallelForEachN(0, syms.size(), [&](size_t i) {`
			`raw_string_ostream os(str[i]);`
			`os << toString(*syms[i]);`
			`});`

[lld-macho][NFC] Drop unnecessary macho:: namespace prefix on unambiguous references to Symbol Within `lld/macho/`, only `InputFiles.cpp` and `Symbols.h` require the `macho::` namespace qualifier to disambiguate references to `class Symbol`. Add braces to outer `for` of a 5-level single-line `if`/`for` nest. Differential Revision: https://reviews.llvm.org/D99555 2021-03-30 08:19:29 +08:00			`DenseMap<Symbol *, std::string> ret;`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`for (size_t i = 0, e = syms.size(); i < e; ++i)`
			`ret[syms[i]] = std::move(str[i]);`
			`return ret;`
			`}`

			`void macho::writeMapFile() {`
			`if (config->mapFile.empty())`
			`return;`

[lld-macho] Add more TimeTraceScopes I added just enough to allow us to see a top-level breakdown of time taken. This is the result of loading the time-trace output into `chrome:://tracing`: https://gist.githubusercontent.com/int3/236c723cbb4b6fa3b2d340bb6395c797/raw/ef5e8234f3fdf609bf93b50f54f4e0d9bd439403/tracing.png Reviewed By: oontvoo Differential Revision: https://reviews.llvm.org/D99311 2021-03-26 02:39:44 +08:00			`TimeTraceScope timeScope("Write map file");`

[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`// Open a map file for writing.`
			`std::error_code ec;`
			`raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);`
			`if (ec) {`
			`error("cannot open " + config->mapFile + ": " + ec.message());`
			`return;`
			`}`

fix comment typo to cycle bots 2021-03-30 02:35:57 +08:00			`// Dump output path.`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`os << format("# Path: %s\n", config->outputFile.str().c_str());`

fix comment typo to cycle bots 2021-03-30 02:35:57 +08:00			`// Dump output architecture.`
[lld-macho][nfc] Add accessors for commonly-used PlatformInfo fields As discussed here: https://reviews.llvm.org/D100523#inline-951543 Reviewed By: #lld-macho, thakis, alexshap Differential Revision: https://reviews.llvm.org/D100978 2021-04-22 03:43:38 +08:00			`os << format("# Arch: %s\n",`
			`getArchitectureName(config->arch()).str().c_str());`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00
fix comment typo to cycle bots 2021-03-30 02:35:57 +08:00			`// Dump table of object files.`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`os << "# Object files:\n";`
			`os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");`
			`uint32_t fileIndex = 1;`
			`DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;`
			`for (InputFile *file : inputFiles) {`
			`if (isa<ObjFile>(file)) {`
			`os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());`
			`readerToFileOrdinal[file] = fileIndex++;`
			`}`
			`}`

			`// Collect symbol info that we want to print out.`
			`std::vector<Defined *> syms = getSymbols();`
			`SymbolMapTy sectionSyms = getSectionSyms(syms);`
[lld-macho][NFC] Drop unnecessary macho:: namespace prefix on unambiguous references to Symbol Within `lld/macho/`, only `InputFiles.cpp` and `Symbols.h` require the `macho::` namespace qualifier to disambiguate references to `class Symbol`. Add braces to outer `for` of a 5-level single-line `if`/`for` nest. Differential Revision: https://reviews.llvm.org/D99555 2021-03-30 08:19:29 +08:00			`DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms);`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00
			`// Dump table of sections`
			`os << "# Sections:\n";`
			`os << "# Address\tSize \tSegment\tSection\n";`
			`for (OutputSegment *seg : outputSegments)`
			`for (OutputSection *osec : seg->getSections()) {`
			`if (osec->isHidden())`
			`continue;`

			`os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(),`
			`seg->name.str().c_str(), osec->name.str().c_str());`
			`}`

			`// Dump table of symbols`
			`os << "# Symbols:\n";`
			`os << "# Address\t File Name\n";`
			`for (InputSection *isec : inputSections) {`
[lld/mac] Write every weak symbol only once in the output Before this, if an inline function was defined in several input files, lld would write each copy of the inline function the output. With this patch, it only writes one copy. Reduces the size of Chromium Framework from 378MB to 345MB (compared to 290MB linked with ld64, which also does dead-stripping, which we don't do yet), and makes linking it faster: N Min Max Median Avg Stddev x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097 + 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012 Difference at 95.0% confidence -0.172162 +/- 0.083847 -4.14165% +/- 2.01709% (Student's t, pooled s = 0.0892373) Implementation-wise, when merging two weak symbols, this sets a "canOmitFromOutput" on the InputSection belonging to the weak symbol not put in the symbol table. We then don't write InputSections that have this set, as long as they are not referenced from other symbols. (This happens e.g. for object files that don't set .subsections_via_symbols or that use .alt_entry.) Some restrictions: - not yet done for bitcode inputs - no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) -- Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs) (that is, catch block unwind information) and Personality Routines associated with weak functions still not stripped. This is wasteful, but harmless. - However, this does strip weaks from __unwind_info (which is needed for correctness and not just for size) - This nopes out on InputSections that are referenced form more than one symbol (eg from .alt_entry) for now Things that work based on symbols Just Work: - map files (change in MapFile.cpp is no-op and not needed; I just found it a bit more explicit) - exports Things that work with inputSections need to explicitly check if an inputSection is written (e.g. unwind info). This patch is useful in itself, but it's also likely also a useful foundation for dead_strip. 
I used to have a "canoncialRepresentative" pointer on InputSection instead of just the bool, which would be handy for ICF too. But I ended up not needing it for this patch, so I removed that again for now. Differential Revision: https://reviews.llvm.org/D102076 2021-05-07 02:47:57 +08:00			`auto symsIt = sectionSyms.find(isec);`
			`assert(!isec->shouldOmitFromOutput() \|\| (symsIt == sectionSyms.end()));`
			`if (symsIt == sectionSyms.end())`
			`continue;`
			`for (Symbol *sym : symsIt->second) {`
[lld-macho] implement options -map Implement command-line options -map Reviewed By: int3, #lld-macho Differential Revision: https://reviews.llvm.org/D98323 2021-03-18 22:38:30 +08:00			`os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),`
			`readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());`
			`}`
			`}`

			`// TODO: when we implement -dead_strip, we should dump dead stripped symbols`
			`}`