[lld-macho] Have ICF operate on all sections at once

ICF previously operated only within a given OutputSection. We would merge all CFStrings first, then merge all regular code sections in a second phase. This worked fine since CFStrings would never reference regular `__text` sections. However, I would like to expand ICF to merge functions that reference unwind info. Unwind info references the LSDA section, which can in turn reference the `__text` section, so we cannot perform ICF in phases. In order to have ICF operate on InputSections spanning multiple OutputSections, we need a way to distinguish InputSections that are destined for different OutputSections, so that we don't fold across section boundaries. We achieve this by creating OutputSections early, and setting `InputSection::parent` to point to them. This is what LLD-ELF does. (This change should also make it easier to implement the `section$start$` symbols.) This diff also folds InputSections w/o checking their flags, which I think is the right behavior -- if they are destined for the same OutputSection, they will have the same flags in the output (even if their input flags differ). I.e. the `parent` pointer check subsumes the `flags` check. In practice this has nearly no effect (ICF did not become any more effective on chromium_framework). I've also updated ICF.cpp's block comment to better reflect its current status. Reviewed By: #lld-macho, smeenai Differential Revision: https://reviews.llvm.org/D105641
2021-07-17 13:42:26 -04:00 · 2021-07-17 13:42:26 -04:00 · 428a7c1b38
parent 182ba8ab1b
commit 428a7c1b38
11 changed files with 99 additions and 56 deletions
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@ -24,7 +24,10 @@ using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;

+MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
+
 void ConcatOutputSection::addInput(ConcatInputSection *input) {
+  assert(input->parent == this);
  if (inputs.empty()) {
    align = input->align;
    flags = input->getFlags();
@ -33,7 +36,6 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
    finalizeFlags(input);
  }
  inputs.push_back(input);
-  input->parent = this;
 }

 // Branch-range extension can be implemented in two ways, either through ...
@ -357,3 +359,22 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) {
    break;
  }
 }
+
+ConcatOutputSection *
+ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
+  NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
+  ConcatOutputSection *&osec = concatOutputSections[names];
+  if (!osec)
+    osec = make<ConcatOutputSection>(names.second);
+  return osec;
+}
+
+NamePair macho::maybeRenameSection(NamePair key) {
+  auto newNames = config->sectionRenameMap.find(key);
+  if (newNames != config->sectionRenameMap.end())
+    return newNames->second;
+  auto newName = config->segmentRenameMap.find(key.first);
+  if (newName != config->segmentRenameMap.end())
+    return std::make_pair(newName->second, key.second);
+  return key;
+}
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@ -31,6 +31,7 @@ public:

  const ConcatInputSection *firstSection() const { return inputs.front(); }
  const ConcatInputSection *lastSection() const { return inputs.back(); }
+  bool isNeeded() const override { return !inputs.empty(); }

  // These accessors will only be valid after finalizing the section
  uint64_t getSize() const override { return size; }
@ -50,6 +51,8 @@ public:
    return sec->kind() == ConcatKind;
  }

+  static ConcatOutputSection *getOrCreateForInput(const InputSection *);
+
 private:
  void finalizeFlags(InputSection *input);

@ -79,6 +82,12 @@ struct ThunkInfo {
  uint8_t sequence = 0;        // how many thunks created so-far?
 };

+NamePair maybeRenameSection(NamePair key);
+
+// Output sections are added to output segments in iteration order
+// of ConcatOutputSection, so must have deterministic iteration order.
+extern llvm::MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
+
 extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;

 } // namespace macho
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@ -577,6 +577,7 @@ static void compileBitcodeFiles() {
 // any CommonSymbols.
 static void replaceCommonSymbols() {
  TimeTraceScope timeScope("Replace common symbols");
+  ConcatOutputSection *osec = nullptr;
  for (Symbol *sym : symtab->getSymbols()) {
    auto *common = dyn_cast<CommonSymbol>(sym);
    if (common == nullptr)
@ -589,6 +590,9 @@ static void replaceCommonSymbols() {
    auto *isec = make<ConcatInputSection>(
        segment_names::data, section_names::common, common->getFile(), data,
        common->align, S_ZEROFILL);
+    if (!osec)
+      osec = ConcatOutputSection::getOrCreateForInput(isec);
+    isec->parent = osec;
    inputSections.push_back(isec);

    // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
@ -1037,6 +1041,7 @@ static void gatherInputSections() {
  int inputOrder = 0;
  for (const InputFile *file : inputFiles) {
    for (const SubsectionMap &map : file->subsections) {
+      ConcatOutputSection *osec = nullptr;
      for (const SubsectionEntry &entry : map) {
        if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
          if (isec->isCoalescedWeak())
@ -1047,6 +1052,9 @@ static void gatherInputSections() {
            continue;
          }
          isec->outSecOff = inputOrder++;
+          if (!osec)
+            osec = ConcatOutputSection::getOrCreateForInput(isec);
+          isec->parent = osec;
          inputSections.push_back(isec);
        } else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
          if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@ -52,22 +52,18 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
 //
 // Summary of segments & sections:
 //
-// Since folding never occurs across output-section boundaries,
-// ConcatOutputSection is the natural input for ICF.
-//
 // The __TEXT segment is readonly at the MMU. Some sections are already
 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
 // synthetic and inherently free of duplicates (__TEXT,__stubs &
-// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
-// could work on __TEXT,__concat, but doing so induces many test failures.
+// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
+// because doing so induces many test failures.
 //
 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
 // thus ineligible for ICF.
 //
 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
-// the application after dyld applies fixups to pointer data. Some sections are
-// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
-// (__DATA_CONST,__got). There are no ICF opportunities here.
+// the application after dyld applies fixups to pointer data. We currently
+// fold only the __DATA_CONST,__cfstring section.
 //
 // The __DATA segment is read/write at the MMU, and as application-writeable
 // data, none of its sections are eligible for ICF.
@ -84,12 +80,13 @@ static std::atomic<bool> icfRepeat{false};
 // Compare everything except the relocation referents
 static bool equalsConstant(const ConcatInputSection *ia,
                           const ConcatInputSection *ib) {
+  // We can only fold within the same OutputSection.
+  if (ia->parent != ib->parent)
+    return false;
  if (ia->data.size() != ib->data.size())
    return false;
  if (ia->data != ib->data)
    return false;
-  if (ia->getFlags() != ib->getFlags())
-    return false;
  if (ia->relocs.size() != ib->relocs.size())
    return false;
  auto f = [&](const Reloc &ra, const Reloc &rb) {
@ -323,8 +320,6 @@ void macho::foldIdenticalSections() {
  // relocs to find every referenced InputSection, but that precludes easy
  // parallelization. Therefore, we hash every InputSection here where we have
  // them all accessible as simple vectors.
-  std::vector<ConcatInputSection *> codeSections;
-  std::vector<ConcatInputSection *> cfStringSections;

  // ICF can't fold functions with unwind info
  DenseSet<const InputSection *> functionsWithUnwindInfo =
@ -338,32 +333,22 @@ void macho::foldIdenticalSections() {
  // coexist with equivalence-class IDs, this is not necessary, but might help
  // someone keep the numbers straight in case we ever need to debug the
  // ICF::segregate()
+  std::vector<ConcatInputSection *> hashable;
  uint64_t icfUniqueID = inputSections.size();
  for (ConcatInputSection *isec : inputSections) {
+    // FIXME: consider non-code __text sections as hashable?
    bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
                      !isec->shouldOmitFromOutput() &&
                      !functionsWithUnwindInfo.contains(isec) &&
                      isec->isHashableForICF();
-    if (isHashable) {
-      if (isCodeSection(isec))
-        codeSections.push_back(isec);
-      else {
-        assert(isCfStringSection(isec));
-        cfStringSections.push_back(isec);
-      }
-    } else {
+    if (isHashable)
+      hashable.push_back(isec);
+    else
      isec->icfEqClass[0] = ++icfUniqueID;
-    }
  }
-  std::vector<ConcatInputSection *> hashable(codeSections);
-  hashable.insert(hashable.end(), cfStringSections.begin(),
-                  cfStringSections.end());
  parallelForEach(hashable,
                  [](ConcatInputSection *isec) { isec->hashForICF(); });
  // Now that every input section is either hashed or marked as unique, run the
  // segregation algorithm to detect foldable subsections.
-  // We dedup cfStringSections first since code sections may refer to them, but
-  // not vice-versa.
-  ICF(cfStringSections).run();
-  ICF(codeSections).run();
+  ICF(hashable).run();
 }
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "InputSection.h"
+#include "ConcatOutputSection.h"
 #include "Config.h"
 #include "InputFiles.h"
 #include "OutputSegment.h"
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@ -15,7 +15,6 @@
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
-#include "Writer.h"

 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
@ -596,6 +595,8 @@ void StubHelperSection::setup() {

  in.got->addEntry(stubBinder);

+  in.imageLoaderCache->parent =
+      ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
  inputSections.push_back(in.imageLoaderCache);
  // Since this isn't in the symbol table or in any input file, the noDeadStrip
  // argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@ -15,6 +15,7 @@
 #include "OutputSection.h"
 #include "OutputSegment.h"
 #include "Target.h"
+#include "Writer.h"

 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@ -144,6 +144,7 @@ template <class Ptr>
 void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
  assert(isec->getSegName() == segment_names::ld &&
         isec->getName() == section_names::compactUnwind);
+  isec->parent = compactUnwindSection;
  compactUnwindSection->addInput(isec);
 }

--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@ -76,10 +76,6 @@ public:

  LCUuid *uuidCommand = nullptr;
  OutputSegment *linkEditSegment = nullptr;
-
-  // Output sections are added to output segments in iteration order
-  // of ConcatOutputSection, so must have deterministic iteration order.
-  MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
 };

 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@ -879,16 +875,6 @@ static void sortSegmentsAndSections() {
  }
 }

-NamePair macho::maybeRenameSection(NamePair key) {
-  auto newNames = config->sectionRenameMap.find(key);
-  if (newNames != config->sectionRenameMap.end())
-    return newNames->second;
-  auto newName = config->segmentRenameMap.find(key.first);
-  if (newName != config->segmentRenameMap.end())
-    return std::make_pair(newName->second, key.second);
-  return key;
-}
-
 template <class LP> void Writer::createOutputSections() {
  TimeTraceScope timeScope("Create output sections");
  // First, create hidden sections
@ -919,10 +905,7 @@ template <class LP> void Writer::createOutputSections() {
  for (ConcatInputSection *isec : inputSections) {
    if (isec->shouldOmitFromOutput())
      continue;
-    NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
-    ConcatOutputSection *&osec = concatOutputSections[names];
-    if (!osec)
-      osec = make<ConcatOutputSection>(names.second);
+    ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
    osec->addInput(isec);
    osec->inputOrder =
        std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
@ -934,7 +917,8 @@ template <class LP> void Writer::createOutputSections() {
    StringRef segname = it.first.first;
    ConcatOutputSection *osec = it.second;
    assert(segname != segment_names::ld);
-    getOrCreateOutputSegment(segname)->addOutputSection(osec);
+    if (osec->isNeeded())
+      getOrCreateOutputSegment(segname)->addOutputSection(osec);
  }

  for (SyntheticSection *ssec : syntheticSections) {
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@ -9,8 +9,6 @@
 #ifndef LLD_MACHO_WRITER_H
 #define LLD_MACHO_WRITER_H

-#include "Config.h"
-
 #include <cstdint>

 namespace lld {
@ -29,8 +27,6 @@ public:

 template <class LP> void writeResult();

-NamePair maybeRenameSection(NamePair key);
-
 void createSyntheticSections();

 // Add bindings for symbols that need weak or non-lazy bindings.
--- a/lld/test/MachO/icf.s
+++ b/lld/test/MachO/icf.s
@ -25,15 +25,17 @@
 # CHECK:       [[#%x,SR]]    g   F __TEXT,__text _sr2
 # CHECK:       [[#%x,MR:]]   g   F __TEXT,__text _mr1
 # CHECK:       [[#%x,MR]]    g   F __TEXT,__text _mr2
+# CHECK:       [[#%x,K1:]]   g   O __TEXT,__foo  _k1
+# CHECK:       [[#%x,A:]]    g   F __TEXT,__text _k2
 ### FIXME: Mutually-recursive functions with identical bodies (see below)
 # COM:         [[#%x,XR:]]   g   F __TEXT,__text _xr1
 # COM:         [[#%x,XR]]    g   F __TEXT,__text _xr2

 # CHECK-LABEL: Disassembly of section __TEXT,__text:
 # CHECK:       [[#%x,MAIN]] <_main>:
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
 # CHECK-NEXT:  callq 0x[[#%x,B]]  <_b>
 # CHECK-NEXT:  callq 0x[[#%x,B2]] <_b2>
 # CHECK-NEXT:  callq 0x[[#%x,C]]  <_c>
@ -44,6 +46,8 @@
 # CHECK-NEXT:  callq 0x[[#%x,H]]  <_h>
 # CHECK-NEXT:  callq 0x[[#%x,I]]  <_i>
 # CHECK-NEXT:  callq 0x[[#%x,J]]  <_j>
+# CHECK-NEXT:  callq 0x[[#%x,K1]] <_k1>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
 # CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
 # CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
 # CHECK-NEXT:  callq 0x[[#%x,MR]] <_mr2>
@ -232,8 +236,38 @@ _j:
  ret
  .cfi_endproc

+### No fold: _k1 is in a different section from _a1
+.section __TEXT,__foo
+.globl _k1
+.p2align 2
+_k1:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movabs $_abs1a, %rdx
+  movl $0, %eax
+  ret
+  nopl (%rax)
+
+### Fold: _k2 is in a section that gets renamed and output as __text
+.section __TEXT,__StaticInit
+.globl _k2
+.p2align 2
+_k2:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movabs $_abs1a, %rdx
+  movl $0, %eax
+  ret
+## For some reason, llvm-mc generates different nop encodings when adding
+## padding for __StaticInit vs __text functions. So we explicitly specify the
+## nop here to make sure this function can be folded with _a1.
+  nopl (%rax)
+
 ### Fold: Simple recursion

+.text
 .globl _sr1
 .p2align 2
 _sr1:
@ -311,6 +345,8 @@ _main:
  callq _h
  callq _i
  callq _j
+  callq _k1
+  callq _k2
  callq _sr1
  callq _sr2
  callq _mr1