//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file Converts from in-memory Atoms to in-memory normalized mach-o.
///
/// +------------+
/// | normalized |
/// +------------+
/// ^
/// |
/// |
/// +-------+
/// | Atoms |
/// +-------+
#include "ArchHandler.h"
#include "DebugInfo.h"
#include "MachONormalizedFile.h"
#include "MachONormalizedFileBinaryUtils.h"
#include "lld/Common/LLVM.h"
#include "lld/Core/Error.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <map>
#include <system_error>
#include <unordered_set>
using llvm::StringRef;
using llvm::isa;
using namespace llvm::MachO;
using namespace lld::mach_o::normalized;
using namespace lld;
namespace {
struct AtomInfo {
const DefinedAtom *atom;
uint64_t offsetInSection;
};
struct SectionInfo {
SectionInfo(StringRef seg, StringRef sect, SectionType type,
const MachOLinkingContext &ctxt, uint32_t attr,
bool relocsToDefinedCanBeImplicit);
StringRef segmentName;
StringRef sectionName;
SectionType type;
uint32_t attributes;
uint64_t address;
uint64_t size;
uint16_t alignment;
/// If this is set, then any relocs in this section which point to defined
/// addresses can be implicitly generated. This is the case for the
/// __eh_frame section, where references to a function can be implicit if
/// the function is defined.
bool relocsToDefinedCanBeImplicit;
std::vector<AtomInfo> atomsAndOffsets;
uint32_t normalizedSectionIndex;
uint32_t finalSectionIndex;
};
SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t,
const MachOLinkingContext &ctxt, uint32_t attrs,
bool relocsToDefinedCanBeImplicit)
: segmentName(sg), sectionName(sct), type(t), attributes(attrs),
address(0), size(0), alignment(1),
relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit),
normalizedSectionIndex(0), finalSectionIndex(0) {
uint16_t align = 1;
if (ctxt.sectionAligned(segmentName, sectionName, align)) {
alignment = align;
}
}
struct SegmentInfo {
SegmentInfo(StringRef name);
StringRef name;
uint64_t address;
uint64_t size;
uint32_t init_access;
uint32_t max_access;
std::vector<SectionInfo*> sections;
uint32_t normalizedSegmentIndex;
};
SegmentInfo::SegmentInfo(StringRef n)
: name(n), address(0), size(0), init_access(0), max_access(0),
normalizedSegmentIndex(0) {
}
class Util {
public:
Util(const MachOLinkingContext &ctxt)
: _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr),
_hasTLVDescriptors(false), _subsectionsViaSymbols(true) {}
~Util();
void processDefinedAtoms(const lld::File &atomFile);
void processAtomAttributes(const DefinedAtom *atom);
void assignAtomToSection(const DefinedAtom *atom);
void organizeSections();
void assignAddressesToSections(const NormalizedFile &file);
uint32_t fileFlags();
void copySegmentInfo(NormalizedFile &file);
void copySectionInfo(NormalizedFile &file);
void updateSectionInfo(NormalizedFile &file);
void buildAtomToAddressMap();
llvm::Error synthesizeDebugNotes(NormalizedFile &file);
llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file);
void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
void addExportInfo(const lld::File &, NormalizedFile &file);
void addSectionRelocs(const lld::File &, NormalizedFile &file);
void addFunctionStarts(const lld::File &, NormalizedFile &file);
void buildDataInCodeArray(const lld::File &, NormalizedFile &file);
void addDependentDylibs(const lld::File &, NormalizedFile &file);
void copyEntryPointAddress(NormalizedFile &file);
void copySectionContent(NormalizedFile &file);
bool allSourceFilesHaveMinVersions() const {
return _allSourceFilesHaveMinVersions;
}
uint32_t minVersion() const {
return _minVersion;
}
LoadCommandType minVersionCommandType() const {
return _minVersionCommandType;
}
private:
typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection;
typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress;
struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; };
typedef llvm::StringMap<DylibInfo> DylibPathToInfo;
SectionInfo *sectionForAtom(const DefinedAtom*);
SectionInfo *getRelocatableSection(DefinedAtom::ContentType type);
SectionInfo *getFinalSection(DefinedAtom::ContentType type);
void appendAtom(SectionInfo *sect, const DefinedAtom *atom);
SegmentInfo *segmentForName(StringRef segName);
void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr);
void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &);
void copySectionContent(SectionInfo *si, ContentBytes &content);
uint16_t descBits(const DefinedAtom* atom);
int dylibOrdinal(const SharedLibraryAtom *sa);
void segIndexForSection(const SectionInfo *sect,
uint8_t &segmentIndex, uint64_t &segmentStartAddr);
const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom);
const Atom *targetOfStub(const DefinedAtom *stubAtom);
llvm::Error getSymbolTableRegion(const DefinedAtom* atom,
bool &inGlobalsRegion,
SymbolScope &symbolScope);
void appendSection(SectionInfo *si, NormalizedFile &file);
uint32_t sectionIndexForAtom(const Atom *atom);
void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
NormalizedFile &file);
typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex;
struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; };
struct AtomSorter {
bool operator()(const AtomAndIndex &left, const AtomAndIndex &right);
};
struct SegmentSorter {
bool operator()(const SegmentInfo *left, const SegmentInfo *right);
static unsigned weight(const SegmentInfo *);
};
struct TextSectionSorter {
bool operator()(const SectionInfo *left, const SectionInfo *right);
static unsigned weight(const SectionInfo *);
};
const MachOLinkingContext &_ctx;
mach_o::ArchHandler &_archHandler;
llvm::BumpPtrAllocator _allocator;
std::vector<SectionInfo*> _sectionInfos;
std::vector<SegmentInfo*> _segmentInfos;
TypeToSection _sectionMap;
std::vector<SectionInfo*> _customSections;
AtomToAddress _atomToAddress;
DylibPathToInfo _dylibInfo;
const DefinedAtom *_entryAtom;
AtomToIndex _atomToSymbolIndex;
std::vector<const Atom *> _machHeaderAliasAtoms;
bool _hasTLVDescriptors;
bool _subsectionsViaSymbols;
bool _allSourceFilesHaveMinVersions = true;
LoadCommandType _minVersionCommandType = (LoadCommandType)0;
uint32_t _minVersion = 0;
std::vector<lld::mach_o::Stab> _stabs;
};
Util::~Util() {
// The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs
// to be deleted.
for (SectionInfo *si : _sectionInfos) {
// clear() destroys vector elements, but does not deallocate.
// Instead use swap() to deallocate vector buffer.
std::vector<AtomInfo> empty;
si->atomsAndOffsets.swap(empty);
}
// The SegmentInfo structs are BumpPtr allocated, but sections needs
// to be deleted.
for (SegmentInfo *sgi : _segmentInfos) {
std::vector<SectionInfo*> empty2;
sgi->sections.swap(empty2);
}
}
SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) {
StringRef segmentName;
StringRef sectionName;
SectionType sectionType;
SectionAttr sectionAttrs;
bool relocsToDefinedCanBeImplicit;
// Use the same table used when parsing .o files.
relocatableSectionInfoForContentType(type, segmentName, sectionName,
sectionType, sectionAttrs,
relocsToDefinedCanBeImplicit);
// If we already have a SectionInfo with this name, re-use it.
// This can happen if two ContentTypes map to the same mach-o section.
for (auto sect : _sectionMap) {
if (sect.second->sectionName.equals(sectionName) &&
sect.second->segmentName.equals(segmentName)) {
return sect.second;
}
}
// Otherwise allocate new SectionInfo object.
auto *sect = new (_allocator)
SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs,
relocsToDefinedCanBeImplicit);
_sectionInfos.push_back(sect);
_sectionMap[type] = sect;
return sect;
}
#define ENTRY(seg, sect, type, atomType) \
{seg, sect, type, DefinedAtom::atomType }
struct MachOFinalSectionFromAtomType {
StringRef segmentName;
StringRef sectionName;
SectionType sectionType;
DefinedAtom::ContentType atomType;
};
const MachOFinalSectionFromAtomType sectsToAtomType[] = {
ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader),
ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4),
ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8),
ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16),
ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub),
ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper),
ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo),
ENTRY("__DATA", "__data", S_REGULAR, typeData),
ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
ENTRY("__DATA", "__la_symbol_ptr", S_LAZY_SYMBOL_POINTERS,
typeLazyPointer),
ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
typeInitializerPtr),
ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
typeTerminatorPtr),
ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
typeGOT),
ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS,
typeNonLazyPointer),
ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
typeThunkTLV),
ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR,
typeTLVInitialData),
ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS,
typeTLVInitializerPtr),
ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
typeTLVInitialZeroFill),
ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
};
#undef ENTRY
SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) {
for (auto &p : sectsToAtomType) {
if (p.atomType != atomType)
continue;
SectionAttr sectionAttrs = 0;
switch (atomType) {
case DefinedAtom::typeMachHeader:
case DefinedAtom::typeCode:
case DefinedAtom::typeStub:
case DefinedAtom::typeStubHelper:
sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
break;
case DefinedAtom::typeThunkTLV:
_hasTLVDescriptors = true;
break;
default:
break;
}
// If we already have a SectionInfo with this name, re-use it.
// This can happen if two ContentTypes map to the same mach-o section.
for (auto sect : _sectionMap) {
if (sect.second->sectionName.equals(p.sectionName) &&
sect.second->segmentName.equals(p.segmentName)) {
return sect.second;
}
}
// Otherwise allocate new SectionInfo object.
auto *sect = new (_allocator) SectionInfo(
p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs,
/* relocsToDefinedCanBeImplicit */ false);
_sectionInfos.push_back(sect);
_sectionMap[atomType] = sect;
return sect;
}
llvm_unreachable("content type not yet supported");
}
SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) {
if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) {
// Section for this atom is derived from content type.
DefinedAtom::ContentType type = atom->contentType();
auto pos = _sectionMap.find(type);
if ( pos != _sectionMap.end() )
return pos->second;
bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
return rMode ? getRelocatableSection(type) : getFinalSection(type);
} else {
// This atom needs to be in a custom section.
StringRef customName = atom->customSectionName();
// Look to see if we have already allocated the needed custom section.
for(SectionInfo *sect : _customSections) {
const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom;
if (firstAtom->customSectionName().equals(customName)) {
return sect;
}
}
// Not found, so need to create a new custom section.
size_t separatorIndex = customName.find('/');
assert(separatorIndex != StringRef::npos);
StringRef segName = customName.slice(0, separatorIndex);
StringRef sectName = customName.drop_front(separatorIndex + 1);
auto *sect =
new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx,
0, /* relocsToDefinedCanBeImplicit */ false);
_customSections.push_back(sect);
_sectionInfos.push_back(sect);
return sect;
}
}
void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
// Figure out offset for atom in this section given alignment constraints.
uint64_t offset = sect->size;
DefinedAtom::Alignment atomAlign = atom->alignment();
uint64_t align = atomAlign.value;
uint64_t requiredModulus = atomAlign.modulus;
uint64_t currentModulus = (offset % align);
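// e.g. with align=16 and modulus=8, the atom must land 8 bytes past a
// 16-byte boundary, so advance the offset by the smallest amount that
// makes (offset % align) == requiredModulus.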
if ( currentModulus != requiredModulus ) {
if ( requiredModulus > currentModulus )
offset += requiredModulus-currentModulus;
else
offset += align+requiredModulus-currentModulus;
}
// Record max alignment of any atom in this section.
if (align > sect->alignment)
sect->alignment = atomAlign.value;
// Assign atom to this section with this offset.
AtomInfo ai = {atom, offset};
sect->atomsAndOffsets.push_back(ai);
// Update section size to include this atom.
sect->size = offset + atom->size();
}
void Util::processDefinedAtoms(const lld::File &atomFile) {
for (const DefinedAtom *atom : atomFile.defined()) {
processAtomAttributes(atom);
assignAtomToSection(atom);
}
}
void Util::processAtomAttributes(const DefinedAtom *atom) {
if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) {
// If the file doesn't use subsections via symbols, then make sure we don't
// add that flag to the final output when emitting a relocatable file.
if (!machoFile->subsectionsViaSymbols())
_subsectionsViaSymbols = false;
// All the source files must have min versions for us to output an object
// file with a min version.
if (auto v = machoFile->minVersion())
_minVersion = std::max(_minVersion, v);
else
_allSourceFilesHaveMinVersions = false;
// If we don't have a platform load command, but one of the source files
// does, then take the one from the file.
if (!_minVersionCommandType)
if (auto v = machoFile->minVersionLoadCommandKind())
_minVersionCommandType = v;
}
}
void Util::assignAtomToSection(const DefinedAtom *atom) {
if (atom->contentType() == DefinedAtom::typeMachHeader) {
_machHeaderAliasAtoms.push_back(atom);
// Assign atom to this section with this offset.
AtomInfo ai = {atom, 0};
sectionForAtom(atom)->atomsAndOffsets.push_back(ai);
} else if (atom->contentType() == DefinedAtom::typeDSOHandle)
_machHeaderAliasAtoms.push_back(atom);
else
appendAtom(sectionForAtom(atom), atom);
}
SegmentInfo *Util::segmentForName(StringRef segName) {
for (SegmentInfo *si : _segmentInfos) {
if ( si->name.equals(segName) )
return si;
}
auto *info = new (_allocator) SegmentInfo(segName);
// Set the initial segment protection.
if (segName.equals("__TEXT"))
info->init_access = VM_PROT_READ | VM_PROT_EXECUTE;
else if (segName.equals("__PAGEZERO"))
info->init_access = 0;
else if (segName.equals("__LINKEDIT"))
info->init_access = VM_PROT_READ;
else {
// All others default to read-write
info->init_access = VM_PROT_READ | VM_PROT_WRITE;
}
// Set max segment protection
// Note: it's overkill to use a switch statement here, but it makes it much
// easier to use switch coverage to catch new cases.
switch (_ctx.os()) {
case lld::MachOLinkingContext::OS::unknown:
case lld::MachOLinkingContext::OS::macOSX:
case lld::MachOLinkingContext::OS::iOS_simulator:
if (segName.equals("__PAGEZERO")) {
info->max_access = 0;
break;
}
// All others default to all
info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
break;
case lld::MachOLinkingContext::OS::iOS:
// iPhoneOS always uses the same protection for max and initial
info->max_access = info->init_access;
break;
}
_segmentInfos.push_back(info);
return info;
}
unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) {
return llvm::StringSwitch<unsigned>(seg->name)
.Case("__PAGEZERO", 1)
.Case("__TEXT", 2)
.Case("__DATA", 3)
.Default(100);
}
bool Util::SegmentSorter::operator()(const SegmentInfo *left,
const SegmentInfo *right) {
return (weight(left) < weight(right));
}
unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) {
return llvm::StringSwitch<unsigned>(sect->sectionName)
.Case("__text", 1)
.Case("__stubs", 2)
.Case("__stub_helper", 3)
.Case("__const", 4)
.Case("__cstring", 5)
.Case("__unwind_info", 98)
.Case("__eh_frame", 99)
.Default(10);
}
bool Util::TextSectionSorter::operator()(const SectionInfo *left,
const SectionInfo *right) {
return (weight(left) < weight(right));
}
void Util::organizeSections() {
// NOTE!: Keep this in sync with assignAddressesToSections.
switch (_ctx.outputMachOType()) {
case llvm::MachO::MH_EXECUTE:
// Main executables need a zero-page segment.
segmentForName("__PAGEZERO");
// Fall into next case.
LLVM_FALLTHROUGH;
case llvm::MachO::MH_DYLIB:
case llvm::MachO::MH_BUNDLE:
// All dynamic code needs TEXT segment to hold the load commands.
segmentForName("__TEXT");
break;
default:
break;
}
segmentForName("__LINKEDIT");
// Group sections into segments.
for (SectionInfo *si : _sectionInfos) {
SegmentInfo *seg = segmentForName(si->segmentName);
seg->sections.push_back(si);
}
// Sort segments.
std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter());
// Sort sections within segments.
for (SegmentInfo *seg : _segmentInfos) {
if (seg->name.equals("__TEXT")) {
std::sort(seg->sections.begin(), seg->sections.end(),
TextSectionSorter());
}
}
// Record final section indexes.
uint32_t segmentIndex = 0;
uint32_t sectionIndex = 1;
for (SegmentInfo *seg : _segmentInfos) {
seg->normalizedSegmentIndex = segmentIndex++;
for (SectionInfo *sect : seg->sections)
sect->finalSectionIndex = sectionIndex++;
}
}
void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) {
seg->address = addr;
for (SectionInfo *sect : seg->sections) {
sect->address = llvm::alignTo(addr, sect->alignment);
addr = sect->address + sect->size;
}
seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
}
// __TEXT segment lays out backwards so padding is at front after load commands.
void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg,
uint64_t &addr) {
seg->address = addr;
// Walk the sections from the end to calculate the padding needed at the start.
int64_t taddr = 0;
for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) {
SectionInfo *sect = *it;
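// taddr runs backwards from 0: subtract the section size, then mask with
// (0 - alignment), which rounds the negative offset down to the section's
// power-of-two alignment.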
taddr -= sect->size;
taddr = taddr & (0 - sect->alignment);
}
int64_t padding = taddr - hlcSize;
while (padding < 0)
padding += _ctx.pageSize();
// Start assigning section address starting at padded offset.
addr += (padding + hlcSize);
for (SectionInfo *sect : seg->sections) {
sect->address = llvm::alignTo(addr, sect->alignment);
addr = sect->address + sect->size;
}
seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
}
void Util::assignAddressesToSections(const NormalizedFile &file) {
// NOTE!: Keep this in sync with organizeSections.
size_t hlcSize = headerAndLoadCommandsSize(file,
_ctx.generateFunctionStartsLoadCommand());
uint64_t address = 0;
for (SegmentInfo *seg : _segmentInfos) {
if (seg->name.equals("__PAGEZERO")) {
seg->size = _ctx.pageZeroSize();
address += seg->size;
}
else if (seg->name.equals("__TEXT")) {
// _ctx.baseAddress() == 0 implies it was either unspecified or
// pageZeroSize is also 0. In either case resetting address is safe.
address = _ctx.baseAddress() ? _ctx.baseAddress() : address;
layoutSectionsInTextSegment(hlcSize, seg, address);
} else
layoutSectionsInSegment(seg, address);
address = llvm::alignTo(address, _ctx.pageSize());
}
DEBUG_WITH_TYPE("WriterMachO-norm",
llvm::dbgs() << "assignAddressesToSections()\n";
for (SegmentInfo *sgi : _segmentInfos) {
llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address)
<< ", size=" << llvm::format("0x%08llX", sgi->size)
<< ", segment-name='" << sgi->name
<< "'\n";
for (SectionInfo *si : sgi->sections) {
llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address)
<< ", size=" << llvm::format("0x%08llX", si->size)
<< ", section-name='" << si->sectionName
<< "\n";
}
}
);
}
void Util::copySegmentInfo(NormalizedFile &file) {
for (SegmentInfo *sgi : _segmentInfos) {
Segment seg;
seg.name = sgi->name;
seg.address = sgi->address;
seg.size = sgi->size;
seg.init_access = sgi->init_access;
seg.max_access = sgi->max_access;
file.segments.push_back(seg);
}
}
void Util::appendSection(SectionInfo *si, NormalizedFile &file) {
// Add new empty section to end of file.sections.
Section temp;
file.sections.push_back(std::move(temp));
Section* normSect = &file.sections.back();
// Copy fields to normalized section.
normSect->segmentName = si->segmentName;
normSect->sectionName = si->sectionName;
normSect->type = si->type;
normSect->attributes = si->attributes;
normSect->address = si->address;
normSect->alignment = si->alignment;
// Record where normalized section is.
si->normalizedSectionIndex = file.sections.size()-1;
}
void Util::copySectionContent(NormalizedFile &file) {
const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
// Utility function for ArchHandler to find address of atom in output file.
auto addrForAtom = [&] (const Atom &atom) -> uint64_t {
auto pos = _atomToAddress.find(&atom);
assert(pos != _atomToAddress.end());
return pos->second;
};
auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t {
for (const SectionInfo *sectInfo : _sectionInfos)
for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets)
if (atomInfo.atom == &atom)
return sectInfo->address;
llvm_unreachable("atom not assigned to section");
};
for (SectionInfo *si : _sectionInfos) {
Section *normSect = &file.sections[si->normalizedSectionIndex];
if (isZeroFillSection(si->type)) {
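// Zero-fill sections occupy address space but no file content; record only
// the size, with a null data pointer.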
const uint8_t *empty = nullptr;
normSect->content = llvm::makeArrayRef(empty, si->size);
continue;
}
// Copy content from atoms to content buffer for section.
llvm::MutableArrayRef<uint8_t> sectionContent;
if (si->size) {
uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size);
sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size);
normSect->content = sectionContent;
}
for (AtomInfo &ai : si->atomsAndOffsets) {
if (!ai.atom->size()) {
assert(ai.atom->begin() == ai.atom->end() &&
"Cannot have references without content");
continue;
}
auto atomContent = sectionContent.slice(ai.offsetInSection,
ai.atom->size());
_archHandler.generateAtomContent(*ai.atom, r, addrForAtom,
sectionAddrForAtom, _ctx.baseAddress(),
atomContent);
}
}
}
void Util::copySectionInfo(NormalizedFile &file) {
file.sections.reserve(_sectionInfos.size());
// Write sections grouped by segment.
for (SegmentInfo *sgi : _segmentInfos) {
for (SectionInfo *si : sgi->sections) {
appendSection(si, file);
}
}
}
void Util::updateSectionInfo(NormalizedFile &file) {
file.sections.reserve(_sectionInfos.size());
// Update addresses of sections, grouped by segment.
for (SegmentInfo *sgi : _segmentInfos) {
Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex];
normSeg->address = sgi->address;
normSeg->size = sgi->size;
for (SectionInfo *si : sgi->sections) {
Section *normSect = &file.sections[si->normalizedSectionIndex];
normSect->address = si->address;
}
}
}
void Util::copyEntryPointAddress(NormalizedFile &nFile) {
if (!_entryAtom) {
nFile.entryAddress = 0;
return;
}
if (_ctx.outputTypeHasEntry()) {
if (_archHandler.isThumbFunction(*_entryAtom))
nFile.entryAddress = (_atomToAddress[_entryAtom] | 1);
else
nFile.entryAddress = _atomToAddress[_entryAtom];
}
}
void Util::buildAtomToAddressMap() {
DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
<< "assign atom addresses:\n");
const bool lookForEntry = _ctx.outputTypeHasEntry();
for (SectionInfo *sect : _sectionInfos) {
for (const AtomInfo &info : sect->atomsAndOffsets) {
_atomToAddress[info.atom] = sect->address + info.offsetInSection;
if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) &&
(info.atom->size() != 0) &&
info.atom->name() == _ctx.entrySymbolName()) {
_entryAtom = info.atom;
}
DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
<< " address="
<< llvm::format("0x%016X", _atomToAddress[info.atom])
<< llvm::format(" 0x%09lX", info.atom)
<< ", file=#"
<< info.atom->file().ordinal()
<< ", atom=#"
<< info.atom->ordinal()
<< ", name="
<< info.atom->name()
<< ", type="
<< info.atom->contentType()
<< "\n");
}
}
DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
<< "assign header alias atom addresses:\n");
for (const Atom *atom : _machHeaderAliasAtoms) {
_atomToAddress[atom] = _ctx.baseAddress();
#ifndef NDEBUG
if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) {
DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
<< " address="
<< llvm::format("0x%016X", _atomToAddress[atom])
<< llvm::format(" 0x%09lX", atom)
<< ", file=#"
<< definedAtom->file().ordinal()
<< ", atom=#"
<< definedAtom->ordinal()
<< ", name="
<< definedAtom->name()
<< ", type="
<< definedAtom->contentType()
<< "\n");
} else {
DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
<< " address="
<< llvm::format("0x%016X", _atomToAddress[atom])
<< " atom=" << atom
<< " name=" << atom->name() << "\n");
}
#endif
}
}
llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) {
// Bail out early if we don't need to generate a debug map.
if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap)
return llvm::Error::success();
std::vector<const DefinedAtom*> atomsNeedingDebugNotes;
std::set<const mach_o::MachOFile*> filesWithStabs;
bool objFileHasDwarf = false;
const File *objFile = nullptr;
for (SectionInfo *sect : _sectionInfos) {
for (const AtomInfo &info : sect->atomsAndOffsets) {
if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) {
// FIXME: No stabs/debug-notes for symbols that wouldn't be in the
// symbol table.
// FIXME: No stabs/debug-notes for kernel dtrace probes.
if (atom->contentType() == DefinedAtom::typeCFI ||
atom->contentType() == DefinedAtom::typeCString)
continue;
// Whenever we encounter a new file, update the 'objFileHasDwarf' flag.
if (&info.atom->file() != objFile) {
objFileHasDwarf = false;
if (const mach_o::MachOFile *atomFile =
dyn_cast<mach_o::MachOFile>(&info.atom->file())) {
if (atomFile->debugInfo()) {
if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo()))
objFileHasDwarf = true;
else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo()))
filesWithStabs.insert(atomFile);
}
}
}
// If this atom is from a file that needs dwarf, add it to the list.
if (objFileHasDwarf)
atomsNeedingDebugNotes.push_back(info.atom);
}
}
}
// Sort atoms needing debug notes by file ordinal, then atom ordinal.
std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(),
[](const DefinedAtom *lhs, const DefinedAtom *rhs) {
if (lhs->file().ordinal() != rhs->file().ordinal())
return (lhs->file().ordinal() < rhs->file().ordinal());
return (lhs->ordinal() < rhs->ordinal());
});
// FIXME: Handle <rdar://problem/17689030>: Add an -add_ast_path option to
// the linker which adds an N_AST stab entry to the output.
// See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64.
StringRef oldFileName = "";
StringRef oldDirPath = "";
bool wroteStartSO = false;
std::unordered_set<std::string> seenFiles;
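// For each new translation unit the emitted stab sequence is: N_SO
// (directory), N_SO (filename), N_OSO (object file path and mod time),
// then per-atom N_BNSYM/N_FUN/N_ENSYM for functions or N_STSYM/N_GSYM for
// data, followed by a closing empty N_SO when the unit changes or at the
// end of the list.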
for (const DefinedAtom *atom : atomsNeedingDebugNotes) {
const auto &atomFile = cast<mach_o::MachOFile>(atom->file());
assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo())
&& "file for atom needing debug notes does not contain dwarf");
auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo());
auto &tu = dwarf.translationUnitSource();
StringRef newFileName = tu.name;
StringRef newDirPath = tu.path;
// Add an SO whenever the TU source file changes.
if (newFileName != oldFileName || newDirPath != oldDirPath) {
// Translation unit change, emit ending SO
if (oldFileName != "")
_stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
oldFileName = newFileName;
oldDirPath = newDirPath;
// If newDirPath doesn't end with a '/' we need to add one:
if (newDirPath.back() != '/') {
char *p =
file.ownedAllocations.Allocate<char>(newDirPath.size() + 2);
memcpy(p, newDirPath.data(), newDirPath.size());
p[newDirPath.size()] = '/';
p[newDirPath.size() + 1] = '\0';
newDirPath = p;
}
// New translation unit, emit start SOs:
_stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath));
_stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName));
// Synthesize OSO for start of file.
char *fullPath = nullptr;
{
SmallString<1024> pathBuf(atomFile.path());
if (auto EC = llvm::sys::fs::make_absolute(pathBuf))
return llvm::errorCodeToError(EC);
fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1);
memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1);
}
// Get mod time.
uint32_t modTime = 0;
llvm::sys::fs::file_status stat;
if (!llvm::sys::fs::status(fullPath, stat))
if (llvm::sys::fs::exists(stat))
modTime = llvm::sys::toTimeT(stat.getLastModificationTime());
_stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1,
modTime, fullPath));
// <rdar://problem/6337329> linker should put cpusubtype in n_sect field
// of nlist entry for N_OSO debug note entries.
wroteStartSO = true;
}
if (atom->contentType() == DefinedAtom::typeCode) {
// Synthesize BNSYM and start FUN stabs.
_stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, ""));
_stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name()));
// Synthesize any SOL stabs needed
// FIXME: add SOL stabs.
_stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0,
atom->rawContent().size(), ""));
_stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0,
atom->rawContent().size(), ""));
} else {
if (atom->scope() == Atom::scopeTranslationUnit)
_stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name()));
else
_stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name()));
}
}
// Emit ending SO if necessary.
if (wroteStartSO)
_stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
// Copy any stabs from .o file.
for (const auto *objFile : filesWithStabs) {
const auto &stabsList =
cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs();
for (auto &stab : stabsList) {
// FIXME: Drop stabs whose atoms have been dead-stripped.
_stabs.push_back(stab);
}
}
return llvm::Error::success();
}
uint16_t Util::descBits(const DefinedAtom* atom) {
uint16_t desc = 0;
switch (atom->merge()) {
case lld::DefinedAtom::mergeNo:
case lld::DefinedAtom::mergeAsTentative:
break;
case lld::DefinedAtom::mergeAsWeak:
case lld::DefinedAtom::mergeAsWeakAndAddressUsed:
desc |= N_WEAK_DEF;
break;
case lld::DefinedAtom::mergeSameNameAndSize:
case lld::DefinedAtom::mergeByLargestSection:
case lld::DefinedAtom::mergeByContent:
llvm_unreachable("Unsupported DefinedAtom::merge()");
break;
}
if (atom->contentType() == lld::DefinedAtom::typeResolver)
desc |= N_SYMBOL_RESOLVER;
if (atom->contentType() == lld::DefinedAtom::typeMachHeader)
desc |= REFERENCED_DYNAMICALLY;
if (_archHandler.isThumbFunction(*atom))
desc |= N_ARM_THUMB_DEF;
if (atom->deadStrip() == DefinedAtom::deadStripNever &&
_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) {
if ((atom->contentType() != DefinedAtom::typeInitializerPtr)
&& (atom->contentType() != DefinedAtom::typeTerminatorPtr))
desc |= N_NO_DEAD_STRIP;
}
return desc;
}
bool Util::AtomSorter::operator()(const AtomAndIndex &left,
const AtomAndIndex &right) {
return (left.atom->name().compare(right.atom->name()) < 0);
}
llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom,
bool &inGlobalsRegion,
SymbolScope &scope) {
bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
switch (atom->scope()) {
case Atom::scopeTranslationUnit:
scope = 0;
inGlobalsRegion = false;
return llvm::Error::success();
case Atom::scopeLinkageUnit:
if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::exported) &&
_ctx.exportSymbolNamed(atom->name())) {
return llvm::make_error<GenericError>(
Twine("cannot export hidden symbol ") + atom->name());
}
if (rMode) {
if (_ctx.keepPrivateExterns()) {
// -keep_private_externs means keep in globals region as N_PEXT.
scope = N_PEXT | N_EXT;
inGlobalsRegion = true;
return llvm::Error::success();
}
}
// scopeLinkageUnit symbols are no longer global once linked.
scope = N_PEXT;
inGlobalsRegion = false;
return llvm::Error::success();
case Atom::scopeGlobal:
if (_ctx.exportRestrictMode()) {
if (_ctx.exportSymbolNamed(atom->name())) {
scope = N_EXT;
inGlobalsRegion = true;
return llvm::Error::success();
} else {
scope = N_PEXT;
inGlobalsRegion = false;
return llvm::Error::success();
}
} else {
scope = N_EXT;
inGlobalsRegion = true;
return llvm::Error::success();
}
break;
}
llvm_unreachable("atom->scope() unknown enum value");
}
llvm::Error Util::addSymbols(const lld::File &atomFile,
NormalizedFile &file) {
bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
// Mach-O symbol table has four regions: stabs, locals, globals, undefs.
// Add all stabs.
for (auto &stab : _stabs) {
lld::mach_o::normalized::Symbol sym;
sym.type = static_cast<NListType>(stab.type);
sym.scope = 0;
sym.sect = stab.other;
sym.desc = stab.desc;
if (stab.atom)
sym.value = _atomToAddress[stab.atom];
else
sym.value = stab.value;
sym.name = stab.str;
file.stabsSymbols.push_back(sym);
}
// Add all local (non-global) symbols in address order
std::vector<AtomAndIndex> globals;
globals.reserve(512);
for (SectionInfo *sect : _sectionInfos) {
for (const AtomInfo &info : sect->atomsAndOffsets) {
const DefinedAtom *atom = info.atom;
if (!atom->name().empty()) {
SymbolScope symbolScope;
bool inGlobalsRegion;
if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){
return ec;
}
if (inGlobalsRegion) {
AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope };
globals.push_back(ai);
} else {
lld::mach_o::normalized::Symbol sym;
sym.name = atom->name();
sym.type = N_SECT;
sym.scope = symbolScope;
sym.sect = sect->finalSectionIndex;
sym.desc = descBits(atom);
sym.value = _atomToAddress[atom];
_atomToSymbolIndex[atom] = file.localSymbols.size();
file.localSymbols.push_back(sym);
}
} else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){
// Create 'Lxxx' labels for anonymous atoms if archHandler says so.
static unsigned tempNum = 1;
char tmpName[16];
sprintf(tmpName, "L%04u", tempNum++);
StringRef tempRef(tmpName);
lld::mach_o::normalized::Symbol sym;
sym.name = tempRef.copy(file.ownedAllocations);
sym.type = N_SECT;
sym.scope = 0;
sym.sect = sect->finalSectionIndex;
sym.desc = 0;
sym.value = _atomToAddress[atom];
_atomToSymbolIndex[atom] = file.localSymbols.size();
file.localSymbols.push_back(sym);
}
}
}
// Sort global symbols alphabetically, then add them to the symbol table.
std::sort(globals.begin(), globals.end(), AtomSorter());
const uint32_t globalStartIndex = file.localSymbols.size();
for (AtomAndIndex &ai : globals) {
lld::mach_o::normalized::Symbol sym;
sym.name = ai.atom->name();
sym.type = N_SECT;
sym.scope = ai.scope;
sym.sect = ai.index;
sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom));
sym.value = _atomToAddress[ai.atom];
_atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size();
file.globalSymbols.push_back(sym);
}
// Sort undefined symbols alphabetically, then add them to the symbol table.
std::vector<AtomAndIndex> undefs;
undefs.reserve(128);
for (const UndefinedAtom *atom : atomFile.undefined()) {
AtomAndIndex ai = { atom, 0, N_EXT };
undefs.push_back(ai);
}
for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) {
AtomAndIndex ai = { atom, 0, N_EXT };
undefs.push_back(ai);
}
std::sort(undefs.begin(), undefs.end(), AtomSorter());
const uint32_t start = file.globalSymbols.size() + file.localSymbols.size();
for (AtomAndIndex &ai : undefs) {
lld::mach_o::normalized::Symbol sym;
uint16_t desc = 0;
if (!rMode) {
uint8_t ordinal = 0;
if (!_ctx.useFlatNamespace())
ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom));
llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal);
}
sym.name = ai.atom->name();
sym.type = N_UNDF;
sym.scope = ai.scope;
sym.sect = 0;
sym.desc = desc;
sym.value = 0;
_atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start;
file.undefinedSymbols.push_back(sym);
}
return llvm::Error::success();
}
const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) {
for (const Reference *ref : *lpAtom) {
if (_archHandler.isLazyPointer(*ref)) {
return ref->target();
}
}
return nullptr;
}
const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) {
for (const Reference *ref : *stubAtom) {
if (const Atom *ta = ref->target()) {
if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) {
const Atom *target = targetOfLazyPointer(lpAtom);
if (target)
return target;
}
}
}
return nullptr;
}
void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) {
for (SectionInfo *si : _sectionInfos) {
Section &normSect = file.sections[si->normalizedSectionIndex];
switch (si->type) {
case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS:
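// Pointers to shared-library symbols record the target's symbol-table
// index; pointers to defined atoms are marked INDIRECT_SYMBOL_LOCAL. If no
// shared-library target was found for the atom, INDIRECT_SYMBOL_ABS is
// appended.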
for (const AtomInfo &info : si->atomsAndOffsets) {
bool foundTarget = false;
for (const Reference *ref : *info.atom) {
const Atom *target = ref->target();
if (target) {
if (isa<const SharedLibraryAtom>(target)) {
uint32_t index = _atomToSymbolIndex[target];
normSect.indirectSymbols.push_back(index);
foundTarget = true;
} else {
normSect.indirectSymbols.push_back(
llvm::MachO::INDIRECT_SYMBOL_LOCAL);
}
}
}
if (!foundTarget) {
normSect.indirectSymbols.push_back(
llvm::MachO::INDIRECT_SYMBOL_ABS);
}
}
break;
case llvm::MachO::S_LAZY_SYMBOL_POINTERS:
for (const AtomInfo &info : si->atomsAndOffsets) {
const Atom *target = targetOfLazyPointer(info.atom);
if (target) {
uint32_t index = _atomToSymbolIndex[target];
normSect.indirectSymbols.push_back(index);
}
}
break;
case llvm::MachO::S_SYMBOL_STUBS:
for (const AtomInfo &info : si->atomsAndOffsets) {
const Atom *target = targetOfStub(info.atom);
if (target) {
uint32_t index = _atomToSymbolIndex[target];
normSect.indirectSymbols.push_back(index);
}
}
break;
default:
break;
}
}
}
void Util::addDependentDylibs(const lld::File &atomFile,
NormalizedFile &nFile) {
// Scan all imported symbols and build up a list of the dylibs they come from.
int ordinal = 1;
for (const auto *dylib : _ctx.allDylibs()) {
DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName());
if (pos == _dylibInfo.end()) {
DylibInfo info;
bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile();
// If we're in -flat_namespace mode (or this atom came from the flat
// namespace file under -undefined dynamic_lookup) then use the flat
// lookup ordinal.
if (flatNamespaceAtom || _ctx.useFlatNamespace())
info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
else
info.ordinal = ordinal++;
info.hasWeak = false;
info.hasNonWeak = !info.hasWeak;
_dylibInfo[dylib->installName()] = info;
// Unless this was a flat_namespace atom, record the source dylib.
if (!flatNamespaceAtom) {
DependentDylib depInfo;
depInfo.path = dylib->installName();
depInfo.kind = llvm::MachO::LC_LOAD_DYLIB;
depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path());
depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path());
nFile.dependentDylibs.push_back(depInfo);
}
} else {
pos->second.hasWeak = false;
pos->second.hasNonWeak = !pos->second.hasWeak;
}
}
// Automatically weak-link dylibs in which all symbols are weak (canBeNull).
for (DependentDylib &dep : nFile.dependentDylibs) {
DylibInfo &info = _dylibInfo[dep.path];
if (info.hasWeak && !info.hasNonWeak)
dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB;
else if (_ctx.isUpwardDylib(dep.path))
dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB;
}
}
int Util::dylibOrdinal(const SharedLibraryAtom *sa) {
return _dylibInfo[sa->loadName()].ordinal;
}
void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex,
uint64_t &segmentStartAddr) {
segmentIndex = 0;
for (const SegmentInfo *seg : _segmentInfos) {
if ((seg->address <= sect->address)
&& (seg->address+seg->size >= sect->address+sect->size)) {
segmentStartAddr = seg->address;
return;
}
++segmentIndex;
}
llvm_unreachable("section not in any segment");
}
uint32_t Util::sectionIndexForAtom(const Atom *atom) {
uint64_t address = _atomToAddress[atom];
for (const SectionInfo *si : _sectionInfos) {
if ((si->address <= address) && (address < si->address+si->size))
return si->finalSectionIndex;
}
llvm_unreachable("atom not in any section");
}
void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT)
return;
// Utility function for ArchHandler to find symbol index for an atom.
auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t {
auto pos = _atomToSymbolIndex.find(&atom);
assert(pos != _atomToSymbolIndex.end());
return pos->second;
};
// Utility function for ArchHandler to find section index for an atom.
auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t {
return sectionIndexForAtom(&atom);
};
// Utility function for ArchHandler to find address of atom in output file.
auto addressForAtom = [&] (const Atom &atom) -> uint64_t {
auto pos = _atomToAddress.find(&atom);
assert(pos != _atomToAddress.end());
return pos->second;
};
for (SectionInfo *si : _sectionInfos) {
Section &normSect = file.sections[si->normalizedSectionIndex];
for (const AtomInfo &info : si->atomsAndOffsets) {
const DefinedAtom *atom = info.atom;
for (const Reference *ref : *atom) {
// Skip emitting relocs for sections which are always able to be
// implicitly regenerated and where the relocation targets an address
// which is defined.
if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target()))
continue;
_archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref,
symIndexForAtom,
sectIndexForAtom,
addressForAtom,
normSect.relocations);
}
}
}
}
void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) {
if (!_ctx.generateFunctionStartsLoadCommand())
return;
file.functionStarts.reserve(8192);
// Delta compress function starts, starting with the mach header symbol.
const uint64_t badAddress = ~0ULL;
uint64_t addr = badAddress;
for (SectionInfo *si : _sectionInfos) {
for (const AtomInfo &info : si->atomsAndOffsets) {
auto type = info.atom->contentType();
if (type == DefinedAtom::typeMachHeader) {
addr = _atomToAddress[info.atom];
continue;
}
if (type != DefinedAtom::typeCode)
continue;
assert(addr != badAddress && "Missing mach header symbol");
// Skip atoms which have 0 size. This is so that LC_FUNCTION_STARTS
// can't spill into the next section.
if (!info.atom->size())
continue;
uint64_t nextAddr = _atomToAddress[info.atom];
if (_archHandler.isThumbFunction(*info.atom))
nextAddr |= 1;
uint64_t delta = nextAddr - addr;
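// Each nonzero delta is appended as a ULEB128 value; a zero delta (an atom
// at the same address) adds no bytes.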
if (delta) {
ByteBuffer buffer;
buffer.append_uleb128(delta);
file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(),
buffer.bytes() + buffer.size());
}
addr = nextAddr;
}
}
// Null terminate, and pad to pointer size for this arch.
file.functionStarts.push_back(0);
auto size = file.functionStarts.size();
for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4);
i != e; ++i)
file.functionStarts.push_back(0);
}
void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
if (!_ctx.generateDataInCodeLoadCommand())
return;
for (SectionInfo *si : _sectionInfos) {
for (const AtomInfo &info : si->atomsAndOffsets) {
// Atoms that contain data-in-code have "transition" references
// which mark a point where the embedded data starts or ends.
// This needs to be converted to the mach-o format, which is an array
// of data-in-code ranges.
uint32_t startOffset = 0;
DataRegionType mode = DataRegionType(0);
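// 'mode' 0 means we are currently in code. Switching to a nonzero mode
// opens a data range; switching to a different mode (or reaching the end
// of the atom while in data) closes it and emits a DataInCode entry.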
for (const Reference *ref : *info.atom) {
if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
continue;
if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
DataRegionType nextMode = (DataRegionType)ref->addend();
if (mode != nextMode) {
if (mode != 0) {
// Found end data range, so make range entry.
DataInCode entry;
entry.offset = si->address + info.offsetInSection + startOffset;
entry.length = ref->offsetInAtom() - startOffset;
entry.kind = mode;
file.dataInCode.push_back(entry);
}
}
mode = nextMode;
startOffset = ref->offsetInAtom();
}
}
if (mode != 0) {
// Function ends with data (no end transition).
DataInCode entry;
entry.offset = si->address + info.offsetInSection + startOffset;
entry.length = info.atom->size() - startOffset;
entry.kind = mode;
file.dataInCode.push_back(entry);
}
}
}
}
void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
NormalizedFile &nFile) {
if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
return;
uint8_t segmentIndex;
uint64_t segmentStartAddr;
uint32_t offsetInBindInfo = 0;
for (SectionInfo *sect : _sectionInfos) {
segIndexForSection(sect, segmentIndex, segmentStartAddr);
for (const AtomInfo &info : sect->atomsAndOffsets) {
const DefinedAtom *atom = info.atom;
for (const Reference *ref : *atom) {
uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom()
- segmentStartAddr;
const Atom* targ = ref->target();
if (_archHandler.isPointer(*ref)) {
// A pointer to a DefinedAtom requires rebasing.
if (isa<DefinedAtom>(targ)) {
RebaseLocation rebase;
rebase.segIndex = segmentIndex;
rebase.segOffset = segmentOffset;
rebase.kind = llvm::MachO::REBASE_TYPE_POINTER;
nFile.rebasingInfo.push_back(rebase);
}
// A pointer to a SharedLibraryAtom requires binding.
if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
BindLocation bind;
bind.segIndex = segmentIndex;
bind.segOffset = segmentOffset;
bind.kind = llvm::MachO::BIND_TYPE_POINTER;
bind.canBeNull = sa->canBeNullAtRuntime();
bind.ordinal = dylibOrdinal(sa);
bind.symbolName = targ->name();
bind.addend = ref->addend();
nFile.bindingInfo.push_back(bind);
}
}
else if (_archHandler.isLazyPointer(*ref)) {
BindLocation bind;
if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
bind.ordinal = dylibOrdinal(sa);
} else {
bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF;
}
bind.segIndex = segmentIndex;
bind.segOffset = segmentOffset;
bind.kind = llvm::MachO::BIND_TYPE_POINTER;
bind.canBeNull = false; //sa->canBeNullAtRuntime();
bind.symbolName = targ->name();
bind.addend = ref->addend();
nFile.lazyBindingInfo.push_back(bind);
// Now that we know the segmentOffset and the ordinal attribute,
// we can fix the helper's code
fixLazyReferenceImm(atom, offsetInBindInfo, nFile);
// 5 bytes for opcodes, plus the target name with its trailing '\0', plus
// the size of the ULEB128-encoded segment offset.
offsetInBindInfo +=
6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset);
if (bind.ordinal > BIND_IMMEDIATE_MASK)
offsetInBindInfo += llvm::getULEB128Size(bind.ordinal);
}
}
}
}
}
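// For the given atom, find a referenced defined atom that carries a
// lazy-immediate location (the stub helper) and patch that location with
// 'offset', the entry's offset into the lazy binding info. The affected
// section content is copied into a fresh writable buffer before patching.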
void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
NormalizedFile &file) {
for (const Reference *ref : *atom) {
const DefinedAtom *da = dyn_cast<DefinedAtom>(ref->target());
if (da == nullptr)
return;
const Reference *helperRef = nullptr;
for (const Reference *hr : *da) {
if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) {
helperRef = hr;
break;
}
}
if (helperRef == nullptr)
continue;
// TODO: maybe get the fixed atom content from _archHandler ?
for (SectionInfo *sectInfo : _sectionInfos) {
for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) {
if (atomInfo.atom == helperRef->target()) {
auto sectionContent =
file.sections[sectInfo->normalizedSectionIndex].content;
uint8_t *rawb =
file.ownedAllocations.Allocate<uint8_t>(sectionContent.size());
llvm::MutableArrayRef<uint8_t> newContent{rawb,
sectionContent.size()};
std::copy(sectionContent.begin(), sectionContent.end(),
newContent.begin());
llvm::support::ulittle32_t *loc =
reinterpret_cast<llvm::support::ulittle32_t *>(
&newContent[atomInfo.offsetInSection +
helperRef->offsetInAtom()]);
*loc = offset;
file.sections[sectInfo->normalizedSectionIndex].content = newContent;
}
}
}
}
}
void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) {
if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
return;
for (SectionInfo *sect : _sectionInfos) {
for (const AtomInfo &info : sect->atomsAndOffsets) {
const DefinedAtom *atom = info.atom;
if (atom->scope() != Atom::scopeGlobal)
continue;
if (_ctx.exportRestrictMode()) {
if (!_ctx.exportSymbolNamed(atom->name()))
continue;
}
Export exprt;
exprt.name = atom->name();
exprt.offset = _atomToAddress[atom] - _ctx.baseAddress();
exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
if (atom->merge() == DefinedAtom::mergeAsWeak)
exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
else
exprt.flags = 0;
exprt.otherOffset = 0;
exprt.otherName = StringRef();
nFile.exportInfo.push_back(exprt);
}
}
}
uint32_t Util::fileFlags() {
// FIXME: these need to be determined at runtime.
if (_ctx.outputMachOType() == MH_OBJECT) {
return _subsectionsViaSymbols ? (uint32_t)MH_SUBSECTIONS_VIA_SYMBOLS : 0;
} else {
uint32_t flags = MH_DYLDLINK;
if (!_ctx.useFlatNamespace())
flags |= MH_TWOLEVEL | MH_NOUNDEFS;
if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE())
flags |= MH_PIE;
if (_hasTLVDescriptors)
flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS);
return flags;
}
}
} // end anonymous namespace
namespace lld {
namespace mach_o {
namespace normalized {
/// Convert a set of Atoms into a normalized mach-o file.
llvm::Expected<std::unique_ptr<NormalizedFile>>
normalizedFromAtoms(const lld::File &atomFile,
const MachOLinkingContext &context) {
// The util object buffers info until the normalized file can be made.
Util util(context);
util.processDefinedAtoms(atomFile);
util.organizeSections();
std::unique_ptr<NormalizedFile> f(new NormalizedFile());
NormalizedFile &normFile = *f.get();
normFile.arch = context.arch();
normFile.fileType = context.outputMachOType();
normFile.flags = util.fileFlags();
normFile.stackSize = context.stackSize();
normFile.installName = context.installName();
normFile.currentVersion = context.currentVersion();
normFile.compatVersion = context.compatibilityVersion();
normFile.os = context.os();
// If we are emitting an object file, then the min version is the maximum
// of the min versions of all the source files and the command line.
if (normFile.fileType == llvm::MachO::MH_OBJECT)
normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion());
else
normFile.minOSverson = context.osMinVersion();
normFile.minOSVersionKind = util.minVersionCommandType();
normFile.sdkVersion = context.sdkVersion();
normFile.sourceVersion = context.sourceVersion();
if (context.generateVersionLoadCommand() &&
context.os() != MachOLinkingContext::OS::unknown)
normFile.hasMinVersionLoadCommand = true;
else if (normFile.fileType == llvm::MachO::MH_OBJECT &&
util.allSourceFilesHaveMinVersions() &&
((normFile.os != MachOLinkingContext::OS::unknown) ||
util.minVersionCommandType())) {
// If we emit an object file, then it should contain a min version load
// command if all of the source files also contained min version commands.
// Also, we either need to have a platform, or found a platform from the
// source object files.
normFile.hasMinVersionLoadCommand = true;
}
normFile.generateDataInCodeLoadCommand =
context.generateDataInCodeLoadCommand();
normFile.pageSize = context.pageSize();
normFile.rpaths = context.rpaths();
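// The calls below are order dependent: section addresses must be assigned
// before the atom-to-address map is built, and symbols must be added before
// indirect symbols and section relocations look up their indices.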
util.addDependentDylibs(atomFile, normFile);
util.copySegmentInfo(normFile);
util.copySectionInfo(normFile);
util.assignAddressesToSections(normFile);
util.buildAtomToAddressMap();
if (auto err = util.synthesizeDebugNotes(normFile))
return std::move(err);
util.updateSectionInfo(normFile);
util.copySectionContent(normFile);
if (auto ec = util.addSymbols(atomFile, normFile)) {
return std::move(ec);
}
util.addIndirectSymbols(atomFile, normFile);
util.addRebaseAndBindingInfo(atomFile, normFile);
util.addExportInfo(atomFile, normFile);
util.addSectionRelocs(atomFile, normFile);
util.addFunctionStarts(atomFile, normFile);
util.buildDataInCodeArray(atomFile, normFile);
util.copyEntryPointAddress(normFile);
return std::move(f);
}
} // namespace normalized
} // namespace mach_o
} // namespace lld