llvm-project/lld/MachO/Config.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

204 lines
6.0 KiB
C
Raw Normal View History

//===- Config.h -------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_CONFIG_H
#define LLD_MACHO_CONFIG_H
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/Platform.h"
#include "llvm/TextAPI/Target.h"
#include <vector>
namespace lld {
namespace macho {
// Forward declarations. Symbol is only used through pointers in this header;
// SymbolPriorityEntry is defined further down in this header.
class Symbol;
struct SymbolPriorityEntry;
// A pair of names.
// NOTE(review): presumably (segment name, section name), given its use as both
// key and value of SectionRenameMap -- confirm against the users of this type.
using NamePair = std::pair<llvm::StringRef, llvm::StringRef>;
// Maps one NamePair to another NamePair.
using SectionRenameMap = llvm::DenseMap<NamePair, NamePair>;
// Maps one name to another name.
using SegmentRenameMap = llvm::DenseMap<llvm::StringRef, llvm::StringRef>;
// Bundles the link target together with two version tuples associated with
// it: a `minimum` version and an `sdk` version.
struct PlatformInfo {
  // Architecture/platform pair; Configuration::arch() and
  // Configuration::platform() read their values from here.
  llvm::MachO::Target target;

  // NOTE(review): presumably the minimum deployment version -- confirm.
  llvm::VersionTuple minimum;

  // NOTE(review): presumably the version of the SDK linked against -- confirm.
  llvm::VersionTuple sdk;
};
// Packs a version tuple into a single 32-bit integer.
//
// The major component is shifted left by 16 bits, the minor component by 8
// bits, and the subminor component occupies the low bits. A missing minor or
// subminor component is treated as 0.
//
// No masking is applied: a minor or subminor value above 255 spills into the
// neighbouring field, and major bits above the low 16 are shifted out.
inline uint32_t encodeVersion(const llvm::VersionTuple &version) {
  const unsigned majorPart = version.getMajor();
  const unsigned minorPart = version.getMinor().getValueOr(0);
  const unsigned subminorPart = version.getSubminor().getValueOr(0);
  return (majorPart << 16) | (minorPart << 8) | subminorPart;
}
// Symbol namespace model selected for the link. Configuration defaults to
// `twolevel`.
enum class NamespaceKind {
  twolevel, // First enumerator; also the value-initialized state.
  flat,
};
// Policy applied to undefined symbols. Configuration defaults to `error`.
enum class UndefinedSymbolTreatment {
  unknown, // First enumerator; also the value-initialized state.
  error,
  warning,
  suppress,
  dynamic_lookup,
};
// Selectable ICF level. Configuration defaults to `none`.
// NOTE(review): ICF presumably stands for identical code folding -- confirm.
enum class ICFLevel {
  unknown, // First enumerator; also the value-initialized state.
  none,
  safe,
  all,
};
// A custom alignment for the section identified by (segName, sectName).
// Stored in Configuration::sectionAlignments.
struct SectionAlign {
  llvm::StringRef segName;
  llvm::StringRef sectName;
  // NOTE(review): unit (byte count vs. power-of-two exponent) is not visible
  // from this header -- confirm against the code that fills this in.
  uint32_t align;
};
// Custom protection values for the segment called `name`.
// Stored in Configuration::segmentProtections.
struct SegmentProtection {
  llvm::StringRef name;
  // NOTE(review): presumably the maximum and initial VM protection bits of
  // the segment, respectively -- confirm.
  uint32_t maxProt;
  uint32_t initProt;
};
// A collection of symbol-name patterns, held as two separate containers: one
// for literal names and one for glob patterns.
class SymbolPatterns {
public:
  // GlobPattern is able to match plain literals too, but a DenseSet offers
  // O(1) lookup, so literals are kept in their own container.
  llvm::DenseSet<llvm::CachedHashStringRef> literals;
  std::vector<llvm::GlobPattern> globs;

  // Returns true when neither a literal nor a glob pattern is stored.
  bool empty() const {
    if (!literals.empty())
      return false;
    return globs.empty();
  }

  // The member functions below are only declared here; their definitions are
  // out of line.
  // NOTE(review): the per-function notes are inferred from the names and
  // signatures -- confirm against the definitions.

  // Presumably removes every stored pattern.
  void clear();

  // Presumably records `symbolName` as either a literal or a glob.
  void insert(llvm::StringRef symbolName);

  // Presumably tests `symbolName` against `literals` only.
  bool matchLiteral(llvm::StringRef symbolName) const;

  // Presumably tests `symbolName` against `globs` only.
  bool matchGlob(llvm::StringRef symbolName) const;

  // Presumably tests `symbolName` against both containers.
  bool match(llvm::StringRef symbolName) const;
};
struct Configuration {
Symbol *entry = nullptr;
bool hasReexports = false;
bool allLoad = false;
bool applicationExtension = false;
bool archMultiple = false;
bool exportDynamic = false;
bool forceLoadObjC = false;
bool forceLoadSwift = false;
bool staticLink = false;
bool implicitDylibs = false;
bool isPic = false;
bool headerPadMaxInstallNames = false;
bool ltoNewPassManager = LLVM_ENABLE_NEW_PASS_MANAGER;
bool markDeadStrippableDylib = false;
bool printDylibSearch = false;
bool printEachFile = false;
bool printWhyLoad = false;
bool searchDylibsFirst = false;
bool saveTemps = false;
bool adhocCodesign = false;
bool emitFunctionStarts = false;
bool emitBitcodeBundle = false;
bool emitDataInCodeInfo = false;
bool emitEncryptionInfo = false;
bool timeTraceEnabled = false;
bool dataConst = false;
[lld-macho] Implement cstring deduplication Our implementation draws heavily from LLD-ELF's, which in turn delegates its string deduplication to llvm-mc's StringTableBuilder. The messiness of this diff is largely due to the fact that we've previously assumed that all InputSections get concatenated together to form the output. This is no longer true with CStringInputSections, which split their contents into StringPieces. StringPieces are much more lightweight than InputSections, which is important as we create a lot of them. They may also overlap in the output, which makes it possible for strings to be tail-merged. In fact, the initial version of this diff implemented tail merging, but I've dropped it for reasons I'll explain later. **Alignment Issues** Mergeable cstring literals are found under the `__TEXT,__cstring` section. In contrast to ELF, which puts strings that need different alignments into different sections, clang's Mach-O backend puts them all in one section. Strings that need to be aligned have the `.p2align` directive emitted before them, which simply translates into zero padding in the object file. I *think* ld64 extracts the desired per-string alignment from this data by preserving each string's offset from the last section-aligned address. I'm not entirely certain since it doesn't seem consistent about doing this; but perhaps this can be chalked up to cases where ld64 has to deduplicate strings with different offset/alignment combos -- it seems to pick one of their alignments to preserve. This doesn't seem correct in general; we can in fact can induce ld64 to produce a crashing binary just by linking in an additional object file that only contains cstrings and no code. See PR50563 for details. Moreover, this scheme seems rather inefficient: since unaligned and aligned strings are all put in the same section, which has a single alignment value, it doesn't seem possible to tell whether a given string doesn't have any alignment requirements. 
Preserving offset+alignments for strings that don't need it is wasteful. In practice, the crashes seen so far seem to stem from x86_64 SIMD operations on cstrings. X86_64 requires SIMD accesses to be 16-byte-aligned. So for now, I'm thinking of just aligning all strings to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise it's simpler than preserving per-string alignment+offsets. It also avoids the aforementioned crash after deduplication of differently-aligned strings. Finally, the overhead is not huge: using 16-byte alignment (vs no alignment) is only a 0.5% size overhead when linking chromium_framework. With these alignment requirements, it doesn't make sense to attempt tail merging -- most strings will not be eligible since their overlaps aren't likely to start at a 16-byte boundary. Tail-merging (with alignment) for chromium_framework only improves size by 0.3%. It's worth noting that LLD-ELF only does tail merging at `-O2`. By default (at `-O1`), it just deduplicates w/o tail merging. @thakis has also mentioned that they saw it regress compressed size in some cases and therefore turned it off. `ld64` does not seem to do tail merging at all. **Performance Numbers** CString deduplication reduces chromium_framework from 250MB to 242MB, or about a 3.2% reduction. Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W: N Min Max Median Avg Stddev x 20 3.91 4.03 3.935 3.95 0.034641016 + 20 3.99 4.14 4.015 4.0365 0.0492336 Difference at 95.0% confidence 0.0865 +/- 0.027245 2.18987% +/- 0.689746% (Student's t, pooled s = 0.0425673) As expected, cstring merging incurs some non-trivial overhead. When passing `--no-literal-merge`, it seems that performance is the same, i.e. the refactoring in this diff didn't cost us. 
N Min Max Median Avg Stddev x 20 3.91 4.03 3.935 3.95 0.034641016 + 20 3.89 4.02 3.935 3.9435 0.043197831 No difference proven at 95.0% confidence Reviewed By: #lld-macho, gkm Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
bool dedupLiterals = true;
uint32_t headerPad;
uint32_t dylibCompatibilityVersion = 0;
uint32_t dylibCurrentVersion = 0;
uint32_t timeTraceGranularity = 500;
unsigned optimize;
std::string progName;
// For `clang -arch arm64 -arch x86_64`, clang will:
// 1. invoke the linker twice, to write one temporary output per arch
// 2. invoke `lipo` to merge the two outputs into a single file
// `outputFile` is the name of the temporary file the linker writes to.
// `finalOutput `is the name of the file lipo writes to after the link.
llvm::StringRef outputFile;
llvm::StringRef finalOutput;
llvm::StringRef installName;
llvm::StringRef mapFile;
llvm::StringRef ltoObjPath;
llvm::StringRef thinLTOJobs;
llvm::StringRef umbrella;
uint32_t ltoo = 2;
llvm::CachePruningPolicy thinLTOCachePolicy;
llvm::StringRef thinLTOCacheDir;
bool deadStripDylibs = false;
bool demangle = false;
[lld/mac] Implement -dead_strip Also adds support for live_support sections, no_dead_strip sections, .no_dead_strip symbols. Chromium Framework 345MB unstripped -> 250MB stripped (vs 290MB unstripped -> 236M stripped with ld64). Doing dead stripping is a bit faster than not, because so much less data needs to be processed: % ministat lld_* x lld_nostrip.txt + lld_strip.txt N Min Max Median Avg Stddev x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794 + 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651 Difference at 95.0% confidence -0.144711 +/- 0.0336749 -3.60967% +/- 0.839989% (Student's t, pooled s = 0.0358398) This interacts with many parts of the linker. I tried to add test coverage for all added `isLive()` checks, so that some test will fail if any of them is removed. I checked that the test expectations for the most part match ld64's behavior (except for live-support-iterations.s, see the comment in the test). Interacts with: - debug info - export tries - import opcodes - flags like -exported_symbol(s_list) - -U / dynamic_lookup - mod_init_funcs, mod_term_funcs - weak symbol handling - unwind info - stubs - map files - -sectcreate - undefined, dylib, common, defined (both absolute and normal) symbols It's possible it interacts with more features I didn't think of, of course. I also did some manual testing: - check-llvm check-clang check-lld work with lld with this patch as host linker and -dead_strip enabled - Chromium still starts - Chromium's base_unittests still pass, including unwind tests Implemenation-wise, this is InputSection-based, so it'll work for object files with .subsections_via_symbols (which includes all object files generated by clang). I first based this on the COFF implementation, but later realized that things are more similar to ELF. I think it'd be good to refactor MarkLive.cpp to look more like the ELF part at some point, but I'd like to get a working state checked in first. 
Mechanical parts: - Rename canOmitFromOutput to wasCoalesced (no behavior change) since it really is for weak coalesced symbols - Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP (`.no_dead_strip` in asm) Fixes PR49276. Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
bool deadStrip = false;
PlatformInfo platformInfo;
NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment =
UndefinedSymbolTreatment::error;
ICFLevel icfLevel = ICFLevel::none;
llvm::MachO::HeaderFileType outputType;
std::vector<llvm::StringRef> systemLibraryRoots;
std::vector<llvm::StringRef> librarySearchPaths;
std::vector<llvm::StringRef> frameworkSearchPaths;
std::vector<llvm::StringRef> runtimePaths;
std::vector<std::string> astPaths;
std::vector<Symbol *> explicitUndefineds;
llvm::StringSet<> explicitDynamicLookups;
// There are typically few custom sectionAlignments or segmentProtections,
// so use a vector instead of a map.
std::vector<SectionAlign> sectionAlignments;
std::vector<SegmentProtection> segmentProtections;
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
SectionRenameMap sectionRenameMap;
SegmentRenameMap segmentRenameMap;
SymbolPatterns exportedSymbols;
SymbolPatterns unexportedSymbols;
bool zeroModTime = false;
llvm::MachO::Architecture arch() const { return platformInfo.target.Arch; }
llvm::MachO::PlatformKind platform() const {
return platformInfo.target.Platform;
}
};
// The symbol with the highest priority should be ordered first in the output
// section (modulo input section contiguity constraints). Using priority
// (highest first) instead of order (lowest first) has the convenient property
// that the default-constructed zero priority -- for symbols/sections without a
// user-defined order -- naturally ends up putting them at the end of the
// output.
struct SymbolPriorityEntry {
  // Priority applied to a matching symbol irrespective of the object file it
  // comes from. Zero by default.
  size_t anyObjectFile = 0;

  // Priorities that apply only when the matching symbol comes from one
  // specific object file; the map is keyed by a string identifying that file.
  llvm::DenseMap<llvm::StringRef, size_t> objectFiles;
};
// Global pointer to the linker's Configuration. This is only a declaration
// (`extern`); the definition is not part of this header.
extern Configuration *config;
} // namespace macho
} // namespace lld
#endif