2015-08-14 22:12:54 +08:00
|
|
|
//===- Chunks.h -------------------------------------------------*- C++ -*-===//
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_COFF_CHUNKS_H
|
|
|
|
#define LLD_COFF_CHUNKS_H
|
|
|
|
|
2015-09-17 05:40:47 +08:00
|
|
|
#include "Config.h"
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
#include "InputFiles.h"
|
2017-10-03 05:00:41 +08:00
|
|
|
#include "lld/Common/LLVM.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2019-05-25 04:25:40 +08:00
|
|
|
#include "llvm/ADT/PointerIntPair.h"
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
#include "llvm/ADT/iterator.h"
|
2015-06-25 08:33:38 +08:00
|
|
|
#include "llvm/ADT/iterator_range.h"
|
2018-03-16 05:14:02 +08:00
|
|
|
#include "llvm/MC/StringTableBuilder.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/Object/COFF.h"
|
2016-06-04 00:57:13 +08:00
|
|
|
#include <utility>
|
2015-05-29 03:09:30 +08:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
|
|
|
|
|
|
|
using llvm::COFF::ImportDirectoryTableEntry;
|
|
|
|
using llvm::object::COFFSymbolRef;
|
|
|
|
using llvm::object::SectionRef;
|
|
|
|
using llvm::object::coff_relocation;
|
|
|
|
using llvm::object::coff_section;
|
|
|
|
|
2015-07-25 09:44:32 +08:00
|
|
|
class Baserel;
|
2015-05-29 03:09:30 +08:00
|
|
|
class Defined;
|
|
|
|
class DefinedImportData;
|
2015-07-25 09:44:32 +08:00
|
|
|
class DefinedRegular;
|
2017-07-27 07:05:24 +08:00
|
|
|
class ObjFile;
|
2015-05-29 03:09:30 +08:00
|
|
|
class OutputSection;
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR64,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
class RuntimePseudoReloc;
|
2017-11-04 05:21:47 +08:00
|
|
|
class Symbol;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2018-04-21 05:23:16 +08:00
|
|
|
// Mask for permissions (discardable, writable, readable, executable, etc).
|
|
|
|
const uint32_t permMask = 0xFE000000;
|
|
|
|
|
|
|
|
// Mask for section types (code, data, bss).
|
|
|
|
const uint32_t typeMask = 0x000000E0;
|
2015-08-06 03:51:28 +08:00
|
|
|
|
2019-05-23 04:21:52 +08:00
|
|
|
// The log base 2 of the largest section alignment, which is log2(8192), or 13.
|
|
|
|
enum : unsigned { Log2MaxSectionAlignment = 13 };
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// A Chunk represents a chunk of data that will occupy space in the
|
|
|
|
// output (if the resolver chose that). It may or may not be backed by
|
|
|
|
// a section of an input file. It could be linker-created data, or
|
|
|
|
// doesn't even have actual data (if common or bss).
|
|
|
|
class Chunk {
|
|
|
|
public:
|
2019-05-29 01:38:04 +08:00
|
|
|
// Tag identifying the concrete category of a chunk. kind() returns it and the
// classof() helpers compare it against SectionKind.
enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
|
2015-06-26 03:10:58 +08:00
|
|
|
// Reports the kind tag this chunk was constructed with.
Kind kind() const {
  return chunkKind;
}
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Returns the size of this chunk (even if this is a common or BSS.)
|
2019-05-25 04:25:40 +08:00
|
|
|
size_t getSize() const;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2019-05-23 04:21:52 +08:00
|
|
|
// Returns chunk alignment in power of two form. Value values are powers of
|
|
|
|
// two from 1 to 8192.
|
|
|
|
uint32_t getAlignment() const { return 1U << p2Align; }
|
2019-05-25 04:25:40 +08:00
|
|
|
|
|
|
|
// Update the chunk section alignment measured in bytes. Internally alignment
|
|
|
|
// is stored in log2.
|
2019-05-23 04:21:52 +08:00
|
|
|
void setAlignment(uint32_t align) {
|
|
|
|
// Treat zero byte alignment as 1 byte alignment.
|
|
|
|
align = align ? align : 1;
|
|
|
|
assert(llvm::isPowerOf2_32(align) && "alignment is not a power of 2");
|
|
|
|
p2Align = llvm::Log2_32(align);
|
|
|
|
assert(p2Align <= Log2MaxSectionAlignment &&
|
|
|
|
"impossible requested alignment");
|
|
|
|
}
|
|
|
|
|
2015-06-06 12:07:39 +08:00
|
|
|
// Write this chunk to a mmap'ed file, assuming buf is pointing to the
|
|
|
|
// beginning of the file. Because this function may use RVA values
|
|
|
|
// of other chunks for relocations, you need to set them properly
|
|
|
|
// before calling this function.
|
2019-05-25 04:25:40 +08:00
|
|
|
void writeTo(uint8_t *buf) const;
|
2015-05-29 03:45:43 +08:00
|
|
|
|
2019-05-08 04:30:41 +08:00
|
|
|
// The writer sets and uses the addresses. In practice, PE images cannot be
|
|
|
|
// larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
|
|
|
|
// can be stored with 32 bits.
|
|
|
|
// Reads the 32-bit RVA currently stored for this chunk.
uint32_t getRVA() const {
  return rva;
}
|
|
|
|
// Stores the chunk's RVA. The argument is 64 bits wide while the field holds
// 32; the assert catches values that do not survive the narrowing.
void setRVA(uint64_t v) {
  const auto narrowed = static_cast<uint32_t>(v);
  rva = narrowed;
  assert(rva == v && "RVA truncated");
}
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Returns readable/writable/executable bits.
|
2019-05-25 04:25:40 +08:00
|
|
|
uint32_t getOutputCharacteristics() const;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Returns the section name if this is a section chunk.
|
|
|
|
// It is illegal to call this function on non-section chunks.
|
2019-05-25 04:25:40 +08:00
|
|
|
StringRef getSectionName() const;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// An output section has pointers to chunks in the section, and each
|
|
|
|
// chunk has a back pointer to an output section.
|
2019-05-25 02:25:49 +08:00
|
|
|
// Records the index of the output section this chunk is placed in.
void setOutputSectionIdx(uint16_t o) {
  osidx = o;
}
|
|
|
|
// Reads the stored output section index.
uint16_t getOutputSectionIdx() const {
  return osidx;
}
|
|
|
|
OutputSection *getOutputSection() const;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2015-06-15 09:23:58 +08:00
|
|
|
// Windows-specific.
|
|
|
|
// Collect all locations that contain absolute addresses for base relocations.
|
2019-05-25 04:25:40 +08:00
|
|
|
void getBaserels(std::vector<Baserel> *res);
|
2015-06-15 09:23:58 +08:00
|
|
|
|
2015-06-24 08:00:52 +08:00
|
|
|
// Returns a human-readable name of this chunk. Chunks are unnamed chunks of
|
|
|
|
// bytes, so this is used only for logging or debugging.
|
2019-05-25 04:25:40 +08:00
|
|
|
StringRef getDebugName() const;
|
2015-06-24 08:00:52 +08:00
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
// Return true if this file has the hotpatch flag set to true in the
|
|
|
|
// S_COMPILE3 record in codeview debug info. Also returns true for some thunks
|
|
|
|
// synthesized by the linker.
|
|
|
|
bool isHotPatchable() const;
|
2019-02-23 09:46:18 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
protected:
|
2019-05-25 04:25:40 +08:00
|
|
|
// Protected constructor. A new chunk is tagged with its kind, starts out
// flagged as having data, and has a log2 alignment of 0 (1 byte).
Chunk(Kind k = OtherKind)
    : chunkKind(k),
      hasData(true),
      p2Align(0) {}
|
2015-06-26 03:10:58 +08:00
|
|
|
|
2019-05-24 04:26:41 +08:00
|
|
|
const Kind chunkKind;
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
|
2019-07-10 17:10:01 +08:00
|
|
|
public:
|
|
|
|
// Returns true if this has non-zero data. BSS chunks return
|
|
|
|
// false. If false is returned, the space occupied by this chunk
|
|
|
|
// will be filled with zeros. Corresponds to the
|
2019-05-25 04:25:40 +08:00
|
|
|
// IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
|
|
|
|
uint8_t hasData : 1;
|
|
|
|
|
2019-07-10 17:10:01 +08:00
|
|
|
public:
|
2019-05-23 04:21:52 +08:00
|
|
|
// The alignment of this chunk, stored in log2 form. The writer uses the
|
|
|
|
// value.
|
2019-05-25 04:25:40 +08:00
|
|
|
uint8_t p2Align : 7;
|
2019-05-23 04:21:52 +08:00
|
|
|
|
2019-05-25 02:25:49 +08:00
|
|
|
// The output section index for this chunk. The first valid section number is
|
|
|
|
// one.
|
|
|
|
uint16_t osidx = 0;
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// The RVA of this chunk in the output. The writer sets a value.
|
2019-05-08 04:30:41 +08:00
|
|
|
uint32_t rva = 0;
|
2015-05-29 03:09:30 +08:00
|
|
|
};
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
class NonSectionChunk : public Chunk {
|
|
|
|
public:
|
|
|
|
virtual ~NonSectionChunk() = default;
|
|
|
|
|
|
|
|
// Returns the size of this chunk (even if this is a common or BSS.)
|
|
|
|
virtual size_t getSize() const = 0;
|
|
|
|
|
|
|
|
virtual uint32_t getOutputCharacteristics() const { return 0; }
|
|
|
|
|
|
|
|
// Write this chunk to a mmap'ed file, assuming buf is pointing to the
|
|
|
|
// beginning of the file. Because this function may use RVA values
|
|
|
|
// of other chunks for relocations, you need to set them properly
|
|
|
|
// before calling this function.
|
|
|
|
virtual void writeTo(uint8_t *buf) const {}
|
|
|
|
|
|
|
|
// Returns the section name if this is a section chunk.
|
|
|
|
// It is illegal to call this function on non-section chunks.
|
|
|
|
virtual StringRef getSectionName() const {
|
|
|
|
llvm_unreachable("unimplemented getSectionName");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Windows-specific.
|
|
|
|
// Collect all locations that contain absolute addresses for base relocations.
|
|
|
|
virtual void getBaserels(std::vector<Baserel> *res) {}
|
|
|
|
|
|
|
|
// Returns a human-readable name of this chunk. Chunks are unnamed chunks of
|
|
|
|
// bytes, so this is used only for logging or debugging.
|
|
|
|
virtual StringRef getDebugName() const { return ""; }
|
|
|
|
|
2019-05-29 01:38:04 +08:00
|
|
|
// Kind-based type test: every chunk whose kind is not SectionKind counts as a
// NonSectionChunk.
static bool classof(const Chunk *c) {
  const bool isSection = c->kind() == SectionKind;
  return !isSection;
}
|
2019-05-25 04:25:40 +08:00
|
|
|
|
|
|
|
protected:
|
2019-05-29 01:38:04 +08:00
|
|
|
NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
|
2019-05-25 04:25:40 +08:00
|
|
|
};
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// A chunk corresponding to a section of an input file.
|
2017-07-14 04:29:59 +08:00
|
|
|
class SectionChunk final : public Chunk {
|
2015-09-16 22:19:10 +08:00
|
|
|
// Identical COMDAT Folding feature accesses section internal data.
|
|
|
|
friend class ICF;
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
public:
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
class symbol_iterator : public llvm::iterator_adaptor_base<
|
|
|
|
symbol_iterator, const coff_relocation *,
|
2017-11-04 05:21:47 +08:00
|
|
|
std::random_access_iterator_tag, Symbol *> {
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
friend SectionChunk;
|
|
|
|
|
2017-07-27 07:05:24 +08:00
|
|
|
ObjFile *file;
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
|
2017-07-27 07:05:24 +08:00
|
|
|
symbol_iterator(ObjFile *file, const coff_relocation *i)
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
: symbol_iterator::iterator_adaptor_base(i), file(file) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
symbol_iterator() = default;
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Dereferencing yields the symbol the current relocation refers to, looked up
// in the owning file by the relocation's symbol table index.
Symbol *operator*() const {
  const auto symIndex = I->SymbolTableIndex;
  return file->getSymbol(symIndex);
}
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
};
|
|
|
|
|
2017-07-27 07:05:24 +08:00
|
|
|
SectionChunk(ObjFile *file, const coff_section *header);
|
2015-06-26 03:10:58 +08:00
|
|
|
static bool classof(const Chunk *c) { return c->kind() == SectionKind; }
|
2019-05-25 04:25:40 +08:00
|
|
|
// Size of this chunk, taken from the SizeOfRawData field of the section
// header.
size_t getSize() const {
  const size_t rawBytes = header->SizeOfRawData;
  return rawBytes;
}
|
2016-03-15 17:48:27 +08:00
|
|
|
ArrayRef<uint8_t> getContents() const;
|
2019-05-25 04:25:40 +08:00
|
|
|
void writeTo(uint8_t *buf) const;
|
|
|
|
|
|
|
|
uint32_t getOutputCharacteristics() const {
|
|
|
|
return header->Characteristics & (permMask | typeMask);
|
|
|
|
}
|
|
|
|
// Builds the section name from the stored pointer/length pair.
StringRef getSectionName() const {
  StringRef name(sectionNameData, sectionNameSize);
  return name;
}
|
2019-05-25 04:25:40 +08:00
|
|
|
void getBaserels(std::vector<Baserel> *res);
|
2015-06-26 03:10:58 +08:00
|
|
|
bool isCOMDAT() const;
|
[COFF] Allow debug info to relocate against discarded symbols
Summary:
In order to do this without switching on the symbol kind multiple times,
I created Defined::getChunkAndOffset and use that instead of
SymbolBody::getRVA in the inner relocation loop.
Now we get the symbol's chunk before switching over relocation types, so
we can test if it has been discarded outside the inner relocation type
switch. This also simplifies application of section relative
relocations. Previously we would switch on symbol kind to compute the
RVA, then the relocation type, and then the symbol kind again to get the
output section so we could subtract that from the symbol RVA. Now we
*always* have an OutputSection, so applying SECREL and SECTION
relocations isn't as much of a special case.
I'm still not quite happy with the cleanliness of this code. I'm not
sure what offsets and bases we should be using during the relocation
processing loop: VA, RVA, or OutputSectionOffset.
Reviewers: ruiu, pcc
Reviewed By: ruiu
Subscribers: majnemer, inglorion, llvm-commits, aprantl
Differential Revision: https://reviews.llvm.org/D34650
llvm-svn: 306566
2017-06-29 01:06:35 +08:00
|
|
|
void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
|
|
|
|
uint64_t p) const;
|
|
|
|
void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
|
|
|
|
uint64_t p) const;
|
|
|
|
void applyRelARM(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
|
|
|
|
uint64_t p) const;
|
2017-07-11 15:22:44 +08:00
|
|
|
void applyRelARM64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
|
|
|
|
uint64_t p) const;
|
2015-06-26 03:10:58 +08:00
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR64,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &res);
|
|
|
|
|
2015-06-26 03:10:58 +08:00
|
|
|
// Called if the garbage collector decides to not include this chunk
|
|
|
|
// in a final output. It's supposed to print out a log message to stdout.
|
|
|
|
void printDiscardedMessage() const;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Adds COMDAT associative sections to this COMDAT section. A chunk
|
|
|
|
// and its children are treated as a group by the garbage collector.
|
|
|
|
void addAssociative(SectionChunk *child);
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
StringRef getDebugName() const;
|
2015-06-24 08:00:52 +08:00
|
|
|
|
2017-06-21 01:14:09 +08:00
|
|
|
// True if this is a codeview debug info chunk. These will not be laid out in
|
|
|
|
// the image. Instead they will end up in the PDB, if one is requested.
|
|
|
|
bool isCodeView() const {
|
2019-05-04 04:17:14 +08:00
|
|
|
return getSectionName() == ".debug" || getSectionName().startswith(".debug$");
|
2017-06-21 01:14:09 +08:00
|
|
|
}
|
|
|
|
|
2017-10-10 14:05:29 +08:00
|
|
|
// True if this is a DWARF debug info or exception handling chunk.
|
|
|
|
bool isDWARF() const {
|
2019-05-04 04:17:14 +08:00
|
|
|
return getSectionName().startswith(".debug_") || getSectionName() == ".eh_frame";
|
2017-10-10 14:05:29 +08:00
|
|
|
}
|
2017-07-18 23:11:05 +08:00
|
|
|
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
// Allow iteration over the bodies of this chunk's relocated symbols.
|
|
|
|
llvm::iterator_range<symbol_iterator> symbols() const {
|
2019-05-04 04:17:14 +08:00
|
|
|
return llvm::make_range(symbol_iterator(file, relocsData),
|
|
|
|
symbol_iterator(file, relocsData + relocsSize));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns a non-owning view of this section's relocation records
// (relocsSize entries starting at relocsData).
ArrayRef<coff_relocation> getRelocs() const {
  return ArrayRef<coff_relocation>(relocsData, relocsSize);
}
|
|
|
|
|
|
|
|
// Reloc setter used by ARM range extension thunk insertion.
|
|
|
|
void setRelocs(ArrayRef<coff_relocation> newRelocs) {
|
|
|
|
relocsData = newRelocs.data();
|
|
|
|
relocsSize = newRelocs.size();
|
|
|
|
assert(relocsSize == newRelocs.size() && "reloc size truncation");
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterato over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and its
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
}
|
|
|
|
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
// Single linked list iterator for associated comdat children.
|
|
|
|
class AssociatedIterator
|
|
|
|
: public llvm::iterator_facade_base<
|
|
|
|
AssociatedIterator, std::forward_iterator_tag, SectionChunk> {
|
|
|
|
public:
|
|
|
|
AssociatedIterator() = default;
|
2019-11-26 04:07:57 +08:00
|
|
|
AssociatedIterator(const AssociatedIterator&) = default;
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
AssociatedIterator(SectionChunk *head) : cur(head) {}
|
|
|
|
AssociatedIterator &operator=(const AssociatedIterator &r) {
|
|
|
|
cur = r.cur;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
bool operator==(const AssociatedIterator &r) const { return cur == r.cur; }
|
|
|
|
const SectionChunk &operator*() const { return *cur; }
|
|
|
|
SectionChunk &operator*() { return *cur; }
|
|
|
|
AssociatedIterator &operator++() {
|
|
|
|
cur = cur->assocChildren;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
SectionChunk *cur = nullptr;
|
|
|
|
};
|
|
|
|
|
[opt] Replace the recursive walk for GC with a worklist algorithm.
This flattens the entire liveness walk from a recursive mark approach to
a worklist approach. It also sinks the worklist management completely
out of the SectionChunk and into the Writer by exposing the ability to
iterate over children of a chunk and over the symbol bodies of relocated
symbols. I'm not 100% happy with the API names, so suggestions welcome
there.
This allows us to use a single worklist for the entire recursive walk
and would also be a natural place to take advantage of parallelism at
some future point.
With this, we completely inline away the GC walk into the
Writer::markLive function and it makes it very easy to profile what is
slow. Currently, time is being wasted checking whether a Chunk isa
SectionChunk (it essentially always is), finding (or skipping)
a replacement for a symbol, and chasing pointers between symbols and
their chunks. There are a bunch of things we can do to fix this, and it's
easier to do them after this change IMO.
This change alone saves 1-2% of the time for my self-link of lld.exe
(which I'm running and benchmarking on Linux ironically).
Perhaps more notably, we'll no longer blow out the stack for large
links. =]
Just as an FYI, at this point, I/O is starting to really dominate the
profile. Well over 10% of the time appears to be inside the kernel doing
page table silliness. I think a decent chunk of this can be nuked as
well, but it's a little odd as cross-linking in this way isn't really
the primary goal here.
Differential Revision: http://reviews.llvm.org/D10790
llvm-svn: 240995
2015-06-30 05:12:49 +08:00
|
|
|
// Allow iteration over the associated child chunks for this section.
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
llvm::iterator_range<AssociatedIterator> children() const {
|
|
|
|
return llvm::make_range(AssociatedIterator(assocChildren),
|
|
|
|
AssociatedIterator(nullptr));
|
|
|
|
}
|
2015-06-24 12:36:52 +08:00
|
|
|
|
2018-10-05 20:56:46 +08:00
|
|
|
// The section ID this chunk belongs to in its Obj.
|
|
|
|
uint32_t getSectionNumber() const;
|
|
|
|
|
2019-02-23 09:46:18 +08:00
|
|
|
ArrayRef<uint8_t> consumeDebugMagic();
|
|
|
|
|
|
|
|
static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> data,
|
|
|
|
StringRef sectionName);
|
|
|
|
|
|
|
|
static SectionChunk *findByName(ArrayRef<SectionChunk *> sections,
|
|
|
|
StringRef name);
|
|
|
|
|
2017-01-14 11:14:46 +08:00
|
|
|
// The file that this chunk was created from.
|
2017-07-27 07:05:24 +08:00
|
|
|
ObjFile *file;
|
2015-06-26 06:00:42 +08:00
|
|
|
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
// Pointer to the COFF section header in the input file.
|
|
|
|
const coff_section *header;
|
|
|
|
|
2017-11-28 09:30:07 +08:00
|
|
|
// The COMDAT leader symbol if this is a COMDAT chunk.
|
|
|
|
DefinedRegular *sym = nullptr;
|
|
|
|
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
// The CRC of the contents as described in the COFF spec 4.5.5.
|
|
|
|
// Auxiliary Format 5: Section Definitions. Used for ICF.
|
|
|
|
uint32_t checksum = 0;
|
|
|
|
|
2018-08-31 15:45:20 +08:00
|
|
|
// Used by the garbage collector.
|
|
|
|
bool live;
|
|
|
|
|
2019-05-24 04:26:41 +08:00
|
|
|
// Whether this section needs to be kept distinct from other sections during
|
|
|
|
// ICF. This is set by the driver using address-significance tables.
|
|
|
|
bool keepUnique = false;
|
|
|
|
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
// The COMDAT selection if this is a COMDAT chunk.
|
|
|
|
llvm::COFF::COMDATType selection = (llvm::COFF::COMDATType)0;
|
|
|
|
|
|
|
|
// A pointer pointing to a replacement for this chunk.
|
|
|
|
// Initially it points to "this" object. If this chunk is merged
|
|
|
|
// with other chunk by ICF, it points to another chunk,
|
|
|
|
// and this chunk is considered as dead.
|
|
|
|
SectionChunk *repl;
|
|
|
|
|
2017-01-14 11:14:46 +08:00
|
|
|
private:
|
[COFF] Reduce the size of Chunk and SectionChunk, NFC
Summary:
Reorder the fields in both to use padding more efficiently, and add more
comments on the purpose of the fields.
Replace `std::vector<SectionChunk*> AssociativeChildren` with a
singly-linked list. This avoids the separate vector allocation to list
associative children, and shrinks the 3 pointers used for the typically
empty vector down to 1.
In the end, this reduces the sum of heap allocations used to link
browser_tests.exe with NO PDB by 13.10%, going from 2,248,728 KB to
1,954,071 KB of heap. These numbers exclude memory mapped files, which
are of course a significant factor in LLD's memory usage.
Reviewers: ruiu, mstorsjo, aganea
Subscribers: jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59797
llvm-svn: 357535
2019-04-03 06:11:58 +08:00
|
|
|
SectionChunk *assocChildren = nullptr;
|
2015-06-24 08:00:52 +08:00
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
// Used for ICF (Identical COMDAT Folding)
|
2015-09-22 03:36:51 +08:00
|
|
|
void replace(SectionChunk *other);
|
2017-05-06 07:52:24 +08:00
|
|
|
uint32_t eqClass[2] = {0, 0};
|
2019-05-04 04:17:14 +08:00
|
|
|
|
|
|
|
// Relocations for this section. Size is stored below.
|
|
|
|
const coff_relocation *relocsData;
|
|
|
|
|
|
|
|
// Section name string. Size is stored below.
|
|
|
|
const char *sectionNameData;
|
|
|
|
|
|
|
|
uint32_t relocsSize = 0;
|
|
|
|
uint32_t sectionNameSize = 0;
|
2015-05-29 03:09:30 +08:00
|
|
|
};
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
// Inline methods to implement faux-virtual dispatch for SectionChunk.
|
|
|
|
|
|
|
|
inline size_t Chunk::getSize() const {
|
|
|
|
if (isa<SectionChunk>(this))
|
|
|
|
return static_cast<const SectionChunk *>(this)->getSize();
|
|
|
|
else
|
|
|
|
return static_cast<const NonSectionChunk *>(this)->getSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline uint32_t Chunk::getOutputCharacteristics() const {
|
|
|
|
if (isa<SectionChunk>(this))
|
|
|
|
return static_cast<const SectionChunk *>(this)->getOutputCharacteristics();
|
|
|
|
else
|
|
|
|
return static_cast<const NonSectionChunk *>(this)
|
|
|
|
->getOutputCharacteristics();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline void Chunk::writeTo(uint8_t *buf) const {
|
|
|
|
if (isa<SectionChunk>(this))
|
|
|
|
static_cast<const SectionChunk *>(this)->writeTo(buf);
|
|
|
|
else
|
|
|
|
static_cast<const NonSectionChunk *>(this)->writeTo(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Faux-virtual dispatch: returns the section name reported by the concrete
// class (SectionChunk if this object is one, else NonSectionChunk).
inline StringRef Chunk::getSectionName() const {
  if (!isa<SectionChunk>(this))
    return static_cast<const NonSectionChunk *>(this)->getSectionName();
  return static_cast<const SectionChunk *>(this)->getSectionName();
}
|
|
|
|
|
|
|
|
inline void Chunk::getBaserels(std::vector<Baserel> *res) {
|
|
|
|
if (isa<SectionChunk>(this))
|
|
|
|
static_cast<SectionChunk *>(this)->getBaserels(res);
|
|
|
|
else
|
|
|
|
static_cast<NonSectionChunk *>(this)->getBaserels(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Faux-virtual dispatch: returns the debug name reported by the concrete
// class (SectionChunk if this object is one, else NonSectionChunk).
inline StringRef Chunk::getDebugName() const {
  if (!isa<SectionChunk>(this))
    return static_cast<const NonSectionChunk *>(this)->getDebugName();
  return static_cast<const SectionChunk *>(this)->getDebugName();
}
|
|
|
|
|
2018-03-16 05:14:02 +08:00
|
|
|
// This class is used to implement an lld-specific feature (not implemented in
|
|
|
|
// MSVC) that minimizes the output size by finding string literals sharing tail
|
|
|
|
// parts and merging them.
|
|
|
|
//
|
|
|
|
// If string tail merging is enabled and a section is identified as containing a
|
|
|
|
// string literal, it is added to a MergeChunk with an appropriate alignment.
|
|
|
|
// The MergeChunk then tail merges the strings using the StringTableBuilder
|
|
|
|
// class and assigns RVAs and section offsets to each of the member chunks based
|
|
|
|
// on the offsets assigned by the StringTableBuilder.
|
2019-05-25 04:25:40 +08:00
|
|
|
class MergeChunk : public NonSectionChunk {
|
2018-03-16 05:14:02 +08:00
|
|
|
public:
|
|
|
|
MergeChunk(uint32_t alignment);
|
|
|
|
static void addSection(SectionChunk *c);
|
2019-05-24 08:02:00 +08:00
|
|
|
void finalizeContents();
|
|
|
|
void assignSubsectionRVAs();
|
2018-03-16 05:14:02 +08:00
|
|
|
|
2018-04-20 04:03:24 +08:00
|
|
|
uint32_t getOutputCharacteristics() const override;
|
2018-03-16 05:14:02 +08:00
|
|
|
StringRef getSectionName() const override { return ".rdata"; }
|
|
|
|
size_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
2019-05-23 04:21:52 +08:00
|
|
|
static MergeChunk *instances[Log2MaxSectionAlignment + 1];
|
2018-03-16 05:14:02 +08:00
|
|
|
std::vector<SectionChunk *> sections;
|
|
|
|
|
|
|
|
private:
|
|
|
|
llvm::StringTableBuilder builder;
|
2018-09-25 18:59:29 +08:00
|
|
|
bool finalized = false;
|
2018-03-16 05:14:02 +08:00
|
|
|
};
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// A chunk for common symbols. Common chunks don't have actual data.
|
2019-05-25 04:25:40 +08:00
|
|
|
class CommonChunk : public NonSectionChunk {
|
2015-05-29 03:09:30 +08:00
|
|
|
public:
|
2015-06-08 11:17:07 +08:00
|
|
|
CommonChunk(const COFFSymbolRef sym);
|
2015-05-29 03:09:30 +08:00
|
|
|
size_t getSize() const override { return sym.getValue(); }
|
2018-04-20 04:03:24 +08:00
|
|
|
uint32_t getOutputCharacteristics() const override;
|
2015-05-29 03:09:30 +08:00
|
|
|
StringRef getSectionName() const override { return ".bss"; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
const COFFSymbolRef sym;
|
|
|
|
};
|
|
|
|
|
|
|
|
// A chunk for linker-created strings.
|
2019-05-25 04:25:40 +08:00
|
|
|
class StringChunk : public NonSectionChunk {
|
2015-05-29 03:09:30 +08:00
|
|
|
public:
|
2015-05-29 03:45:43 +08:00
|
|
|
explicit StringChunk(StringRef s) : str(s) {}
|
|
|
|
size_t getSize() const override { return str.size() + 1; }
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
private:
|
2015-05-29 03:45:43 +08:00
|
|
|
StringRef str;
|
2015-05-29 03:09:30 +08:00
|
|
|
};
|
|
|
|
|
2015-07-25 09:16:06 +08:00
|
|
|
// Byte template for the x86 import thunk: a 6-byte indirect JMP whose 4
// operand bytes are zero in the template. Both ImportThunkChunkX64 and
// ImportThunkChunkX86 take their size from this array.
// NOTE(review): the operand is presumably patched in by writeTo - confirm.
static const uint8_t importThunkX86[] = {
|
2015-05-29 03:09:30 +08:00
|
|
|
0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
|
|
|
|
};
|
|
|
|
|
2015-07-25 11:39:29 +08:00
|
|
|
// Byte template for the ARM import thunk: three 4-byte instructions that,
// per the disassembly below, load ip with two zero-immediate moves and then
// load pc from [ip]. ImportThunkChunkARM takes its size from this array.
// NOTE(review): the immediates are presumably patched in by writeTo - confirm.
static const uint8_t importThunkARM[] = {
|
|
|
|
0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
|
|
|
|
0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0
|
|
|
|
0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
|
|
|
|
};
|
|
|
|
|
2017-07-11 15:22:44 +08:00
|
|
|
// Byte template for the ARM64 import thunk: three 4-byte instructions that,
// per the disassembly below, form an address in x16, load x16 through it and
// branch to the result. ImportThunkChunkARM64 takes its size from this array.
// NOTE(review): the address fields are presumably patched in by writeTo -
// confirm.
static const uint8_t importThunkARM64[] = {
|
|
|
|
0x10, 0x00, 0x00, 0x90, // adrp x16, #0
|
|
|
|
0x10, 0x02, 0x40, 0xf9, // ldr x16, [x16]
|
|
|
|
0x00, 0x02, 0x1f, 0xd6, // br x16
|
|
|
|
};
|
|
|
|
|
2015-06-07 09:15:04 +08:00
|
|
|
// Windows-specific.
|
2019-01-15 03:05:21 +08:00
|
|
|
// A chunk for DLL import jump table entry. In a final output, its
|
2015-05-29 03:09:30 +08:00
|
|
|
// contents will be a JMP instruction to some __imp_ symbol.
|
2019-05-29 01:38:04 +08:00
|
|
|
class ImportThunkChunk : public NonSectionChunk {
|
|
|
|
public:
|
|
|
|
ImportThunkChunk(Defined *s)
|
|
|
|
: NonSectionChunk(ImportThunkKind), impSymbol(s) {}
|
|
|
|
static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; }
|
|
|
|
|
|
|
|
protected:
|
|
|
|
Defined *impSymbol;
|
|
|
|
};
|
|
|
|
|
|
|
|
class ImportThunkChunkX64 : public ImportThunkChunk {
|
2015-05-29 03:09:30 +08:00
|
|
|
public:
|
2015-07-25 09:16:06 +08:00
|
|
|
explicit ImportThunkChunkX64(Defined *s);
|
|
|
|
size_t getSize() const override { return sizeof(importThunkX86); }
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-07-25 09:16:06 +08:00
|
|
|
};
|
|
|
|
|
2019-05-29 01:38:04 +08:00
|
|
|
class ImportThunkChunkX86 : public ImportThunkChunk {
|
2015-07-25 09:16:06 +08:00
|
|
|
public:
|
2019-05-29 01:38:04 +08:00
|
|
|
explicit ImportThunkChunkX86(Defined *s) : ImportThunkChunk(s) {}
|
2015-07-25 09:16:06 +08:00
|
|
|
size_t getSize() const override { return sizeof(importThunkX86); }
|
2015-07-25 09:44:32 +08:00
|
|
|
void getBaserels(std::vector<Baserel> *res) override;
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-05-29 03:09:30 +08:00
|
|
|
};
|
|
|
|
|
2019-05-29 01:38:04 +08:00
|
|
|
class ImportThunkChunkARM : public ImportThunkChunk {
|
2015-07-25 11:39:29 +08:00
|
|
|
public:
|
2019-05-29 01:38:04 +08:00
|
|
|
explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {}
|
2015-07-25 11:39:29 +08:00
|
|
|
size_t getSize() const override { return sizeof(importThunkARM); }
|
|
|
|
void getBaserels(std::vector<Baserel> *res) override;
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-07-25 11:39:29 +08:00
|
|
|
};
|
|
|
|
|
2019-05-29 01:38:04 +08:00
|
|
|
class ImportThunkChunkARM64 : public ImportThunkChunk {
|
2017-07-11 15:22:44 +08:00
|
|
|
public:
|
2019-05-29 01:38:04 +08:00
|
|
|
explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {}
|
2017-07-11 15:22:44 +08:00
|
|
|
size_t getSize() const override { return sizeof(importThunkARM64); }
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
};
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
class RangeExtensionThunkARM : public NonSectionChunk {
|
2018-09-25 18:59:29 +08:00
|
|
|
public:
|
2019-02-02 06:08:09 +08:00
|
|
|
explicit RangeExtensionThunkARM(Defined *t) : target(t) {}
|
|
|
|
size_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
|
|
|
Defined *target;
|
|
|
|
};
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
class RangeExtensionThunkARM64 : public NonSectionChunk {
|
2019-02-02 06:08:09 +08:00
|
|
|
public:
|
|
|
|
explicit RangeExtensionThunkARM64(Defined *t) : target(t) {}
|
2018-09-25 18:59:29 +08:00
|
|
|
size_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
|
|
|
Defined *target;
|
|
|
|
};
|
|
|
|
|
2015-06-25 11:31:47 +08:00
|
|
|
// Windows-specific.
|
|
|
|
// See comments for DefinedLocalImport class.
|
2019-05-25 04:25:40 +08:00
|
|
|
class LocalImportChunk : public NonSectionChunk {
|
2015-06-25 11:31:47 +08:00
|
|
|
public:
|
2018-10-12 01:45:51 +08:00
|
|
|
explicit LocalImportChunk(Defined *s) : sym(s) {
|
2019-05-23 04:21:52 +08:00
|
|
|
setAlignment(config->wordsize);
|
2018-10-12 01:45:51 +08:00
|
|
|
}
|
2015-07-10 05:15:58 +08:00
|
|
|
size_t getSize() const override;
|
2015-07-25 09:44:32 +08:00
|
|
|
void getBaserels(std::vector<Baserel> *res) override;
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-06-25 11:31:47 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
Defined *sym;
|
|
|
|
};
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
|
|
|
|
// offset into the chunk. Order does not matter as the RVA table will be sorted
|
|
|
|
// later.
|
|
|
|
struct ChunkAndOffset {
|
|
|
|
Chunk *inputChunk;
|
|
|
|
uint32_t offset;
|
|
|
|
|
|
|
|
struct DenseMapInfo {
|
|
|
|
static ChunkAndOffset getEmptyKey() {
|
|
|
|
return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0};
|
|
|
|
}
|
|
|
|
static ChunkAndOffset getTombstoneKey() {
|
|
|
|
return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0};
|
|
|
|
}
|
|
|
|
static unsigned getHashValue(const ChunkAndOffset &co) {
|
|
|
|
return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue(
|
|
|
|
{co.inputChunk, co.offset});
|
|
|
|
}
|
|
|
|
static bool isEqual(const ChunkAndOffset &lhs, const ChunkAndOffset &rhs) {
|
|
|
|
return lhs.inputChunk == rhs.inputChunk && lhs.offset == rhs.offset;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
|
|
|
|
|
|
|
|
// Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
|
2019-05-25 04:25:40 +08:00
|
|
|
class RVATableChunk : public NonSectionChunk {
|
2015-07-25 07:51:14 +08:00
|
|
|
public:
|
2018-02-06 09:58:26 +08:00
|
|
|
explicit RVATableChunk(SymbolRVASet s) : syms(std::move(s)) {}
|
2015-07-25 07:51:14 +08:00
|
|
|
size_t getSize() const override { return syms.size() * 4; }
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-07-25 07:51:14 +08:00
|
|
|
|
|
|
|
private:
|
2018-02-06 09:58:26 +08:00
|
|
|
SymbolRVASet syms;
|
2015-07-25 07:51:14 +08:00
|
|
|
};
|
|
|
|
|
2015-06-15 09:23:58 +08:00
|
|
|
// Windows-specific.
|
|
|
|
// This class represents a block in .reloc section.
|
|
|
|
// See the PE/COFF spec 5.6 for details.
|
2019-05-25 04:25:40 +08:00
|
|
|
class BaserelChunk : public NonSectionChunk {
|
2015-06-15 09:23:58 +08:00
|
|
|
public:
|
2015-07-25 09:44:32 +08:00
|
|
|
BaserelChunk(uint32_t page, Baserel *begin, Baserel *end);
|
2015-06-15 09:23:58 +08:00
|
|
|
size_t getSize() const override { return data.size(); }
|
2015-09-20 07:28:57 +08:00
|
|
|
void writeTo(uint8_t *buf) const override;
|
2015-06-15 09:23:58 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
std::vector<uint8_t> data;
|
|
|
|
};
|
|
|
|
|
2015-07-25 09:44:32 +08:00
|
|
|
// A single base relocation: an RVA together with a relocation type byte.
class Baserel {
public:
  Baserel(uint32_t v, uint8_t ty) : rva{v}, type{ty} {}

  // Same as the two-argument form, with the type taken from
  // getDefaultType().
  explicit Baserel(uint32_t v) : Baserel(v, getDefaultType()) {}

  uint8_t getDefaultType();

  uint32_t rva;
  uint8_t type;
};
|
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of its own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code work fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data don't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by an
IMAGE_REL_ARM_MOV32T relocation (each instruction containing 16 bits of
the target address); on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
|
|
|
|
// specific place in a section, without any data. This is used for the MinGW
|
|
|
|
// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
|
|
|
|
// of an empty chunk isn't MinGW specific.
|
2019-05-25 04:25:40 +08:00
|
|
|
class EmptyChunk : public NonSectionChunk {
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
public:
|
|
|
|
EmptyChunk() {}
|
|
|
|
size_t getSize() const override { return 0; }
|
|
|
|
void writeTo(uint8_t *buf) const override {}
|
|
|
|
};
|
|
|
|
|
|
|
|
// MinGW specific, for the "automatic import of variables from DLLs" feature.
|
|
|
|
// This provides the table of runtime pseudo relocations, for variable
|
|
|
|
// references that turned out to need to be imported from a DLL even though
|
|
|
|
// the reference didn't use the dllimport attribute. The MinGW runtime will
|
|
|
|
// process this table after loading, before handling control over to user
|
|
|
|
// code.
|
2019-05-25 04:25:40 +08:00
|
|
|
class PseudoRelocTableChunk : public NonSectionChunk {
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
public:
|
|
|
|
PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &relocs)
|
|
|
|
: relocs(std::move(relocs)) {
|
2019-05-23 04:21:52 +08:00
|
|
|
setAlignment(4);
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
}
|
|
|
|
size_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::vector<RuntimePseudoReloc> relocs;
|
|
|
|
};
|
|
|
|
|
|
|
|
// MinGW specific; information about one individual location in the image
|
|
|
|
// that needs to be fixed up at runtime after loading. This represents
|
|
|
|
// one individual element in the PseudoRelocTableChunk table.
|
|
|
|
class RuntimePseudoReloc {
|
|
|
|
public:
|
|
|
|
RuntimePseudoReloc(Defined *sym, SectionChunk *target, uint32_t targetOffset,
|
|
|
|
int flags)
|
|
|
|
: sym(sym), target(target), targetOffset(targetOffset), flags(flags) {}
|
|
|
|
|
|
|
|
Defined *sym;
|
|
|
|
SectionChunk *target;
|
|
|
|
uint32_t targetOffset;
|
|
|
|
// The Flags field contains the size of the relocation, in bits. No other
|
|
|
|
// flags are currently defined.
|
|
|
|
int flags;
|
|
|
|
};
|
|
|
|
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses this kind of list terminator symbol for traversing
the constructor/destructor lists. These list terminators are
actual pointer entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need for different configurations of the CRT startup
object files depending on which linker is to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
// MinGW specific. A Chunk that contains one pointer-sized absolute value.
|
2019-05-25 04:25:40 +08:00
|
|
|
class AbsolutePointerChunk : public NonSectionChunk {
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
public:
|
|
|
|
AbsolutePointerChunk(uint64_t value) : value(value) {
|
2019-05-23 04:21:52 +08:00
|
|
|
setAlignment(getSize());
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
}
|
|
|
|
size_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
uint64_t value;
|
|
|
|
};
|
|
|
|
|
2019-05-29 01:38:04 +08:00
|
|
|
// Return true if this file has the hotpatch flag set to true in the S_COMPILE3
|
|
|
|
// record in codeview debug info. Also returns true for some thunks synthesized
|
|
|
|
// by the linker.
|
|
|
|
inline bool Chunk::isHotPatchable() const {
|
|
|
|
if (auto *sc = dyn_cast<SectionChunk>(this))
|
|
|
|
return sc->file->hotPatchable;
|
|
|
|
else if (isa<ImportThunkChunk>(this))
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-07-26 04:00:37 +08:00
|
|
|
// Helpers that patch the value `v` into the encoded instruction(s) at `off`.
// Only declared here; the definitions are not in this part of the file.
// NOTE(review): judging by the names these handle the ARM Thumb MOV32T and
// BRANCH24T relocation encodings -- confirm against the definitions.
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);
|
|
|
|
|
2018-09-18 15:22:01 +08:00
|
|
|
// Helpers that patch a value into the encoded instruction at `off`.
// Only declared here; the definitions are not in this part of the file.
// NOTE(review): judging by the names these handle ARM64 address, immediate
// and 26-bit branch encodings -- confirm the exact meaning of `s`, `p`,
// `shift` and `rangeLimit` against the definitions.
void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift);
void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit);
void applyArm64Branch26(uint8_t *off, int64_t v);
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
} // namespace coff
|
|
|
|
} // namespace lld
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
namespace llvm {
|
|
|
|
template <>
|
|
|
|
struct DenseMapInfo<lld::coff::ChunkAndOffset>
|
|
|
|
: lld::coff::ChunkAndOffset::DenseMapInfo {};
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
#endif
|