2015-05-29 03:09:30 +08:00
|
|
|
//===- Writer.cpp ---------------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-10-12 03:45:07 +08:00
|
|
|
#include "Writer.h"
|
2020-07-22 04:46:11 +08:00
|
|
|
#include "CallGraphSort.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "Config.h"
|
2015-08-06 07:43:53 +08:00
|
|
|
#include "DLL.h"
|
|
|
|
#include "InputFiles.h"
|
2020-03-24 05:06:48 +08:00
|
|
|
#include "LLDMapFile.h"
|
2017-01-14 11:14:46 +08:00
|
|
|
#include "MapFile.h"
|
2016-10-12 03:45:07 +08:00
|
|
|
#include "PDB.h"
|
2015-08-06 07:43:53 +08:00
|
|
|
#include "SymbolTable.h"
|
|
|
|
#include "Symbols.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-29 04:39:17 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2018-01-18 03:16:26 +08:00
|
|
|
#include "lld/Common/Timer.h"
|
2015-07-28 08:17:25 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2019-11-15 05:46:00 +08:00
|
|
|
#include "llvm/ADT/StringSet.h"
|
2015-07-28 08:17:25 +08:00
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
#include "llvm/Support/BinaryStreamReader.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
#include "llvm/Support/FileOutputBuffer.h"
|
2017-05-11 08:03:52 +08:00
|
|
|
#include "llvm/Support/Parallel.h"
|
2018-02-06 09:58:26 +08:00
|
|
|
#include "llvm/Support/Path.h"
|
2016-08-30 05:20:46 +08:00
|
|
|
#include "llvm/Support/RandomNumberGenerator.h"
|
2018-03-09 03:33:47 +08:00
|
|
|
#include "llvm/Support/xxhash.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include <algorithm>
|
2015-05-31 03:09:50 +08:00
|
|
|
#include <cstdio>
|
2015-05-29 03:09:30 +08:00
|
|
|
#include <map>
|
2015-08-06 07:43:53 +08:00
|
|
|
#include <memory>
|
2015-05-29 03:09:30 +08:00
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::COFF;
|
2015-05-31 03:09:50 +08:00
|
|
|
using namespace llvm::object;
|
|
|
|
using namespace llvm::support;
|
|
|
|
using namespace llvm::support::endian;
|
2020-02-20 09:05:42 +08:00
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::coff;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2018-03-08 22:27:28 +08:00
|
|
|
/* Machine code of the DOS program below. To re-generate DOSProgram:

$ cat > /tmp/DOSProgram.asm
org 0
        ; Copy cs to ds.
        push cs
        pop ds
        ; Point ds:dx at the $-terminated string.
        mov dx, str
        ; Int 21/AH=09h: Write string to standard output.
        mov ah, 0x9
        int 0x21
        ; Int 21/AH=4Ch: Exit with return code (in AL).
        mov ax, 0x4C01
        int 0x21
str:
        db 'This program cannot be run in DOS mode.$'
align 8, db 0
$ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin
$ xxd -i /tmp/DOSProgram.bin
*/
static unsigned char dosProgram[] = {
    // push cs; pop ds
    0x0e, 0x1f,
    // mov dx, str
    0xba, 0x0e, 0x00,
    // mov ah, 0x9; int 0x21
    0xb4, 0x09, 0xcd, 0x21,
    // mov ax, 0x4C01; int 0x21
    0xb8, 0x01, 0x4c, 0xcd, 0x21,
    // str: "This program cannot be run in DOS mode.$"
    0x54, 0x68, 0x69, 0x73, 0x20,                   // "This "
    0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x20, // "program "
    0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20,       // "cannot "
    0x62, 0x65, 0x20,                               // "be "
    0x72, 0x75, 0x6e, 0x20,                         // "run "
    0x69, 0x6e, 0x20,                               // "in "
    0x44, 0x4f, 0x53, 0x20,                         // "DOS "
    0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24,             // "mode.$"
    // align 8, db 0
    0x00, 0x00
};
static_assert(sizeof(dosProgram) % 8 == 0,
              "DOSProgram size must be multiple of 8");
|
|
|
|
|
|
|
|
static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram);
|
|
|
|
static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8");
|
|
|
|
|
2018-11-14 18:26:47 +08:00
|
|
|
// Number of data directory entries used by this file.
static constexpr int numberOfDataDirectory = 16;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2019-05-25 02:25:49 +08:00
|
|
|
// Global vector of all output sections. After output sections are finalized,
|
|
|
|
// this can be indexed by Chunk::getOutputSection.
|
|
|
|
// Chunk::getOutputSection() reads this as outputSections[osidx - 1], so a
// chunk's osidx is a 1-based position in this vector and 0 means "none".
static std::vector<OutputSection *> outputSections;
|
|
|
|
|
|
|
|
// Returns the output section recorded for this chunk, or nullptr when osidx
// is 0. Non-zero osidx values are 1-based indices into outputSections.
OutputSection *Chunk::getOutputSection() const {
  if (osidx == 0)
    return nullptr;
  return outputSections[osidx - 1];
}
|
|
|
|
|
2020-09-25 03:00:43 +08:00
|
|
|
// Empties the file-level outputSections vector.
void OutputSection::clear() {
  outputSections.clear();
}
|
|
|
|
|
2015-08-06 07:43:53 +08:00
|
|
|
namespace {
|
2016-08-30 05:20:46 +08:00
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
class DebugDirectoryChunk : public NonSectionChunk {
|
2016-08-30 05:20:46 +08:00
|
|
|
public:
|
2020-03-13 18:41:18 +08:00
|
|
|
DebugDirectoryChunk(const std::vector<std::pair<COFF::DebugType, Chunk *>> &r,
|
|
|
|
bool writeRepro)
|
2018-09-06 02:02:43 +08:00
|
|
|
: records(r), writeRepro(writeRepro) {}
|
2016-08-30 05:20:46 +08:00
|
|
|
|
|
|
|
size_t getSize() const override {
|
2018-09-06 02:02:43 +08:00
|
|
|
return (records.size() + int(writeRepro)) * sizeof(debug_directory);
|
2016-08-30 05:20:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *b) const override {
|
2019-05-10 05:21:22 +08:00
|
|
|
auto *d = reinterpret_cast<debug_directory *>(b);
|
2016-08-30 05:20:46 +08:00
|
|
|
|
2020-03-17 13:51:48 +08:00
|
|
|
for (const std::pair<COFF::DebugType, Chunk *>& record : records) {
|
2020-03-13 18:41:18 +08:00
|
|
|
Chunk *c = record.second;
|
|
|
|
OutputSection *os = c->getOutputSection();
|
|
|
|
uint64_t offs = os->getFileOff() + (c->getRVA() - os->getRVA());
|
|
|
|
fillEntry(d, record.first, c->getSize(), c->getRVA(), offs);
|
2016-08-30 05:20:46 +08:00
|
|
|
++d;
|
|
|
|
}
|
2018-09-06 02:02:43 +08:00
|
|
|
|
|
|
|
if (writeRepro) {
|
|
|
|
// FIXME: The COFF spec allows either a 0-sized entry to just say
|
|
|
|
// "the timestamp field is really a hash", or a 4-byte size field
|
|
|
|
// followed by that many bytes containing a longer hash (with the
|
|
|
|
// lowest 4 bytes usually being the timestamp in little-endian order).
|
|
|
|
// Consider storing the full 8 bytes computed by xxHash64 here.
|
|
|
|
fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0);
|
|
|
|
}
|
2016-08-30 05:20:46 +08:00
|
|
|
}
|
|
|
|
|
2018-03-09 03:33:47 +08:00
|
|
|
void setTimeDateStamp(uint32_t timeDateStamp) {
|
|
|
|
for (support::ulittle32_t *tds : timeDateStamps)
|
|
|
|
*tds = timeDateStamp;
|
|
|
|
}
|
|
|
|
|
2016-08-30 05:20:46 +08:00
|
|
|
private:
|
2018-09-06 02:02:43 +08:00
|
|
|
void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size,
|
|
|
|
uint64_t rva, uint64_t offs) const {
|
|
|
|
d->Characteristics = 0;
|
|
|
|
d->TimeDateStamp = 0;
|
|
|
|
d->MajorVersion = 0;
|
|
|
|
d->MinorVersion = 0;
|
|
|
|
d->Type = debugType;
|
|
|
|
d->SizeOfData = size;
|
|
|
|
d->AddressOfRawData = rva;
|
|
|
|
d->PointerToRawData = offs;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-09-06 02:02:43 +08:00
|
|
|
timeDateStamps.push_back(&d->TimeDateStamp);
|
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-03-09 03:33:47 +08:00
|
|
|
mutable std::vector<support::ulittle32_t *> timeDateStamps;
|
2020-03-13 18:41:18 +08:00
|
|
|
const std::vector<std::pair<COFF::DebugType, Chunk *>> &records;
|
2018-09-06 02:02:43 +08:00
|
|
|
bool writeRepro;
|
2016-08-30 05:20:46 +08:00
|
|
|
};
|
|
|
|
|
2019-05-25 04:25:40 +08:00
|
|
|
class CVDebugRecordChunk : public NonSectionChunk {
|
2017-08-05 04:02:55 +08:00
|
|
|
public:
|
2016-08-30 05:20:46 +08:00
|
|
|
size_t getSize() const override {
|
2018-07-12 11:22:39 +08:00
|
|
|
return sizeof(codeview::DebugInfo) + config->pdbAltPath.size() + 1;
|
2016-08-30 05:20:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *b) const override {
|
2016-09-10 03:26:03 +08:00
|
|
|
// Save off the DebugInfo entry to backfill the file signature (build id)
|
|
|
|
// in Writer::writeBuildId
|
2019-05-10 05:21:22 +08:00
|
|
|
buildId = reinterpret_cast<codeview::DebugInfo *>(b);
|
2016-08-30 05:20:46 +08:00
|
|
|
|
|
|
|
// variable sized field (PDB Path)
|
2019-05-10 05:21:22 +08:00
|
|
|
char *p = reinterpret_cast<char *>(b + sizeof(*buildId));
|
2018-07-12 11:22:39 +08:00
|
|
|
if (!config->pdbAltPath.empty())
|
|
|
|
memcpy(p, config->pdbAltPath.data(), config->pdbAltPath.size());
|
|
|
|
p[config->pdbAltPath.size()] = '\0';
|
2016-08-30 05:20:46 +08:00
|
|
|
}
|
2016-09-10 03:26:03 +08:00
|
|
|
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
mutable codeview::DebugInfo *buildId = nullptr;
|
2016-08-30 05:20:46 +08:00
|
|
|
};
|
|
|
|
|
2020-03-13 18:41:18 +08:00
|
|
|
class ExtendedDllCharacteristicsChunk : public NonSectionChunk {
|
|
|
|
public:
|
|
|
|
ExtendedDllCharacteristicsChunk(uint32_t c) : characteristics(c) {}
|
|
|
|
|
|
|
|
size_t getSize() const override { return 4; }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override { write32le(buf, characteristics); }
|
|
|
|
|
|
|
|
uint32_t characteristics = 0;
|
|
|
|
};
|
|
|
|
|
2019-01-28 09:45:35 +08:00
|
|
|
// PartialSection represents a group of chunks that contribute to an
|
|
|
|
// OutputSection. Collating a collection of PartialSections of same name and
|
|
|
|
// characteristics constitutes the OutputSection.
|
2019-02-05 16:16:10 +08:00
|
|
|
class PartialSectionKey {
|
2019-01-28 09:45:35 +08:00
|
|
|
public:
|
|
|
|
StringRef name;
|
|
|
|
unsigned characteristics;
|
|
|
|
|
2019-02-05 16:16:10 +08:00
|
|
|
bool operator<(const PartialSectionKey &other) const {
|
2019-01-28 09:45:35 +08:00
|
|
|
int c = name.compare(other.name);
|
|
|
|
if (c == 1)
|
|
|
|
return false;
|
|
|
|
if (c == 0)
|
|
|
|
return characteristics < other.characteristics;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-08-06 07:43:53 +08:00
|
|
|
// The writer writes a SymbolTable result to a file.
|
|
|
|
class Writer {
|
|
|
|
public:
|
2017-11-14 02:15:22 +08:00
|
|
|
Writer() : buffer(errorHandler().outputBuffer) {}
|
2015-08-06 22:58:50 +08:00
|
|
|
void run();
|
2015-08-06 07:43:53 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
void createSections();
|
|
|
|
void createMiscChunks();
|
|
|
|
void createImportTables();
|
2018-09-22 06:01:06 +08:00
|
|
|
void appendImportThunks();
|
2019-01-28 09:45:35 +08:00
|
|
|
void locateImportTables();
|
2015-08-06 07:43:53 +08:00
|
|
|
void createExportTable();
|
2018-04-21 05:10:33 +08:00
|
|
|
void mergeSections();
|
2018-11-28 04:48:09 +08:00
|
|
|
void removeUnusedSections();
|
2015-08-06 07:43:53 +08:00
|
|
|
void assignAddresses();
|
2018-09-25 18:59:29 +08:00
|
|
|
void finalizeAddresses();
|
2015-08-06 07:43:53 +08:00
|
|
|
void removeEmptySections();
|
2019-05-25 02:25:49 +08:00
|
|
|
void assignOutputSectionIndices();
|
2017-11-21 09:14:14 +08:00
|
|
|
void createSymbolAndStringTable();
|
2015-08-06 22:58:50 +08:00
|
|
|
void openFile(StringRef outputPath);
|
2015-08-06 07:43:53 +08:00
|
|
|
template <typename PEHeaderTy> void writeHeader();
|
2018-04-06 11:25:49 +08:00
|
|
|
void createSEHTable();
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
void createRuntimePseudoRelocs();
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
void insertCtorDtorSymbols();
|
2018-04-06 11:25:49 +08:00
|
|
|
void createGuardCFTables();
|
2018-02-06 09:58:26 +08:00
|
|
|
void markSymbolsForRVATable(ObjFile *file,
|
|
|
|
ArrayRef<SectionChunk *> symIdxChunks,
|
|
|
|
SymbolRVASet &tableSymbols);
|
2018-04-06 11:25:49 +08:00
|
|
|
void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
|
|
|
|
StringRef countSym);
|
2016-06-20 11:39:39 +08:00
|
|
|
void setSectionPermissions();
|
2015-08-06 07:43:53 +08:00
|
|
|
void writeSections();
|
2016-09-10 03:26:03 +08:00
|
|
|
void writeBuildId();
|
2020-07-22 04:46:11 +08:00
|
|
|
void sortSections();
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
void sortExceptionTable();
|
2018-10-05 20:56:46 +08:00
|
|
|
void sortCRTSectionChunks(std::vector<Chunk *> &chunks);
|
2019-01-28 09:45:35 +08:00
|
|
|
void addSyntheticIdata();
|
2019-06-29 01:13:52 +08:00
|
|
|
void fixPartialSectionChars(StringRef name, uint32_t chars);
|
2019-01-28 09:45:35 +08:00
|
|
|
bool fixGnuImportChunks();
|
|
|
|
PartialSection *createPartialSection(StringRef name, uint32_t outChars);
|
|
|
|
PartialSection *findPartialSection(StringRef name, uint32_t outChars);
|
2015-08-06 07:43:53 +08:00
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
llvm::Optional<coff_symbol16> createSymbol(Defined *d);
|
|
|
|
size_t addEntryToStringTable(StringRef str);
|
|
|
|
|
2015-08-06 07:43:53 +08:00
|
|
|
OutputSection *findSection(StringRef name);
|
2018-04-06 11:25:49 +08:00
|
|
|
void addBaserels();
|
|
|
|
void addBaserelBlocks(std::vector<Baserel> &v);
|
2015-08-06 07:43:53 +08:00
|
|
|
|
|
|
|
uint32_t getSizeOfInitializedData();
|
|
|
|
|
2017-11-14 02:15:22 +08:00
|
|
|
std::unique_ptr<FileOutputBuffer> &buffer;
|
2019-02-05 16:16:10 +08:00
|
|
|
std::map<PartialSectionKey, PartialSection *> partialSections;
|
2015-08-06 07:43:53 +08:00
|
|
|
std::vector<char> strtab;
|
|
|
|
std::vector<llvm::object::coff_symbol16> outputSymtab;
|
|
|
|
IdataContents idata;
|
2018-09-22 06:01:06 +08:00
|
|
|
Chunk *importTableStart = nullptr;
|
|
|
|
uint64_t importTableSize = 0;
|
2019-08-20 17:53:06 +08:00
|
|
|
Chunk *edataStart = nullptr;
|
|
|
|
Chunk *edataEnd = nullptr;
|
2018-09-22 06:01:06 +08:00
|
|
|
Chunk *iatStart = nullptr;
|
|
|
|
uint64_t iatSize = 0;
|
2015-08-06 07:43:53 +08:00
|
|
|
DelayLoadContents delayIdata;
|
|
|
|
EdataContents edata;
|
2018-04-19 06:37:10 +08:00
|
|
|
bool setNoSEHCharacteristic = false;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-03-09 03:33:47 +08:00
|
|
|
DebugDirectoryChunk *debugDirectory = nullptr;
|
2020-03-13 18:41:18 +08:00
|
|
|
std::vector<std::pair<COFF::DebugType, Chunk *>> debugRecords;
|
2016-09-10 03:26:03 +08:00
|
|
|
CVDebugRecordChunk *buildId = nullptr;
|
2016-10-12 03:45:07 +08:00
|
|
|
ArrayRef<uint8_t> sectionTable;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2015-08-06 07:43:53 +08:00
|
|
|
uint64_t fileSize;
|
2017-11-21 09:14:14 +08:00
|
|
|
uint32_t pointerToSymbolTable = 0;
|
2015-08-06 07:43:53 +08:00
|
|
|
uint64_t sizeOfImage;
|
|
|
|
uint64_t sizeOfHeaders;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-04-06 11:25:49 +08:00
|
|
|
OutputSection *textSec;
|
|
|
|
OutputSection *rdataSec;
|
2018-07-20 13:44:34 +08:00
|
|
|
OutputSection *buildidSec;
|
2018-04-06 11:25:49 +08:00
|
|
|
OutputSection *dataSec;
|
2018-04-21 05:10:33 +08:00
|
|
|
OutputSection *pdataSec;
|
2018-04-06 11:25:49 +08:00
|
|
|
OutputSection *idataSec;
|
|
|
|
OutputSection *edataSec;
|
|
|
|
OutputSection *didatSec;
|
|
|
|
OutputSection *rsrcSec;
|
|
|
|
OutputSection *relocSec;
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
OutputSection *ctorsSec;
|
|
|
|
OutputSection *dtorsSec;
|
2018-04-07 08:46:55 +08:00
|
|
|
|
|
|
|
// The first and last .pdata sections in the output file.
|
|
|
|
//
|
|
|
|
// We need to keep track of the location of .pdata in whichever section it
|
|
|
|
// gets merged into so that we can sort its contents and emit a correct data
|
|
|
|
// directory entry for the exception table. This is also the case for some
|
|
|
|
// other sections (such as .edata) but because the contents of those sections
|
|
|
|
// are entirely linker-generated we can keep track of their locations using
|
|
|
|
// the chunks that the linker creates. All .pdata chunks come from input
|
|
|
|
// files, so we need to keep track of them separately.
|
|
|
|
Chunk *firstPdata = nullptr;
|
|
|
|
Chunk *lastPdata;
|
2015-08-06 07:43:53 +08:00
|
|
|
};
|
|
|
|
} // anonymous namespace
|
|
|
|
|
2018-01-18 03:16:26 +08:00
|
|
|
// Timer labelled "Code Layout", constructed with Timer::root() as its second
// argument.
static Timer codeLayoutTimer("Code Layout", Timer::root());
|
|
|
|
// Timer labelled "Commit Output File", constructed with Timer::root() as its
// second argument.
static Timer diskCommitTimer("Commit Output File", Timer::root());
|
|
|
|
|
2020-02-20 09:05:42 +08:00
|
|
|
void lld::coff::writeResult() { Writer().run(); }
|
2015-06-07 07:32:08 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Appends `c` to the end of this section's chunk list.
void OutputSection::addChunk(Chunk *c) {
  chunks.emplace_back(c);
}
|
|
|
|
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kinds of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointer entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
// Inserts `c` in front of all chunks already in this section.
void OutputSection::insertChunkAtStart(Chunk *c) {
  auto front = chunks.begin();
  chunks.insert(front, c);
}
|
|
|
|
|
2016-06-20 11:39:39 +08:00
|
|
|
void OutputSection::setPermissions(uint32_t c) {
|
2018-04-21 05:23:16 +08:00
|
|
|
header.Characteristics &= ~permMask;
|
|
|
|
header.Characteristics |= c;
|
2016-06-20 11:39:39 +08:00
|
|
|
}
|
|
|
|
|
2018-04-21 05:10:33 +08:00
|
|
|
// Moves every chunk and every contributing partial section of `other` to the
// end of this section's corresponding lists, leaving both lists of `other`
// empty.
void OutputSection::merge(OutputSection *other) {
  // Append all of `src` to `dst`, then empty `src`.
  auto appendAndClear = [](auto &dst, auto &src) {
    dst.insert(dst.end(), src.begin(), src.end());
    src.clear();
  };

  appendAndClear(chunks, other->chunks);
  appendAndClear(contribSections, other->contribSections);
}
|
|
|
|
|
2015-05-31 03:09:50 +08:00
|
|
|
// Write the section header to a given buffer.
|
2015-06-07 07:19:38 +08:00
|
|
|
void OutputSection::writeHeaderTo(uint8_t *buf) {
|
2015-05-31 03:09:50 +08:00
|
|
|
auto *hdr = reinterpret_cast<coff_section *>(buf);
|
|
|
|
*hdr = header;
|
|
|
|
if (stringTableOff) {
|
|
|
|
// If name is too long, write offset into the string table as a name.
|
|
|
|
sprintf(hdr->Name, "/%d", stringTableOff);
|
|
|
|
} else {
|
2017-11-16 20:06:42 +08:00
|
|
|
assert(!config->debug || name.size() <= COFF::NameSize ||
|
|
|
|
(hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0);
|
2015-07-09 00:37:50 +08:00
|
|
|
strncpy(hdr->Name, name.data(),
|
|
|
|
std::min(name.size(), (size_t)COFF::NameSize));
|
2015-05-31 03:09:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-30 04:25:34 +08:00
|
|
|
// Appends `sec` to the list of partial sections contributing to this output
// section.
void OutputSection::addContributingPartialSection(PartialSection *sec) {
  contribSections.emplace_back(sec);
}
|
|
|
|
|
2018-09-25 18:59:29 +08:00
|
|
|
// Check whether the target address S is in range from a relocation
|
2019-07-16 16:26:38 +08:00
|
|
|
// of type relType at address P.
|
2018-09-25 18:59:29 +08:00
|
|
|
static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
|
2019-02-02 06:08:09 +08:00
|
|
|
if (config->machine == ARMNT) {
|
|
|
|
int64_t diff = AbsoluteDifference(s, p + 4) + margin;
|
|
|
|
switch (relType) {
|
|
|
|
case IMAGE_REL_ARM_BRANCH20T:
|
|
|
|
return isInt<21>(diff);
|
|
|
|
case IMAGE_REL_ARM_BRANCH24T:
|
|
|
|
case IMAGE_REL_ARM_BLX23T:
|
|
|
|
return isInt<25>(diff);
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (config->machine == ARM64) {
|
|
|
|
int64_t diff = AbsoluteDifference(s, p) + margin;
|
|
|
|
switch (relType) {
|
|
|
|
case IMAGE_REL_ARM64_BRANCH26:
|
|
|
|
return isInt<28>(diff);
|
|
|
|
case IMAGE_REL_ARM64_BRANCH19:
|
|
|
|
return isInt<21>(diff);
|
|
|
|
case IMAGE_REL_ARM64_BRANCH14:
|
|
|
|
return isInt<16>(diff);
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
llvm_unreachable("Unexpected architecture");
|
2018-09-25 18:59:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the last thunk for the given target if it is in range,
|
|
|
|
// or create a new one.
|
|
|
|
static std::pair<Defined *, bool>
|
|
|
|
getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target, uint64_t p,
|
|
|
|
uint16_t type, int margin) {
|
|
|
|
Defined *&lastThunk = lastThunks[target->getRVA()];
|
|
|
|
if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
|
|
|
|
return {lastThunk, false};
|
2019-02-02 06:08:09 +08:00
|
|
|
Chunk *c;
|
|
|
|
switch (config->machine) {
|
|
|
|
case ARMNT:
|
|
|
|
c = make<RangeExtensionThunkARM>(target);
|
|
|
|
break;
|
|
|
|
case ARM64:
|
|
|
|
c = make<RangeExtensionThunkARM64>(target);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected architecture");
|
|
|
|
}
|
2018-09-25 18:59:29 +08:00
|
|
|
Defined *d = make<DefinedSynthetic>("", c);
|
|
|
|
lastThunk = d;
|
|
|
|
return {d, true};
|
|
|
|
}
|
|
|
|
|
|
|
|
// This checks all relocations, and for any relocation which isn't in range
|
|
|
|
// it adds a thunk after the section chunk that contains the relocation.
|
|
|
|
// If the latest thunk for the specific target is in range, that is used
|
|
|
|
// instead of creating a new thunk. All range checks are done with the
|
|
|
|
// specified margin, to make sure that relocations that originally are in
|
|
|
|
// range, but only barely, also get thunks - in case other added thunks makes
|
|
|
|
// the target go out of range.
|
|
|
|
//
|
|
|
|
// After adding thunks, we verify that all relocations are in range (with
|
|
|
|
// no extra margin requirements). If this failed, we restart (throwing away
|
|
|
|
// the previously created thunks) and retry with a wider margin.
|
2019-02-02 06:08:03 +08:00
|
|
|
static bool createThunks(OutputSection *os, int margin) {
|
2018-09-25 18:59:29 +08:00
|
|
|
bool addressesChanged = false;
|
|
|
|
DenseMap<uint64_t, Defined *> lastThunks;
|
2019-03-29 02:30:03 +08:00
|
|
|
DenseMap<std::pair<ObjFile *, Defined *>, uint32_t> thunkSymtabIndices;
|
2018-09-25 18:59:29 +08:00
|
|
|
size_t thunksSize = 0;
|
|
|
|
// Recheck Chunks.size() each iteration, since we can insert more
|
|
|
|
// elements into it.
|
2019-02-02 06:08:03 +08:00
|
|
|
for (size_t i = 0; i != os->chunks.size(); ++i) {
|
|
|
|
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
|
2018-09-25 18:59:29 +08:00
|
|
|
if (!sc)
|
|
|
|
continue;
|
|
|
|
size_t thunkInsertionSpot = i + 1;
|
|
|
|
|
|
|
|
// Try to get a good enough estimate of where new thunks will be placed.
|
|
|
|
// Offset this by the size of the new thunks added so far, to make the
|
|
|
|
// estimate slightly better.
|
|
|
|
size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize;
|
2019-03-29 02:30:03 +08:00
|
|
|
ObjFile *file = sc->file;
|
|
|
|
std::vector<std::pair<uint32_t, uint32_t>> relocReplacements;
|
|
|
|
ArrayRef<coff_relocation> originalRelocs =
|
|
|
|
file->getCOFFObj()->getRelocations(sc->header);
|
|
|
|
for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) {
|
|
|
|
const coff_relocation &rel = originalRelocs[j];
|
|
|
|
Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex);
|
2018-09-25 18:59:29 +08:00
|
|
|
|
|
|
|
// The estimate of the source address P should be pretty accurate,
|
|
|
|
// but we don't know whether the target Symbol address should be
|
2019-07-16 16:26:38 +08:00
|
|
|
// offset by thunksSize or not (or by some of thunksSize but not all of
|
2018-09-25 18:59:29 +08:00
|
|
|
// it), giving us some uncertainty once we have added one thunk.
|
|
|
|
uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize;
|
|
|
|
|
|
|
|
Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
|
|
|
|
if (!sym)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
uint64_t s = sym->getRVA();
|
|
|
|
|
|
|
|
if (isInRange(rel.Type, s, p, margin))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// If the target isn't in range, hook it up to an existing or new
|
|
|
|
// thunk.
|
|
|
|
Defined *thunk;
|
|
|
|
bool wasNew;
|
|
|
|
std::tie(thunk, wasNew) = getThunk(lastThunks, sym, p, rel.Type, margin);
|
|
|
|
if (wasNew) {
|
|
|
|
Chunk *thunkChunk = thunk->getChunk();
|
|
|
|
thunkChunk->setRVA(
|
|
|
|
thunkInsertionRVA); // Estimate of where it will be located.
|
2019-02-02 06:08:03 +08:00
|
|
|
os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk);
|
2018-09-25 18:59:29 +08:00
|
|
|
thunkInsertionSpot++;
|
|
|
|
thunksSize += thunkChunk->getSize();
|
|
|
|
thunkInsertionRVA += thunkChunk->getSize();
|
|
|
|
addressesChanged = true;
|
|
|
|
}
|
2019-03-29 02:30:03 +08:00
|
|
|
|
|
|
|
// To redirect the relocation, add a symbol to the parent object file's
|
|
|
|
// symbol table, and replace the relocation symbol table index with the
|
|
|
|
// new index.
|
|
|
|
auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U});
|
|
|
|
uint32_t &thunkSymbolIndex = insertion.first->second;
|
|
|
|
if (insertion.second)
|
|
|
|
thunkSymbolIndex = file->addRangeThunkSymbol(thunk);
|
|
|
|
relocReplacements.push_back({j, thunkSymbolIndex});
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get a writable copy of this section's relocations so they can be
|
|
|
|
// modified. If the relocations point into the object file, allocate new
|
|
|
|
// memory. Otherwise, this must be previously allocated memory that can be
|
|
|
|
// modified in place.
|
2019-05-04 04:17:14 +08:00
|
|
|
ArrayRef<coff_relocation> curRelocs = sc->getRelocs();
|
2019-03-29 02:30:03 +08:00
|
|
|
MutableArrayRef<coff_relocation> newRelocs;
|
2019-05-04 04:17:14 +08:00
|
|
|
if (originalRelocs.data() == curRelocs.data()) {
|
2019-03-29 02:30:03 +08:00
|
|
|
newRelocs = makeMutableArrayRef(
|
|
|
|
bAlloc.Allocate<coff_relocation>(originalRelocs.size()),
|
|
|
|
originalRelocs.size());
|
|
|
|
} else {
|
|
|
|
newRelocs = makeMutableArrayRef(
|
2019-05-04 04:17:14 +08:00
|
|
|
const_cast<coff_relocation *>(curRelocs.data()), curRelocs.size());
|
2019-03-29 02:30:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Copy each relocation, but replace the symbol table indices which need
|
|
|
|
// thunks.
|
|
|
|
auto nextReplacement = relocReplacements.begin();
|
|
|
|
auto endReplacement = relocReplacements.end();
|
|
|
|
for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) {
|
|
|
|
newRelocs[i] = originalRelocs[i];
|
|
|
|
if (nextReplacement != endReplacement && nextReplacement->first == i) {
|
|
|
|
newRelocs[i].SymbolTableIndex = nextReplacement->second;
|
|
|
|
++nextReplacement;
|
|
|
|
}
|
2018-09-25 18:59:29 +08:00
|
|
|
}
|
2019-03-29 02:30:03 +08:00
|
|
|
|
2019-05-04 04:17:14 +08:00
|
|
|
sc->setRelocs(newRelocs);
|
2018-09-25 18:59:29 +08:00
|
|
|
}
|
|
|
|
return addressesChanged;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify that all relocations are in range, with no extra margin requirements.
|
|
|
|
static bool verifyRanges(const std::vector<Chunk *> chunks) {
|
|
|
|
for (Chunk *c : chunks) {
|
|
|
|
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
|
|
|
|
if (!sc)
|
|
|
|
continue;
|
|
|
|
|
2019-05-04 04:17:14 +08:00
|
|
|
ArrayRef<coff_relocation> relocs = sc->getRelocs();
|
|
|
|
for (size_t j = 0, e = relocs.size(); j < e; ++j) {
|
|
|
|
const coff_relocation &rel = relocs[j];
|
2019-03-29 02:30:03 +08:00
|
|
|
Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex);
|
2018-09-25 18:59:29 +08:00
|
|
|
|
|
|
|
Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
|
|
|
|
if (!sym)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
uint64_t p = sc->getRVA() + rel.VirtualAddress;
|
|
|
|
uint64_t s = sym->getRVA();
|
|
|
|
|
|
|
|
if (!isInRange(rel.Type, s, p, 0))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assign addresses and add thunks if necessary.
|
|
|
|
void Writer::finalizeAddresses() {
|
|
|
|
assignAddresses();
|
2019-02-02 06:08:09 +08:00
|
|
|
if (config->machine != ARMNT && config->machine != ARM64)
|
2018-09-25 18:59:29 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
size_t origNumChunks = 0;
|
|
|
|
for (OutputSection *sec : outputSections) {
|
|
|
|
sec->origChunks = sec->chunks;
|
|
|
|
origNumChunks += sec->chunks.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
int pass = 0;
|
|
|
|
int margin = 1024 * 100;
|
|
|
|
while (true) {
|
|
|
|
// First check whether we need thunks at all, or if the previous pass of
|
|
|
|
// adding them turned out ok.
|
|
|
|
bool rangesOk = true;
|
|
|
|
size_t numChunks = 0;
|
|
|
|
for (OutputSection *sec : outputSections) {
|
|
|
|
if (!verifyRanges(sec->chunks)) {
|
|
|
|
rangesOk = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
numChunks += sec->chunks.size();
|
|
|
|
}
|
|
|
|
if (rangesOk) {
|
|
|
|
if (pass > 0)
|
|
|
|
log("Added " + Twine(numChunks - origNumChunks) + " thunks with " +
|
|
|
|
"margin " + Twine(margin) + " in " + Twine(pass) + " passes");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pass >= 10)
|
|
|
|
fatal("adding thunks hasn't converged after " + Twine(pass) + " passes");
|
|
|
|
|
|
|
|
if (pass > 0) {
|
|
|
|
// If the previous pass didn't work out, reset everything back to the
|
|
|
|
// original conditions before retrying with a wider margin. This should
|
|
|
|
// ideally never happen under real circumstances.
|
2019-03-29 02:30:03 +08:00
|
|
|
for (OutputSection *sec : outputSections)
|
2018-09-25 18:59:29 +08:00
|
|
|
sec->chunks = sec->origChunks;
|
|
|
|
margin *= 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try adding thunks everywhere where it is needed, with a margin
|
|
|
|
// to avoid things going out of range due to the added thunks.
|
|
|
|
bool addressesChanged = false;
|
|
|
|
for (OutputSection *sec : outputSections)
|
2019-02-02 06:08:03 +08:00
|
|
|
addressesChanged |= createThunks(sec, margin);
|
2018-09-25 18:59:29 +08:00
|
|
|
// If the verification above thought we needed thunks, we should have
|
|
|
|
// added some.
|
|
|
|
assert(addressesChanged);
|
|
|
|
|
|
|
|
// Recalculate the layout for the whole image (and verify the ranges at
|
|
|
|
// the start of the next round).
|
|
|
|
assignAddresses();
|
|
|
|
|
|
|
|
pass++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-06 07:43:53 +08:00
|
|
|
// The main function of the writer.
|
2015-08-06 22:58:50 +08:00
|
|
|
void Writer::run() {
|
2018-01-18 03:16:26 +08:00
|
|
|
ScopedTimer t1(codeLayoutTimer);
|
|
|
|
|
2018-09-22 06:01:06 +08:00
|
|
|
createImportTables();
|
2015-08-06 07:43:53 +08:00
|
|
|
createSections();
|
2020-10-01 17:07:40 +08:00
|
|
|
createMiscChunks();
|
2020-10-02 02:27:32 +08:00
|
|
|
appendImportThunks();
|
2015-08-06 07:43:53 +08:00
|
|
|
createExportTable();
|
2018-04-21 05:10:33 +08:00
|
|
|
mergeSections();
|
2018-11-28 04:48:09 +08:00
|
|
|
removeUnusedSections();
|
2018-09-25 18:59:29 +08:00
|
|
|
finalizeAddresses();
|
2015-08-06 07:43:53 +08:00
|
|
|
removeEmptySections();
|
2019-05-25 02:25:49 +08:00
|
|
|
assignOutputSectionIndices();
|
2016-06-20 11:39:39 +08:00
|
|
|
setSectionPermissions();
|
2017-11-21 09:14:14 +08:00
|
|
|
createSymbolAndStringTable();
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
|
[LLD][COFF] Report error when file will exceed Windows maximum image size (4GB)
Patch by Colden Cullen.
Currently, when a large PE (>4 GiB) is to be produced, a crash occurs
because:
1. Calling setOffset with a number greater than UINT32_MAX causes the
PointerToRawData to overflow
2. When adding the symbol table to the end of the file, the last section's
offset was used to calculate file size. Because this had overflowed,
this number was too low, and the file created would not be large enough.
This lead to the actual crash I saw, which was a buffer overrun.
This change:
1. Adds comment to setOffset, clarifying that overflow can occur, but it's
somewhat safe because the error will be handled elsewhere
2. Adds file size check after all output data has been created This matches
the MS link.exe error, which looks prints as: "LINK : fatal error
LNK1248: image size (10000EFC9) exceeds maximum allowable size
(FFFFFFFF)"
3. Changes calculate of the symbol table offset to just use the existing
FileSize. This should match the previous calculations, but doesn't rely
on the use of a u32 that can overflow.
4. Removes trivial usage of a magic number that bugged me while I was
debugging the issue
I'm not sure how to add a test for this outside of adding 4GB of object
files to the repo. If there's an easier way, let me know and I'll be
happy to add a test.
Differential Revision: https://reviews.llvm.org/D42010
llvm-svn: 322605
2018-01-17 09:08:02 +08:00
|
|
|
if (fileSize > UINT32_MAX)
|
|
|
|
fatal("image size (" + Twine(fileSize) + ") " +
|
|
|
|
"exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");
|
|
|
|
|
2015-08-06 22:58:50 +08:00
|
|
|
openFile(config->outputFile);
|
2015-08-06 07:43:53 +08:00
|
|
|
if (config->is64()) {
|
|
|
|
writeHeader<pe32plus_header>();
|
|
|
|
} else {
|
|
|
|
writeHeader<pe32_header>();
|
|
|
|
}
|
|
|
|
writeSections();
|
|
|
|
sortExceptionTable();
|
2016-10-12 03:45:07 +08:00
|
|
|
|
2018-01-18 03:16:26 +08:00
|
|
|
t1.stop();
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
|
2018-01-18 03:16:26 +08:00
|
|
|
if (!config->pdbPath.empty() && config->debug) {
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
assert(buildId);
|
2018-09-16 02:37:22 +08:00
|
|
|
createPDB(symtab, outputSections, sectionTable, buildId->buildId);
|
2017-02-07 12:28:02 +08:00
|
|
|
}
|
2018-09-16 02:37:22 +08:00
|
|
|
writeBuildId();
|
2016-10-12 03:45:07 +08:00
|
|
|
|
2020-03-24 05:06:48 +08:00
|
|
|
writeLLDMapFile(outputSections);
|
2017-01-14 11:14:46 +08:00
|
|
|
writeMapFile(outputSections);
|
|
|
|
|
2019-08-21 05:08:14 +08:00
|
|
|
if (errorCount())
|
|
|
|
return;
|
|
|
|
|
2018-01-18 03:16:26 +08:00
|
|
|
ScopedTimer t2(diskCommitTimer);
|
2017-11-08 09:50:34 +08:00
|
|
|
if (auto e = buffer->commit())
|
|
|
|
fatal("failed to write the output file: " + toString(std::move(e)));
|
2015-08-06 07:43:53 +08:00
|
|
|
}
|
|
|
|
|
2018-04-07 08:46:55 +08:00
|
|
|
// Map an input (partial) section name to the name of its output section:
// everything from the first '$' on is dropped, and so is everything from the
// first '.' that is not the leading character.
static StringRef getOutputSectionName(StringRef name) {
  // find() yields npos when there is no match, and substr(0, npos) keeps the
  // whole string, so both steps are no-ops for names without a separator.
  size_t dollarPos = name.find('$');
  StringRef base = name.substr(0, dollarPos);

  // Treat a later period as a separator for MinGW, for sections like
  // ".ctors.01234".
  size_t dotPos = base.find('.', 1);
  return base.substr(0, dotPos);
}
|
|
|
|
|
2018-01-27 08:34:46 +08:00
|
|
|
// For /order.
|
|
|
|
static void sortBySectionOrder(std::vector<Chunk *> &chunks) {
|
|
|
|
auto getPriority = [](const Chunk *c) {
|
|
|
|
if (auto *sec = dyn_cast<SectionChunk>(c))
|
|
|
|
if (sec->sym)
|
|
|
|
return config->order.lookup(sec->sym->getName());
|
|
|
|
return 0;
|
|
|
|
};
|
|
|
|
|
2019-04-23 10:42:06 +08:00
|
|
|
llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) {
|
|
|
|
return getPriority(a) < getPriority(b);
|
|
|
|
});
|
2018-01-27 08:34:46 +08:00
|
|
|
}
|
|
|
|
|
2019-06-29 01:13:52 +08:00
|
|
|
// Change the characteristics of existing PartialSections that belong to the
|
|
|
|
// section Name to Chars.
|
|
|
|
void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) {
|
|
|
|
for (auto it : partialSections) {
|
|
|
|
PartialSection *pSec = it.second;
|
|
|
|
StringRef curName = pSec->name;
|
|
|
|
if (!curName.consume_front(name) ||
|
|
|
|
(!curName.empty() && !curName.startswith("$")))
|
|
|
|
continue;
|
|
|
|
if (pSec->characteristics == chars)
|
|
|
|
continue;
|
|
|
|
PartialSection *destSec = createPartialSection(pSec->name, chars);
|
|
|
|
destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(),
|
|
|
|
pSec->chunks.end());
|
|
|
|
pSec->chunks.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-22 06:01:06 +08:00
|
|
|
// Sort concrete section chunks from GNU import libraries.
|
|
|
|
//
|
|
|
|
// GNU binutils doesn't use short import files, but instead produces import
|
|
|
|
// libraries that consist of object files, with section chunks for the .idata$*
|
|
|
|
// sections. These are linked just as regular static libraries. Each import
|
|
|
|
// library consists of one header object, one object file for every imported
|
|
|
|
// symbol, and one trailer object. In order for the .idata tables/lists to
|
|
|
|
// be formed correctly, the section chunks within each .idata$* section need
|
|
|
|
// to be grouped by library, and sorted alphabetically within each library
|
|
|
|
// (which makes sure the header comes first and the trailer last).
|
2019-01-28 09:45:35 +08:00
|
|
|
bool Writer::fixGnuImportChunks() {
|
2018-09-22 06:01:06 +08:00
|
|
|
uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
|
|
|
|
|
|
|
|
// Make sure all .idata$* section chunks are mapped as RDATA in order to
|
|
|
|
// be sorted into the same sections as our own synthesized .idata chunks.
|
2019-06-29 01:13:52 +08:00
|
|
|
fixPartialSectionChars(".idata", rdata);
|
2018-09-22 06:01:06 +08:00
|
|
|
|
|
|
|
bool hasIdata = false;
|
|
|
|
// Sort all .idata$* chunks, grouping chunks from the same library,
|
|
|
|
// with alphabetical ordering of the object fils within a library.
|
2019-02-05 16:16:10 +08:00
|
|
|
for (auto it : partialSections) {
|
|
|
|
PartialSection *pSec = it.second;
|
2019-01-28 09:45:35 +08:00
|
|
|
if (!pSec->name.startswith(".idata"))
|
2018-09-22 06:01:06 +08:00
|
|
|
continue;
|
|
|
|
|
2019-04-23 10:42:06 +08:00
|
|
|
if (!pSec->chunks.empty())
|
2018-09-22 06:01:06 +08:00
|
|
|
hasIdata = true;
|
2019-04-23 10:42:06 +08:00
|
|
|
llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) {
|
2018-09-22 06:01:06 +08:00
|
|
|
SectionChunk *sc1 = dyn_cast_or_null<SectionChunk>(s);
|
|
|
|
SectionChunk *sc2 = dyn_cast_or_null<SectionChunk>(t);
|
|
|
|
if (!sc1 || !sc2) {
|
|
|
|
// if SC1, order them ascending. If SC2 or both null,
|
|
|
|
// S is not less than T.
|
|
|
|
return sc1 != nullptr;
|
|
|
|
}
|
|
|
|
// Make a string with "libraryname/objectfile" for sorting, achieving
|
|
|
|
// both grouping by library and sorting of objects within a library,
|
|
|
|
// at once.
|
|
|
|
std::string key1 =
|
|
|
|
(sc1->file->parentName + "/" + sc1->file->getName()).str();
|
|
|
|
std::string key2 =
|
|
|
|
(sc2->file->parentName + "/" + sc2->file->getName()).str();
|
|
|
|
return key1 < key2;
|
|
|
|
});
|
|
|
|
}
|
|
|
|
return hasIdata;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add generated idata chunks, for imported symbols and DLLs, and a
|
|
|
|
// terminator in .idata$2.
|
2019-01-28 09:45:35 +08:00
|
|
|
void Writer::addSyntheticIdata() {
|
2018-09-22 06:01:06 +08:00
|
|
|
uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
|
|
|
|
idata.create();
|
|
|
|
|
|
|
|
// Add the .idata content in the right section groups, to allow
|
|
|
|
// chunks from other linked in object files to be grouped together.
|
|
|
|
// See Microsoft PE/COFF spec 5.4 for details.
|
|
|
|
auto add = [&](StringRef n, std::vector<Chunk *> &v) {
|
2019-01-28 09:45:35 +08:00
|
|
|
PartialSection *pSec = createPartialSection(n, rdata);
|
|
|
|
pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end());
|
2018-09-22 06:01:06 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// The loader assumes a specific order of data.
|
|
|
|
// Add each type in the correct order.
|
|
|
|
add(".idata$2", idata.dirs);
|
|
|
|
add(".idata$4", idata.lookups);
|
|
|
|
add(".idata$5", idata.addresses);
|
2019-10-09 14:48:24 +08:00
|
|
|
if (!idata.hints.empty())
|
|
|
|
add(".idata$6", idata.hints);
|
2018-09-22 06:01:06 +08:00
|
|
|
add(".idata$7", idata.dllNames);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Locate the first Chunk and size of the import directory list and the
|
|
|
|
// IAT.
|
2019-01-28 09:45:35 +08:00
|
|
|
void Writer::locateImportTables() {
|
2018-09-22 06:01:06 +08:00
|
|
|
uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
|
2019-01-28 09:45:35 +08:00
|
|
|
|
|
|
|
if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) {
|
|
|
|
if (!importDirs->chunks.empty())
|
|
|
|
importTableStart = importDirs->chunks.front();
|
|
|
|
for (Chunk *c : importDirs->chunks)
|
|
|
|
importTableSize += c->getSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) {
|
|
|
|
if (!importAddresses->chunks.empty())
|
|
|
|
iatStart = importAddresses->chunks.front();
|
|
|
|
for (Chunk *c : importAddresses->chunks)
|
|
|
|
iatSize += c->getSize();
|
|
|
|
}
|
2018-09-22 06:01:06 +08:00
|
|
|
}
|
|
|
|
|
2019-07-23 14:38:04 +08:00
|
|
|
// Return whether a SectionChunk's suffix (the dollar and any trailing
|
|
|
|
// suffix) should be removed and sorted into the main suffixless
|
|
|
|
// PartialSection.
|
|
|
|
static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) {
|
|
|
|
// On MinGW, comdat groups are formed by putting the comdat group name
|
|
|
|
// after the '$' in the section name. For .eh_frame$<symbol>, that must
|
|
|
|
// still be sorted before the .eh_frame trailer from crtend.o, thus just
|
|
|
|
// strip the section name trailer. For other sections, such as
|
|
|
|
// .tls$$<symbol> (where non-comdat .tls symbols are otherwise stored in
|
|
|
|
// ".tls$"), they must be strictly sorted after .tls. And for the
|
|
|
|
// hypothetical case of comdat .CRT$XCU, we definitely need to keep the
|
|
|
|
// suffix for sorting. Thus, to play it safe, only strip the suffix for
|
|
|
|
// the standard sections.
|
|
|
|
if (!config->mingw)
|
|
|
|
return false;
|
|
|
|
if (!sc || !sc->isCOMDAT())
|
|
|
|
return false;
|
|
|
|
return name.startswith(".text$") || name.startswith(".data$") ||
|
|
|
|
name.startswith(".rdata$") || name.startswith(".pdata$") ||
|
|
|
|
name.startswith(".xdata$") || name.startswith(".eh_frame$");
|
|
|
|
}
|
|
|
|
|
2020-07-22 04:46:11 +08:00
|
|
|
void Writer::sortSections() {
|
|
|
|
if (!config->callGraphProfile.empty()) {
|
|
|
|
DenseMap<const SectionChunk *, int> order = computeCallGraphProfileOrder();
|
|
|
|
for (auto it : order) {
|
|
|
|
if (DefinedRegular *sym = it.first->sym)
|
|
|
|
config->order[sym->getName()] = it.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!config->order.empty())
|
|
|
|
for (auto it : partialSections)
|
|
|
|
sortBySectionOrder(it.second->chunks);
|
|
|
|
}
|
|
|
|
|
2015-06-07 07:32:08 +08:00
|
|
|
// Create output section objects and add them to OutputSections.
|
2015-05-29 03:09:30 +08:00
|
|
|
void Writer::createSections() {
|
2018-04-06 11:25:49 +08:00
|
|
|
// First, create the builtin sections.
|
|
|
|
const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA;
|
|
|
|
const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA;
|
|
|
|
const uint32_t code = IMAGE_SCN_CNT_CODE;
|
|
|
|
const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE;
|
|
|
|
const uint32_t r = IMAGE_SCN_MEM_READ;
|
|
|
|
const uint32_t w = IMAGE_SCN_MEM_WRITE;
|
|
|
|
const uint32_t x = IMAGE_SCN_MEM_EXECUTE;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-04-21 05:10:33 +08:00
|
|
|
SmallDenseMap<std::pair<StringRef, uint32_t>, OutputSection *> sections;
|
|
|
|
auto createSection = [&](StringRef name, uint32_t outChars) {
|
|
|
|
OutputSection *&sec = sections[{name, outChars}];
|
2018-04-07 08:46:55 +08:00
|
|
|
if (!sec) {
|
2018-04-21 05:10:33 +08:00
|
|
|
sec = make<OutputSection>(name, outChars);
|
2018-04-07 08:46:55 +08:00
|
|
|
outputSections.push_back(sec);
|
|
|
|
}
|
2018-04-06 11:25:49 +08:00
|
|
|
return sec;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Try to match the section order used by link.exe.
|
|
|
|
textSec = createSection(".text", code | r | x);
|
|
|
|
createSection(".bss", bss | r | w);
|
|
|
|
rdataSec = createSection(".rdata", data | r);
|
2018-07-20 13:44:34 +08:00
|
|
|
buildidSec = createSection(".buildid", data | r);
|
2018-04-06 11:25:49 +08:00
|
|
|
dataSec = createSection(".data", data | r | w);
|
2018-04-21 05:10:33 +08:00
|
|
|
pdataSec = createSection(".pdata", data | r);
|
2018-04-06 11:25:49 +08:00
|
|
|
idataSec = createSection(".idata", data | r);
|
|
|
|
edataSec = createSection(".edata", data | r);
|
|
|
|
didatSec = createSection(".didat", data | r);
|
|
|
|
rsrcSec = createSection(".rsrc", data | r);
|
|
|
|
relocSec = createSection(".reloc", data | discardable | r);
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
ctorsSec = createSection(".ctors", data | r | w);
|
|
|
|
dtorsSec = createSection(".dtors", data | r | w);
|
2018-04-06 11:25:49 +08:00
|
|
|
|
2018-04-21 05:10:33 +08:00
|
|
|
// Then bin chunks by name and output characteristics.
|
2015-05-29 03:09:30 +08:00
|
|
|
for (Chunk *c : symtab->getChunks()) {
|
2015-09-17 05:40:47 +08:00
|
|
|
auto *sc = dyn_cast<SectionChunk>(c);
|
2018-08-31 15:45:20 +08:00
|
|
|
if (sc && !sc->live) {
|
2015-09-17 05:40:47 +08:00
|
|
|
if (config->verbose)
|
|
|
|
sc->printDiscardedMessage();
|
|
|
|
continue;
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2019-06-15 05:02:09 +08:00
|
|
|
StringRef name = c->getSectionName();
|
2019-07-23 14:38:04 +08:00
|
|
|
if (shouldStripSectionSuffix(sc, name))
|
2019-06-15 05:02:09 +08:00
|
|
|
name = name.split('$').first;
|
|
|
|
PartialSection *pSec = createPartialSection(name,
|
2019-01-28 09:45:35 +08:00
|
|
|
c->getOutputCharacteristics());
|
|
|
|
pSec->chunks.push_back(c);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2019-06-29 01:13:52 +08:00
|
|
|
fixPartialSectionChars(".rsrc", data | r);
|
2019-08-20 17:53:06 +08:00
|
|
|
fixPartialSectionChars(".edata", data | r);
|
2018-09-22 06:01:06 +08:00
|
|
|
// Even in non MinGW cases, we might need to link against GNU import
|
|
|
|
// libraries.
|
2019-01-28 09:45:35 +08:00
|
|
|
bool hasIdata = fixGnuImportChunks();
|
2018-09-22 06:01:06 +08:00
|
|
|
if (!idata.empty())
|
|
|
|
hasIdata = true;
|
|
|
|
|
|
|
|
if (hasIdata)
|
2019-01-28 09:45:35 +08:00
|
|
|
addSyntheticIdata();
|
2018-09-22 06:01:06 +08:00
|
|
|
|
2020-07-22 04:46:11 +08:00
|
|
|
sortSections();
|
2018-01-27 08:34:46 +08:00
|
|
|
|
2018-09-22 06:01:06 +08:00
|
|
|
if (hasIdata)
|
2019-01-28 09:45:35 +08:00
|
|
|
locateImportTables();
|
2018-09-22 06:01:06 +08:00
|
|
|
|
2015-06-07 07:32:08 +08:00
|
|
|
// Then create an OutputSection for each section.
|
2015-06-08 16:26:28 +08:00
|
|
|
// '$' and all following characters in input section names are
|
|
|
|
// discarded when determining output section. So, .text$foo
|
|
|
|
// contributes to .text, for example. See PE/COFF spec 3.2.
|
2019-02-05 16:16:10 +08:00
|
|
|
for (auto it : partialSections) {
|
|
|
|
PartialSection *pSec = it.second;
|
2019-01-28 09:45:35 +08:00
|
|
|
StringRef name = getOutputSectionName(pSec->name);
|
|
|
|
uint32_t outChars = pSec->characteristics;
|
2018-04-21 05:10:33 +08:00
|
|
|
|
2018-10-05 20:56:46 +08:00
|
|
|
if (name == ".CRT") {
|
|
|
|
// In link.exe, there is a special case for the I386 target where .CRT
|
|
|
|
// sections are treated as if they have output characteristics DATA | R if
|
|
|
|
// their characteristics are DATA | R | W. This implements the same
|
|
|
|
// special case for all architectures.
|
2018-04-21 05:10:33 +08:00
|
|
|
outChars = data | r;
|
|
|
|
|
2019-01-28 09:45:35 +08:00
|
|
|
log("Processing section " + pSec->name + " -> " + name);
|
2018-10-05 20:56:46 +08:00
|
|
|
|
2019-01-28 09:45:35 +08:00
|
|
|
sortCRTSectionChunks(pSec->chunks);
|
2018-10-05 20:56:46 +08:00
|
|
|
}
|
|
|
|
|
2018-04-21 05:10:33 +08:00
|
|
|
OutputSection *sec = createSection(name, outChars);
|
2019-01-28 09:45:35 +08:00
|
|
|
for (Chunk *c : pSec->chunks)
|
2015-05-29 03:09:30 +08:00
|
|
|
sec->addChunk(c);
|
2019-03-30 04:25:34 +08:00
|
|
|
|
|
|
|
sec->addContributingPartialSection(pSec);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2018-04-06 11:25:49 +08:00
|
|
|
|
|
|
|
// Finally, move some output sections to the end.
|
2019-04-23 10:42:06 +08:00
|
|
|
auto sectionOrder = [&](const OutputSection *s) {
|
2019-06-11 09:14:23 +08:00
|
|
|
// Move DISCARDABLE (or non-memory-mapped) sections to the end of file
|
|
|
|
// because the loader cannot handle holes. Stripping can remove other
|
|
|
|
// discardable ones than .reloc, which is first of them (created early).
|
2018-04-20 05:48:37 +08:00
|
|
|
if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
|
2018-04-06 11:25:49 +08:00
|
|
|
return 2;
|
|
|
|
// .rsrc should come at the end of the non-discardable sections because its
|
|
|
|
// size may change by the Win32 UpdateResources() function, causing
|
|
|
|
// subsequent sections to move (see https://crbug.com/827082).
|
|
|
|
if (s == rsrcSec)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
};
|
2019-04-23 10:42:06 +08:00
|
|
|
llvm::stable_sort(outputSections,
|
|
|
|
[&](const OutputSection *s, const OutputSection *t) {
|
|
|
|
return sectionOrder(s) < sectionOrder(t);
|
|
|
|
});
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 11:31:47 +08:00
|
|
|
void Writer::createMiscChunks() {
|
2019-05-24 08:02:00 +08:00
|
|
|
for (MergeChunk *p : MergeChunk::instances) {
|
|
|
|
if (p) {
|
|
|
|
p->finalizeContents();
|
2019-05-23 04:21:52 +08:00
|
|
|
rdataSec->addChunk(p);
|
2019-05-24 08:02:00 +08:00
|
|
|
}
|
|
|
|
}
|
2018-03-16 05:14:02 +08:00
|
|
|
|
2015-07-25 07:51:14 +08:00
|
|
|
// Create thunks for locally-dllimported symbols.
|
|
|
|
if (!symtab->localImportChunks.empty()) {
|
|
|
|
for (Chunk *c : symtab->localImportChunks)
|
2018-04-06 11:25:49 +08:00
|
|
|
rdataSec->addChunk(c);
|
2015-07-25 07:51:14 +08:00
|
|
|
}
|
|
|
|
|
2016-08-30 05:20:46 +08:00
|
|
|
// Create Debug Information Chunks
|
2018-09-06 02:02:43 +08:00
|
|
|
OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec;
|
2020-03-13 18:41:18 +08:00
|
|
|
if (config->debug || config->repro || config->cetCompat) {
|
2018-09-06 02:02:43 +08:00
|
|
|
debugDirectory = make<DebugDirectoryChunk>(debugRecords, config->repro);
|
2020-03-13 18:41:18 +08:00
|
|
|
debugDirectory->setAlignment(4);
|
2018-09-06 02:02:43 +08:00
|
|
|
debugInfoSec->addChunk(debugDirectory);
|
|
|
|
}
|
2018-07-20 13:44:34 +08:00
|
|
|
|
2018-09-06 02:02:43 +08:00
|
|
|
if (config->debug) {
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
// Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We
|
|
|
|
// output a PDB no matter what, and this chunk provides the only means of
|
|
|
|
// allowing a debugger to match a PDB and an executable. So we need it even
|
|
|
|
// if we're ultimately not going to write CodeView data to the PDB.
|
2018-09-10 21:20:16 +08:00
|
|
|
buildId = make<CVDebugRecordChunk>();
|
2020-03-13 18:41:18 +08:00
|
|
|
debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_CODEVIEW, buildId});
|
|
|
|
}
|
|
|
|
|
|
|
|
if (config->cetCompat) {
|
|
|
|
ExtendedDllCharacteristicsChunk *extendedDllChars =
|
|
|
|
make<ExtendedDllCharacteristicsChunk>(
|
|
|
|
IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT);
|
|
|
|
debugRecords.push_back(
|
|
|
|
{COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS, extendedDllChars});
|
|
|
|
}
|
2016-08-30 05:20:46 +08:00
|
|
|
|
2020-03-13 18:41:18 +08:00
|
|
|
if (debugRecords.size() > 0) {
|
|
|
|
for (std::pair<COFF::DebugType, Chunk *> r : debugRecords)
|
|
|
|
debugInfoSec->addChunk(r.second);
|
2016-08-30 05:20:46 +08:00
|
|
|
}
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Create SEH table. x86-only.
|
[COFF] Implement /safeseh:no and check @feat.00 flags by default
Summary:
Fixes PR41828. Before this, LLD always emitted SafeSEH chunks and
defined __safe_se_handler_table & size. Now, /safeseh:no leaves those
undefined.
Additionally, we were checking for the safeseh @feat.00 flag in two
places: once to emit errors, and once during safeseh table construction.
The error was set up to be off by default, but safeseh is supposed to be
on by default. I combined the two checks, so now LLD emits an error if
an input object lacks @feat.00 and safeseh is enabled. This caused the
majority of 32-bit LLD tests to fail, since many test input object files
lack @feat.00 symbols. I explicitly added -safeseh:no to those tests to
preserve behavior.
Finally, LLD no longer sets IMAGE_DLL_CHARACTERISTICS_NO_SEH if any
input file wasn't compiled for safeseh.
Reviewers: mstorsjo, ruiu, thakis
Reviewed By: ruiu, thakis
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63570
llvm-svn: 366238
2019-07-17 02:17:33 +08:00
|
|
|
if (config->safeSEH)
|
2018-04-06 11:25:49 +08:00
|
|
|
createSEHTable();
|
2018-02-06 09:58:26 +08:00
|
|
|
|
2018-02-14 04:32:53 +08:00
|
|
|
// Create /guard:cf tables if requested.
|
|
|
|
if (config->guardCF != GuardCFLevel::Off)
|
2018-04-06 11:25:49 +08:00
|
|
|
createGuardCFTables();
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
|
2020-04-26 05:49:44 +08:00
|
|
|
if (config->autoImport)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
createRuntimePseudoRelocs();
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
|
2020-04-26 05:49:44 +08:00
|
|
|
if (config->mingw)
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
insertCtorDtorSymbols();
|
2015-06-25 11:31:47 +08:00
|
|
|
}
|
|
|
|
|
2015-06-07 06:56:55 +08:00
|
|
|
// Create .idata section for the DLL-imported symbol table.
|
|
|
|
// The format of this section is inherently Windows-specific.
|
|
|
|
// IdataContents class abstracted away the details for us,
|
|
|
|
// so we just let it create chunks and add them to the section.
|
2015-06-07 06:46:15 +08:00
|
|
|
void Writer::createImportTables() {
|
2015-08-17 16:30:31 +08:00
|
|
|
// Initialize DLLOrder so that import entries are ordered in
|
|
|
|
// the same order as in the command line. (That affects DLL
|
|
|
|
// initialization order, and this ordering is MSVC-compatible.)
|
2017-07-27 08:45:26 +08:00
|
|
|
for (ImportFile *file : ImportFile::instances) {
|
2017-05-25 06:30:06 +08:00
|
|
|
if (!file->live)
|
|
|
|
continue;
|
|
|
|
|
2015-09-02 15:27:31 +08:00
|
|
|
std::string dll = StringRef(file->dllName).lower();
|
|
|
|
if (config->dllOrder.count(dll) == 0)
|
|
|
|
config->dllOrder[dll] = config->dllOrder.size();
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-07-10 18:40:11 +08:00
|
|
|
if (file->impSym && !isa<DefinedImportData>(file->impSym))
|
|
|
|
fatal(toString(*file->impSym) + " was replaced");
|
|
|
|
DefinedImportData *impSym = cast_or_null<DefinedImportData>(file->impSym);
|
2015-09-03 22:49:47 +08:00
|
|
|
if (config->delayLoads.count(StringRef(file->dllName).lower())) {
|
2017-05-26 02:03:34 +08:00
|
|
|
if (!file->thunkSym)
|
|
|
|
fatal("cannot delay-load " + toString(file) +
|
2018-07-10 18:40:11 +08:00
|
|
|
" due to import of data: " + toString(*impSym));
|
|
|
|
delayIdata.add(impSym);
|
2015-08-17 15:27:45 +08:00
|
|
|
} else {
|
2018-07-10 18:40:11 +08:00
|
|
|
idata.add(impSym);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
}
|
2018-09-22 06:01:06 +08:00
|
|
|
}
|
2017-05-25 06:30:06 +08:00
|
|
|
|
2018-09-22 06:01:06 +08:00
|
|
|
void Writer::appendImportThunks() {
|
|
|
|
if (ImportFile::instances.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (ImportFile *file : ImportFile::instances) {
|
|
|
|
if (!file->live)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!file->thunkSym)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!isa<DefinedImportThunk>(file->thunkSym))
|
|
|
|
fatal(toString(*file->thunkSym) + " was replaced");
|
|
|
|
DefinedImportThunk *thunk = cast<DefinedImportThunk>(file->thunkSym);
|
|
|
|
if (file->thunkLive)
|
|
|
|
textSec->addChunk(thunk->getChunk());
|
|
|
|
}
|
2017-05-25 06:30:06 +08:00
|
|
|
|
2015-06-22 06:31:52 +08:00
|
|
|
if (!delayIdata.empty()) {
|
2016-12-10 05:55:24 +08:00
|
|
|
Defined *helper = cast<Defined>(config->delayLoadHelper);
|
2015-07-02 11:59:04 +08:00
|
|
|
delayIdata.create(helper);
|
2015-06-27 05:40:15 +08:00
|
|
|
for (Chunk *c : delayIdata.getChunks())
|
2018-04-06 11:25:49 +08:00
|
|
|
didatSec->addChunk(c);
|
2015-06-27 05:40:15 +08:00
|
|
|
for (Chunk *c : delayIdata.getDataChunks())
|
2018-04-06 11:25:49 +08:00
|
|
|
dataSec->addChunk(c);
|
2017-05-19 01:03:49 +08:00
|
|
|
for (Chunk *c : delayIdata.getCodeChunks())
|
2018-04-06 11:25:49 +08:00
|
|
|
textSec->addChunk(c);
|
2015-06-22 06:31:52 +08:00
|
|
|
}
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2015-06-17 08:16:33 +08:00
|
|
|
void Writer::createExportTable() {
|
2019-08-20 17:53:06 +08:00
|
|
|
if (!edataSec->chunks.empty()) {
|
|
|
|
// Allow using a custom built export table from input object files, instead
|
|
|
|
// of having the linker synthesize the tables.
|
|
|
|
if (config->hadExplicitExports)
|
|
|
|
warn("literal .edata sections override exports");
|
|
|
|
} else if (!config->exports.empty()) {
|
|
|
|
for (Chunk *c : edata.chunks)
|
|
|
|
edataSec->addChunk(c);
|
|
|
|
}
|
|
|
|
if (!edataSec->chunks.empty()) {
|
|
|
|
edataStart = edataSec->chunks.front();
|
|
|
|
edataEnd = edataSec->chunks.back();
|
|
|
|
}
|
2015-06-17 08:16:33 +08:00
|
|
|
}
|
|
|
|
|
2018-11-28 04:48:09 +08:00
|
|
|
void Writer::removeUnusedSections() {
|
|
|
|
// Remove sections that we can be sure won't get content, to avoid
|
|
|
|
// allocating space for their section headers.
|
|
|
|
auto isUnused = [this](OutputSection *s) {
|
|
|
|
if (s == relocSec)
|
|
|
|
return false; // This section is populated later.
|
|
|
|
// MergeChunks have zero size at this point, as their size is finalized
|
|
|
|
// later. Only remove sections that have no Chunks at all.
|
|
|
|
return s->chunks.empty();
|
|
|
|
};
|
|
|
|
outputSections.erase(
|
|
|
|
std::remove_if(outputSections.begin(), outputSections.end(), isUnused),
|
|
|
|
outputSections.end());
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// The Windows loader doesn't seem to like empty sections,
|
|
|
|
// so we remove them if any.
|
|
|
|
void Writer::removeEmptySections() {
|
2015-06-04 00:44:00 +08:00
|
|
|
auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; };
|
2015-05-29 03:09:30 +08:00
|
|
|
outputSections.erase(
|
|
|
|
std::remove_if(outputSections.begin(), outputSections.end(), isEmpty),
|
|
|
|
outputSections.end());
|
2019-05-25 02:25:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void Writer::assignOutputSectionIndices() {
|
|
|
|
// Assign final output section indices, and assign each chunk to its output
|
|
|
|
// section.
|
2015-07-09 00:37:50 +08:00
|
|
|
uint32_t idx = 1;
|
2019-05-25 02:25:49 +08:00
|
|
|
for (OutputSection *os : outputSections) {
|
|
|
|
os->sectionIndex = idx;
|
|
|
|
for (Chunk *c : os->chunks)
|
|
|
|
c->setOutputSectionIdx(idx);
|
|
|
|
++idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge chunks are containers of chunks, so assign those an output section
|
|
|
|
// too.
|
|
|
|
for (MergeChunk *mc : MergeChunk::instances)
|
|
|
|
if (mc)
|
|
|
|
for (SectionChunk *sc : mc->sections)
|
|
|
|
if (sc && sc->live)
|
|
|
|
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
|
2015-07-09 00:37:50 +08:00
|
|
|
}
|
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
// Appends `str` plus a terminating NUL to strtab and returns the offset under
// which the entry is referenced. Only names that do not fit into the
// COFF::NameSize-byte inline field may be added.
size_t Writer::addEntryToStringTable(StringRef str) {
  assert(str.size() > COFF::NameSize);

  // Offsets are biased by 4 to account for the size field.
  const size_t entryOffset = 4 + strtab.size();

  strtab.insert(strtab.end(), str.begin(), str.end());
  strtab.push_back('\0');
  return entryOffset;
}
|
|
|
|
|
|
|
|
// Builds the COFF symbol table record for `def`. Returns None for symbols that
// are not emitted: synthetic symbols, symbols without a chunk or output
// section, and runtime pseudo relocation symbols.
Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
  coff_symbol16 sym;
  const auto kind = def->kind();

  // Relative symbols are unrepresentable in a COFF symbol table.
  if (kind == Symbol::DefinedSyntheticKind)
    return None;

  if (kind == Symbol::DefinedAbsoluteKind) {
    sym.Value = def->getRVA();
    sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
  } else {
    // Don't write symbols that won't be written to the output to the symbol
    // table.
    Chunk *chunk = def->getChunk();
    OutputSection *os = chunk ? chunk->getOutputSection() : nullptr;
    if (!os)
      return None;

    // The value is stored relative to the start of the containing section.
    sym.Value = def->getRVA() - os->getRVA();
    sym.SectionNumber = os->sectionIndex;
  }

  // Symbols that are runtime pseudo relocations don't point to the actual
  // symbol data itself (as they are imported), but points to the IAT entry
  // instead. Avoid emitting them to the symbol table, as they can confuse
  // debuggers.
  if (def->isRuntimePseudoReloc)
    return None;

  // Names of up to COFF::NameSize bytes are stored inline, zero padded;
  // longer ones go to the string table and are referenced by offset.
  StringRef name = def->getName();
  const bool fitsInline = name.size() <= COFF::NameSize;
  if (fitsInline) {
    memset(sym.Name.ShortName, 0, COFF::NameSize);
    memcpy(sym.Name.ShortName, name.data(), name.size());
  } else {
    sym.Name.Offset.Zeroes = 0;
    sym.Name.Offset.Offset = addEntryToStringTable(name);
  }

  // Default to an untyped external symbol; symbols that originate from a COFF
  // object keep the type and storage class they had there.
  sym.Type = IMAGE_SYM_TYPE_NULL;
  sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL;
  if (auto *coffDef = dyn_cast<DefinedCOFF>(def)) {
    COFFSymbolRef ref = coffDef->getCOFFSymbol();
    sym.Type = ref.getType();
    sym.StorageClass = ref.getStorageClass();
  }

  sym.NumberOfAuxSymbols = 0;
  return sym;
}
|
|
|
|
|
|
|
|
void Writer::createSymbolAndStringTable() {
|
2018-03-17 04:20:01 +08:00
|
|
|
// PE/COFF images are limited to 8 byte section names. Longer names can be
|
|
|
|
// supported by writing a non-standard string table, but this string table is
|
|
|
|
// not mapped at runtime and the long names will therefore be inaccessible.
|
|
|
|
// link.exe always truncates section names to 8 bytes, whereas binutils always
|
|
|
|
// preserves long section names via the string table. LLD adopts a hybrid
|
|
|
|
// solution where discardable sections have long names preserved and
|
|
|
|
// non-discardable sections have their names truncated, to ensure that any
|
|
|
|
// section which is mapped at runtime also has its name mapped at runtime.
|
2015-07-09 00:37:50 +08:00
|
|
|
for (OutputSection *sec : outputSections) {
|
2018-03-16 05:13:46 +08:00
|
|
|
if (sec->name.size() <= COFF::NameSize)
|
2015-07-09 00:37:50 +08:00
|
|
|
continue;
|
2018-04-20 05:48:37 +08:00
|
|
|
if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0)
|
2017-11-16 20:06:42 +08:00
|
|
|
continue;
|
2019-10-30 06:57:40 +08:00
|
|
|
if (config->warnLongSectionNames) {
|
|
|
|
warn("section name " + sec->name +
|
|
|
|
" is longer than 8 characters and will use a non-standard string "
|
|
|
|
"table");
|
|
|
|
}
|
2018-03-16 05:13:46 +08:00
|
|
|
sec->setStringTableOff(addEntryToStringTable(sec->name));
|
2017-11-21 09:14:14 +08:00
|
|
|
}
|
|
|
|
|
2018-06-29 14:08:25 +08:00
|
|
|
if (config->debugDwarf || config->debugSymtab) {
|
2017-11-21 09:14:14 +08:00
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
|
|
|
for (Symbol *b : file->getSymbols()) {
|
|
|
|
auto *d = dyn_cast_or_null<Defined>(b);
|
|
|
|
if (!d || d->writtenToSymtab)
|
|
|
|
continue;
|
|
|
|
d->writtenToSymtab = true;
|
|
|
|
|
|
|
|
if (Optional<coff_symbol16> sym = createSymbol(d))
|
|
|
|
outputSymtab.push_back(*sym);
|
|
|
|
}
|
|
|
|
}
|
2015-07-09 00:37:50 +08:00
|
|
|
}
|
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
if (outputSymtab.empty() && strtab.empty())
|
2017-11-18 03:51:20 +08:00
|
|
|
return;
|
2015-07-09 00:37:50 +08:00
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
// We position the symbol table to be adjacent to the end of the last section.
|
[LLD][COFF] Report error when file will exceed Windows maximum image size (4GB)
Patch by Colden Cullen.
Currently, when a large PE (>4 GiB) is to be produced, a crash occurs
because:
1. Calling setOffset with a number greater than UINT32_MAX causes the
PointerToRawData to overflow
2. When adding the symbol table to the end of the file, the last section's
offset was used to calculate file size. Because this had overflowed,
this number was too low, and the file created would not be large enough.
This lead to the actual crash I saw, which was a buffer overrun.
This change:
1. Adds comment to setOffset, clarifying that overflow can occur, but it's
somewhat safe because the error will be handled elsewhere
2. Adds file size check after all output data has been created This matches
the MS link.exe error, which looks prints as: "LINK : fatal error
LNK1248: image size (10000EFC9) exceeds maximum allowable size
(FFFFFFFF)"
3. Changes calculate of the symbol table offset to just use the existing
FileSize. This should match the previous calculations, but doesn't rely
on the use of a u32 that can overflow.
4. Removes trivial usage of a magic number that bugged me while I was
debugging the issue
I'm not sure how to add a test for this outside of adding 4GB of object
files to the repo. If there's an easier way, let me know and I'll be
happy to add a test.
Differential Revision: https://reviews.llvm.org/D42010
llvm-svn: 322605
2018-01-17 09:08:02 +08:00
|
|
|
uint64_t fileOff = fileSize;
|
2017-11-21 09:14:14 +08:00
|
|
|
pointerToSymbolTable = fileOff;
|
|
|
|
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
|
|
|
|
fileOff += 4 + strtab.size();
|
2019-05-24 20:42:36 +08:00
|
|
|
fileSize = alignTo(fileOff, config->fileAlign);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2018-04-21 05:10:33 +08:00
|
|
|
void Writer::mergeSections() {
|
2018-09-25 18:59:29 +08:00
|
|
|
if (!pdataSec->chunks.empty()) {
|
|
|
|
firstPdata = pdataSec->chunks.front();
|
|
|
|
lastPdata = pdataSec->chunks.back();
|
2018-04-21 05:10:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto &p : config->merge) {
|
|
|
|
StringRef toName = p.second;
|
|
|
|
if (p.first == toName)
|
|
|
|
continue;
|
|
|
|
StringSet<> names;
|
|
|
|
while (1) {
|
|
|
|
if (!names.insert(toName).second)
|
|
|
|
fatal("/merge: cycle found for section '" + p.first + "'");
|
|
|
|
auto i = config->merge.find(toName);
|
|
|
|
if (i == config->merge.end())
|
|
|
|
break;
|
|
|
|
toName = i->second;
|
|
|
|
}
|
|
|
|
OutputSection *from = findSection(p.first);
|
|
|
|
OutputSection *to = findSection(toName);
|
|
|
|
if (!from)
|
|
|
|
continue;
|
|
|
|
if (!to) {
|
|
|
|
from->name = toName;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
to->merge(from);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Visits all sections to assign incremental, non-overlapping RVAs and
|
|
|
|
// file offsets.
|
|
|
|
void Writer::assignAddresses() {
|
2015-07-08 09:45:29 +08:00
|
|
|
sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
|
2018-11-14 18:26:47 +08:00
|
|
|
sizeof(data_directory) * numberOfDataDirectory +
|
2015-07-08 09:45:29 +08:00
|
|
|
sizeof(coff_section) * outputSections.size();
|
2015-07-10 00:40:39 +08:00
|
|
|
sizeOfHeaders +=
|
|
|
|
config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
|
2019-05-24 20:42:36 +08:00
|
|
|
sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign);
|
2015-08-12 07:09:00 +08:00
|
|
|
fileSize = sizeOfHeaders;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2019-08-07 18:16:21 +08:00
|
|
|
// The first page is kept unmapped.
|
|
|
|
uint64_t rva = alignTo(sizeOfHeaders, config->align);
|
|
|
|
|
2015-06-04 00:44:00 +08:00
|
|
|
for (OutputSection *sec : outputSections) {
|
2018-04-06 11:25:49 +08:00
|
|
|
if (sec == relocSec)
|
|
|
|
addBaserels();
|
2018-03-16 05:13:46 +08:00
|
|
|
uint64_t rawSize = 0, virtualSize = 0;
|
|
|
|
sec->header.VirtualAddress = rva;
|
2019-02-23 09:46:18 +08:00
|
|
|
|
|
|
|
// If /FUNCTIONPADMIN is used, functions are padded in order to create a
|
|
|
|
// hotpatchable image.
|
|
|
|
const bool isCodeSection =
|
|
|
|
(sec->header.Characteristics & IMAGE_SCN_CNT_CODE) &&
|
|
|
|
(sec->header.Characteristics & IMAGE_SCN_MEM_READ) &&
|
|
|
|
(sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE);
|
|
|
|
uint32_t padding = isCodeSection ? config->functionPadMin : 0;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2018-09-25 18:59:29 +08:00
|
|
|
for (Chunk *c : sec->chunks) {
|
2019-02-23 09:46:18 +08:00
|
|
|
if (padding && c->isHotPatchable())
|
|
|
|
virtualSize += padding;
|
2019-05-23 04:21:52 +08:00
|
|
|
virtualSize = alignTo(virtualSize, c->getAlignment());
|
2018-03-16 05:13:46 +08:00
|
|
|
c->setRVA(rva + virtualSize);
|
|
|
|
virtualSize += c->getSize();
|
2019-07-10 17:10:01 +08:00
|
|
|
if (c->hasData)
|
2019-05-24 20:42:36 +08:00
|
|
|
rawSize = alignTo(virtualSize, config->fileAlign);
|
2018-03-16 05:13:46 +08:00
|
|
|
}
|
|
|
|
if (virtualSize > UINT32_MAX)
|
|
|
|
error("section larger than 4 GiB: " + sec->name);
|
|
|
|
sec->header.VirtualSize = virtualSize;
|
|
|
|
sec->header.SizeOfRawData = rawSize;
|
|
|
|
if (rawSize != 0)
|
|
|
|
sec->header.PointerToRawData = fileSize;
|
2019-08-07 18:16:21 +08:00
|
|
|
rva += alignTo(virtualSize, config->align);
|
2019-05-24 20:42:36 +08:00
|
|
|
fileSize += alignTo(rawSize, config->fileAlign);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2019-08-07 18:16:21 +08:00
|
|
|
sizeOfImage = alignTo(rva, config->align);
|
2019-05-24 08:02:00 +08:00
|
|
|
|
|
|
|
// Assign addresses to sections in MergeChunks.
|
|
|
|
for (MergeChunk *mc : MergeChunk::instances)
|
|
|
|
if (mc)
|
|
|
|
mc->assignSubsectionRVAs();
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2015-07-08 09:45:29 +08:00
|
|
|
template <typename PEHeaderTy> void Writer::writeHeader() {
|
2018-03-08 22:27:28 +08:00
|
|
|
// Write DOS header. For backwards compatibility, the first part of a PE/COFF
|
|
|
|
// executable consists of an MS-DOS MZ executable. If the executable is run
|
|
|
|
// under DOS, that program gets run (usually to just print an error message).
|
|
|
|
// When run under Windows, the loader looks at AddressOfNewExeHeader and uses
|
|
|
|
// the PE header instead.
|
2015-05-29 03:09:30 +08:00
|
|
|
uint8_t *buf = buffer->getBufferStart();
|
|
|
|
auto *dos = reinterpret_cast<dos_header *>(buf);
|
2018-03-08 22:27:28 +08:00
|
|
|
buf += sizeof(dos_header);
|
2015-05-29 03:09:30 +08:00
|
|
|
dos->Magic[0] = 'M';
|
|
|
|
dos->Magic[1] = 'Z';
|
2018-03-08 22:27:28 +08:00
|
|
|
dos->UsedBytesInTheLastPage = dosStubSize % 512;
|
|
|
|
dos->FileSizeInPages = divideCeil(dosStubSize, 512);
|
|
|
|
dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
dos->AddressOfRelocationTable = sizeof(dos_header);
|
|
|
|
dos->AddressOfNewExeHeader = dosStubSize;
|
|
|
|
|
2018-03-08 22:27:28 +08:00
|
|
|
// Write DOS program.
|
|
|
|
memcpy(buf, dosProgram, sizeof(dosProgram));
|
|
|
|
buf += sizeof(dosProgram);
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Write PE magic
|
|
|
|
memcpy(buf, PEMagic, sizeof(PEMagic));
|
|
|
|
buf += sizeof(PEMagic);
|
|
|
|
|
|
|
|
// Write COFF header
|
|
|
|
auto *coff = reinterpret_cast<coff_file_header *>(buf);
|
|
|
|
buf += sizeof(*coff);
|
2015-07-26 05:54:50 +08:00
|
|
|
coff->Machine = config->machine;
|
2015-05-29 03:09:30 +08:00
|
|
|
coff->NumberOfSections = outputSections.size();
|
2015-06-15 11:03:23 +08:00
|
|
|
coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE;
|
2015-07-28 11:12:00 +08:00
|
|
|
if (config->largeAddressAware)
|
2015-07-08 09:45:29 +08:00
|
|
|
coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE;
|
2015-07-28 11:12:00 +08:00
|
|
|
if (!config->is64())
|
2015-07-09 09:25:49 +08:00
|
|
|
coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE;
|
2015-06-17 08:16:33 +08:00
|
|
|
if (config->dll)
|
|
|
|
coff->Characteristics |= IMAGE_FILE_DLL;
|
2019-11-13 12:53:15 +08:00
|
|
|
if (config->driverUponly)
|
|
|
|
coff->Characteristics |= IMAGE_FILE_UP_SYSTEM_ONLY;
|
2015-06-15 09:23:58 +08:00
|
|
|
if (!config->relocatable)
|
2015-06-15 11:03:23 +08:00
|
|
|
coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED;
|
2019-04-25 22:02:26 +08:00
|
|
|
if (config->swaprunCD)
|
|
|
|
coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP;
|
|
|
|
if (config->swaprunNet)
|
|
|
|
coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP;
|
2015-05-29 03:09:30 +08:00
|
|
|
coff->SizeOfOptionalHeader =
|
2018-11-14 18:26:47 +08:00
|
|
|
sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory;
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Write PE header
|
2015-07-08 09:45:29 +08:00
|
|
|
auto *pe = reinterpret_cast<PEHeaderTy *>(buf);
|
2015-05-29 03:09:30 +08:00
|
|
|
buf += sizeof(*pe);
|
2015-07-10 00:40:39 +08:00
|
|
|
pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
|
2017-06-22 00:42:08 +08:00
|
|
|
|
|
|
|
// If {Major,Minor}LinkerVersion is left at 0.0, then for some
|
|
|
|
// reason signing the resulting PE file with Authenticode produces a
|
|
|
|
// signature that fails to validate on Windows 7 (but is OK on 10).
|
|
|
|
// Set it to 14.0, which is what VS2015 outputs, and which avoids
|
|
|
|
// that problem.
|
|
|
|
pe->MajorLinkerVersion = 14;
|
|
|
|
pe->MinorLinkerVersion = 0;
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
pe->ImageBase = config->imageBase;
|
2019-08-07 18:16:21 +08:00
|
|
|
pe->SectionAlignment = config->align;
|
2019-05-24 20:42:36 +08:00
|
|
|
pe->FileAlignment = config->fileAlign;
|
2015-05-30 00:28:29 +08:00
|
|
|
pe->MajorImageVersion = config->majorImageVersion;
|
|
|
|
pe->MinorImageVersion = config->minorImageVersion;
|
2015-05-30 00:34:31 +08:00
|
|
|
pe->MajorOperatingSystemVersion = config->majorOSVersion;
|
|
|
|
pe->MinorOperatingSystemVersion = config->minorOSVersion;
|
2020-10-04 06:29:45 +08:00
|
|
|
pe->MajorSubsystemVersion = config->majorSubsystemVersion;
|
|
|
|
pe->MinorSubsystemVersion = config->minorSubsystemVersion;
|
2015-05-30 00:34:31 +08:00
|
|
|
pe->Subsystem = config->subsystem;
|
2015-05-29 03:09:30 +08:00
|
|
|
pe->SizeOfImage = sizeOfImage;
|
|
|
|
pe->SizeOfHeaders = sizeOfHeaders;
|
2015-06-29 03:56:30 +08:00
|
|
|
if (!config->noEntry) {
|
2016-12-10 05:55:24 +08:00
|
|
|
Defined *entry = cast<Defined>(config->entry);
|
2015-06-29 03:56:30 +08:00
|
|
|
pe->AddressOfEntryPoint = entry->getRVA();
|
2015-07-25 10:25:14 +08:00
|
|
|
// Pointer to thumb code must have the LSB set, so adjust it.
|
2015-07-26 05:54:50 +08:00
|
|
|
if (config->machine == ARMNT)
|
2015-07-25 10:25:14 +08:00
|
|
|
pe->AddressOfEntryPoint |= 1;
|
2015-06-29 03:56:30 +08:00
|
|
|
}
|
2015-05-30 00:21:11 +08:00
|
|
|
pe->SizeOfStackReserve = config->stackReserve;
|
|
|
|
pe->SizeOfStackCommit = config->stackCommit;
|
2015-05-30 00:23:40 +08:00
|
|
|
pe->SizeOfHeapReserve = config->heapReserve;
|
|
|
|
pe->SizeOfHeapCommit = config->heapCommit;
|
2017-04-07 07:07:53 +08:00
|
|
|
if (config->appContainer)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER;
|
2019-11-13 12:53:15 +08:00
|
|
|
if (config->driverWdm)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER;
|
2015-06-17 07:13:00 +08:00
|
|
|
if (config->dynamicBase)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
|
|
|
|
if (config->highEntropyVA)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA;
|
2017-09-16 06:49:13 +08:00
|
|
|
if (!config->allowBind)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND;
|
2015-06-17 07:13:00 +08:00
|
|
|
if (config->nxCompat)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT;
|
|
|
|
if (!config->allowIsolation)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION;
|
2018-02-14 04:32:53 +08:00
|
|
|
if (config->guardCF != GuardCFLevel::Off)
|
2018-02-06 09:58:26 +08:00
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF;
|
2018-05-31 21:43:02 +08:00
|
|
|
if (config->integrityCheck)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY;
|
2020-07-28 04:44:41 +08:00
|
|
|
if (setNoSEHCharacteristic || config->noSEH)
|
2017-12-16 04:53:03 +08:00
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH;
|
2015-06-17 07:13:00 +08:00
|
|
|
if (config->terminalServerAware)
|
|
|
|
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
|
2018-11-14 18:26:47 +08:00
|
|
|
pe->NumberOfRvaAndSize = numberOfDataDirectory;
|
2018-04-06 11:25:49 +08:00
|
|
|
if (textSec->getVirtualSize()) {
|
|
|
|
pe->BaseOfCode = textSec->getRVA();
|
|
|
|
pe->SizeOfCode = textSec->getRawSize();
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
pe->SizeOfInitializedData = getSizeOfInitializedData();
|
|
|
|
|
|
|
|
// Write data directory
|
2015-06-21 12:10:54 +08:00
|
|
|
auto *dir = reinterpret_cast<data_directory *>(buf);
|
2018-11-14 18:26:47 +08:00
|
|
|
buf += sizeof(*dir) * numberOfDataDirectory;
|
2019-08-20 17:53:06 +08:00
|
|
|
if (edataStart) {
|
|
|
|
dir[EXPORT_TABLE].RelativeVirtualAddress = edataStart->getRVA();
|
|
|
|
dir[EXPORT_TABLE].Size =
|
|
|
|
edataEnd->getRVA() + edataEnd->getSize() - edataStart->getRVA();
|
2015-06-17 08:16:33 +08:00
|
|
|
}
|
2018-09-22 06:01:06 +08:00
|
|
|
if (importTableStart) {
|
|
|
|
dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA();
|
|
|
|
dir[IMPORT_TABLE].Size = importTableSize;
|
|
|
|
}
|
|
|
|
if (iatStart) {
|
|
|
|
dir[IAT].RelativeVirtualAddress = iatStart->getRVA();
|
|
|
|
dir[IAT].Size = iatSize;
|
2015-06-22 06:31:52 +08:00
|
|
|
}
|
2018-04-06 11:25:49 +08:00
|
|
|
if (rsrcSec->getVirtualSize()) {
|
|
|
|
dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA();
|
|
|
|
dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize();
|
2015-06-15 05:50:50 +08:00
|
|
|
}
|
2018-04-07 08:46:55 +08:00
|
|
|
if (firstPdata) {
|
|
|
|
dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA();
|
|
|
|
dir[EXCEPTION_TABLE].Size =
|
|
|
|
lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA();
|
2015-06-21 12:00:54 +08:00
|
|
|
}
|
2018-04-06 11:25:49 +08:00
|
|
|
if (relocSec->getVirtualSize()) {
|
|
|
|
dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA();
|
|
|
|
dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize();
|
2016-08-10 12:37:56 +08:00
|
|
|
}
|
2017-11-04 05:21:47 +08:00
|
|
|
if (Symbol *sym = symtab->findUnderscore("_tls_used")) {
|
2017-11-01 00:10:24 +08:00
|
|
|
if (Defined *b = dyn_cast<Defined>(sym)) {
|
2015-07-06 09:48:01 +08:00
|
|
|
dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA();
|
2016-03-15 14:41:02 +08:00
|
|
|
dir[TLS_TABLE].Size = config->is64()
|
|
|
|
? sizeof(object::coff_tls_directory64)
|
|
|
|
: sizeof(object::coff_tls_directory32);
|
2015-07-06 09:48:01 +08:00
|
|
|
}
|
|
|
|
}
|
2018-09-06 02:02:43 +08:00
|
|
|
if (debugDirectory) {
|
2016-08-30 05:20:46 +08:00
|
|
|
dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA();
|
|
|
|
dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize();
|
|
|
|
}
|
2017-11-04 05:21:47 +08:00
|
|
|
if (Symbol *sym = symtab->findUnderscore("_load_config_used")) {
|
2017-11-01 00:10:24 +08:00
|
|
|
if (auto *b = dyn_cast<DefinedRegular>(sym)) {
|
2016-03-15 17:48:27 +08:00
|
|
|
SectionChunk *sc = b->getChunk();
|
|
|
|
assert(b->getRVA() >= sc->getRVA());
|
|
|
|
uint64_t offsetInChunk = b->getRVA() - sc->getRVA();
|
2019-07-10 17:10:01 +08:00
|
|
|
if (!sc->hasData || offsetInChunk + 4 > sc->getSize())
|
2016-07-15 07:37:14 +08:00
|
|
|
fatal("_load_config_used is malformed");
|
2016-03-15 17:48:27 +08:00
|
|
|
|
|
|
|
ArrayRef<uint8_t> secContents = sc->getContents();
|
|
|
|
uint32_t loadConfigSize =
|
|
|
|
*reinterpret_cast<const ulittle32_t *>(&secContents[offsetInChunk]);
|
|
|
|
if (offsetInChunk + loadConfigSize > sc->getSize())
|
2016-07-15 07:37:14 +08:00
|
|
|
fatal("_load_config_used is too large");
|
2015-07-17 02:30:35 +08:00
|
|
|
dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = b->getRVA();
|
2016-03-15 17:48:27 +08:00
|
|
|
dir[LOAD_CONFIG_TABLE].Size = loadConfigSize;
|
2015-07-17 02:30:35 +08:00
|
|
|
}
|
|
|
|
}
|
2016-08-10 12:37:56 +08:00
|
|
|
if (!delayIdata.empty()) {
|
|
|
|
dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress =
|
|
|
|
delayIdata.getDirRVA();
|
|
|
|
dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize();
|
|
|
|
}
|
2015-05-29 03:09:30 +08:00
|
|
|
|
|
|
|
// Write section table
|
2015-06-04 00:44:00 +08:00
|
|
|
for (OutputSection *sec : outputSections) {
|
2015-06-07 07:19:38 +08:00
|
|
|
sec->writeHeaderTo(buf);
|
2015-05-31 03:09:50 +08:00
|
|
|
buf += sizeof(coff_section);
|
|
|
|
}
|
2016-10-12 03:45:07 +08:00
|
|
|
sectionTable = ArrayRef<uint8_t>(
|
|
|
|
buf - outputSections.size() * sizeof(coff_section), buf);
|
2015-05-31 03:09:50 +08:00
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
if (outputSymtab.empty() && strtab.empty())
|
2015-05-31 03:09:50 +08:00
|
|
|
return;
|
2015-07-09 00:37:50 +08:00
|
|
|
|
2017-11-21 09:14:14 +08:00
|
|
|
coff->PointerToSymbolTable = pointerToSymbolTable;
|
|
|
|
uint32_t numberOfSymbols = outputSymtab.size();
|
|
|
|
coff->NumberOfSymbols = numberOfSymbols;
|
|
|
|
auto *symbolTable = reinterpret_cast<coff_symbol16 *>(
|
|
|
|
buffer->getBufferStart() + coff->PointerToSymbolTable);
|
|
|
|
for (size_t i = 0; i != numberOfSymbols; ++i)
|
|
|
|
symbolTable[i] = outputSymtab[i];
|
|
|
|
// Create the string table, it follows immediately after the symbol table.
|
|
|
|
// The first 4 bytes is length including itself.
|
|
|
|
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
|
|
|
|
write32le(buf, strtab.size() + 4);
|
|
|
|
if (!strtab.empty())
|
|
|
|
memcpy(buf + 4, strtab.data(), strtab.size());
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2015-08-06 22:58:50 +08:00
|
|
|
// Creates the output buffer for `path`, sized to fileSize and flagged as an
// executable, and stores it in `buffer`. A creation failure is reported
// through CHECK with a "failed to open <path>" message.
void Writer::openFile(StringRef path) {
  auto created =
      FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable);
  buffer = CHECK(std::move(created), "failed to open " + path);
}
|
|
|
|
|
2018-04-06 11:25:49 +08:00
|
|
|
void Writer::createSEHTable() {
|
2018-02-06 09:58:26 +08:00
|
|
|
SymbolRVASet handlers;
|
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
|
|
|
if (!file->hasSafeSEH())
|
[COFF] Implement /safeseh:no and check @feat.00 flags by default
Summary:
Fixes PR41828. Before this, LLD always emitted SafeSEH chunks and
defined __safe_se_handler_table & size. Now, /safeseh:no leaves those
undefined.
Additionally, we were checking for the safeseh @feat.00 flag in two
places: once to emit errors, and once during safeseh table construction.
The error was set up to be off by default, but safeseh is supposed to be
on by default. I combined the two checks, so now LLD emits an error if
an input object lacks @feat.00 and safeseh is enabled. This caused the
majority of 32-bit LLD tests to fail, since many test input object files
lack @feat.00 symbols. I explicitly added -safeseh:no to those tests to
preserve behavior.
Finally, LLD no longer sets IMAGE_DLL_CHARACTERISTICS_NO_SEH if any
input file wasn't compiled for safeseh.
Reviewers: mstorsjo, ruiu, thakis
Reviewed By: ruiu, thakis
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63570
llvm-svn: 366238
2019-07-17 02:17:33 +08:00
|
|
|
error("/safeseh: " + file->getName() + " is not compatible with SEH");
|
2018-02-06 09:58:26 +08:00
|
|
|
markSymbolsForRVATable(file, file->getSXDataChunks(), handlers);
|
|
|
|
}
|
2017-11-08 07:24:10 +08:00
|
|
|
|
[COFF] Implement /safeseh:no and check @feat.00 flags by default
Summary:
Fixes PR41828. Before this, LLD always emitted SafeSEH chunks and
defined __safe_se_handler_table & size. Now, /safeseh:no leaves those
undefined.
Additionally, we were checking for the safeseh @feat.00 flag in two
places: once to emit errors, and once during safeseh table construction.
The error was set up to be off by default, but safeseh is supposed to be
on by default. I combined the two checks, so now LLD emits an error if
an input object lacks @feat.00 and safeseh is enabled. This caused the
majority of 32-bit LLD tests to fail, since many test input object files
lack @feat.00 symbols. I explicitly added -safeseh:no to those tests to
preserve behavior.
Finally, LLD no longer sets IMAGE_DLL_CHARACTERISTICS_NO_SEH if any
input file wasn't compiled for safeseh.
Reviewers: mstorsjo, ruiu, thakis
Reviewed By: ruiu, thakis
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63570
llvm-svn: 366238
2019-07-17 02:17:33 +08:00
|
|
|
// Set the "no SEH" characteristic if there really were no handlers, or if
|
|
|
|
// there is no load config object to point to the table of handlers.
|
2018-04-19 06:37:10 +08:00
|
|
|
setNoSEHCharacteristic =
|
|
|
|
handlers.empty() || !symtab->findUnderscore("_load_config_used");
|
|
|
|
|
2018-04-06 11:25:49 +08:00
|
|
|
maybeAddRVATable(std::move(handlers), "__safe_se_handler_table",
|
2018-02-06 09:58:26 +08:00
|
|
|
"__safe_se_handler_count");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set
|
|
|
|
// cannot contain duplicates. Therefore, the set is uniqued by Chunk and the
|
|
|
|
// symbol's offset into that Chunk.
|
|
|
|
static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) {
|
|
|
|
Chunk *c = s->getChunk();
|
|
|
|
if (auto *sc = dyn_cast<SectionChunk>(c))
|
|
|
|
c = sc->repl; // Look through ICF replacement.
|
|
|
|
uint32_t off = s->getRVA() - (c ? c->getRVA() : 0);
|
|
|
|
rvaSet.insert({c, off});
|
|
|
|
}
|
|
|
|
|
2018-11-27 09:50:17 +08:00
|
|
|
// Given a symbol, add it to the GFIDs table if it is a live, defined, function
|
|
|
|
// symbol in an executable section.
|
|
|
|
static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms,
|
|
|
|
Symbol *s) {
|
2019-03-01 05:05:41 +08:00
|
|
|
if (!s)
|
2018-11-27 09:50:17 +08:00
|
|
|
return;
|
|
|
|
|
2019-03-01 05:05:41 +08:00
|
|
|
switch (s->kind()) {
|
|
|
|
case Symbol::DefinedLocalImportKind:
|
|
|
|
case Symbol::DefinedImportDataKind:
|
|
|
|
// Defines an __imp_ pointer, so it is data, so it is ignored.
|
|
|
|
break;
|
|
|
|
case Symbol::DefinedCommonKind:
|
|
|
|
// Common is always data, so it is ignored.
|
|
|
|
break;
|
|
|
|
case Symbol::DefinedAbsoluteKind:
|
|
|
|
case Symbol::DefinedSyntheticKind:
|
|
|
|
// Absolute is never code, synthetic generally isn't and usually isn't
|
|
|
|
// determinable.
|
|
|
|
break;
|
2019-09-04 04:32:16 +08:00
|
|
|
case Symbol::LazyArchiveKind:
|
|
|
|
case Symbol::LazyObjectKind:
|
2019-03-01 05:05:41 +08:00
|
|
|
case Symbol::UndefinedKind:
|
|
|
|
// Undefined symbols resolve to zero, so they don't have an RVA. Lazy
|
|
|
|
// symbols shouldn't have relocations.
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Symbol::DefinedImportThunkKind:
|
|
|
|
// Thunks are always code, include them.
|
|
|
|
addSymbolToRVASet(addressTakenSyms, cast<Defined>(s));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Symbol::DefinedRegularKind: {
|
|
|
|
// This is a regular, defined, symbol from a COFF file. Mark the symbol as
|
|
|
|
// address taken if the symbol type is function and it's in an executable
|
|
|
|
// section.
|
|
|
|
auto *d = cast<DefinedRegular>(s);
|
|
|
|
if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
|
2019-05-25 02:25:49 +08:00
|
|
|
SectionChunk *sc = dyn_cast<SectionChunk>(d->getChunk());
|
|
|
|
if (sc && sc->live &&
|
|
|
|
sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)
|
2019-03-01 05:05:41 +08:00
|
|
|
addSymbolToRVASet(addressTakenSyms, d);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-11-27 09:50:17 +08:00
|
|
|
}
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Visit all relocations from all section contributions of this object file and
|
|
|
|
// mark the relocation target as address-taken.
|
|
|
|
static void markSymbolsWithRelocations(ObjFile *file,
|
|
|
|
SymbolRVASet &usedSymbols) {
|
|
|
|
for (Chunk *c : file->getChunks()) {
|
|
|
|
// We only care about live section chunks. Common chunks and other chunks
|
|
|
|
// don't generally contain relocations.
|
|
|
|
SectionChunk *sc = dyn_cast<SectionChunk>(c);
|
2018-08-31 15:45:20 +08:00
|
|
|
if (!sc || !sc->live)
|
2018-02-06 09:58:26 +08:00
|
|
|
continue;
|
|
|
|
|
2019-05-04 04:17:14 +08:00
|
|
|
for (const coff_relocation &reloc : sc->getRelocs()) {
|
2018-08-09 21:43:22 +08:00
|
|
|
if (config->machine == I386 && reloc.Type == COFF::IMAGE_REL_I386_REL32)
|
|
|
|
// Ignore relative relocations on x86. On x86_64 they can't be ignored
|
|
|
|
// since they're also used to compute absolute addresses.
|
|
|
|
continue;
|
|
|
|
|
|
|
|
Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex);
|
2018-11-27 09:50:17 +08:00
|
|
|
maybeAddAddressTakenFunction(usedSymbols, ref);
|
2018-02-06 09:58:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-11-08 07:24:10 +08:00
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Create the guard function id table. This is a table of RVAs of all
|
|
|
|
// address-taken functions. It is sorted and uniqued, just like the safe SEH
|
|
|
|
// table.
|
2018-04-06 11:25:49 +08:00
|
|
|
void Writer::createGuardCFTables() {
|
2018-02-06 09:58:26 +08:00
|
|
|
SymbolRVASet addressTakenSyms;
|
2018-02-14 04:32:53 +08:00
|
|
|
SymbolRVASet longJmpTargets;
|
2017-11-08 07:24:10 +08:00
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
2018-02-14 04:32:53 +08:00
|
|
|
// If the object was compiled with /guard:cf, the address taken symbols
|
|
|
|
// are in .gfids$y sections, and the longjmp targets are in .gljmp$y
|
|
|
|
// sections. If the object was not compiled with /guard:cf, we assume there
|
|
|
|
// were no setjmp targets, and that all code symbols with relocations are
|
|
|
|
// possibly address-taken.
|
|
|
|
if (file->hasGuardCF()) {
|
2018-02-06 09:58:26 +08:00
|
|
|
markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms);
|
2018-02-14 04:32:53 +08:00
|
|
|
markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets);
|
|
|
|
} else {
|
2018-02-06 09:58:26 +08:00
|
|
|
markSymbolsWithRelocations(file, addressTakenSyms);
|
2018-02-14 04:32:53 +08:00
|
|
|
}
|
2018-02-06 09:58:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Mark the image entry as address-taken.
|
|
|
|
if (config->entry)
|
2018-11-27 09:50:17 +08:00
|
|
|
maybeAddAddressTakenFunction(addressTakenSyms, config->entry);
|
|
|
|
|
|
|
|
// Mark exported symbols in executable sections as address-taken.
|
|
|
|
for (Export &e : config->exports)
|
|
|
|
maybeAddAddressTakenFunction(addressTakenSyms, e.sym);
|
2018-02-06 09:58:26 +08:00
|
|
|
|
2018-06-28 23:22:40 +08:00
|
|
|
// Ensure sections referenced in the gfid table are 16-byte aligned.
|
|
|
|
for (const ChunkAndOffset &c : addressTakenSyms)
|
2019-05-23 04:21:52 +08:00
|
|
|
if (c.inputChunk->getAlignment() < 16)
|
|
|
|
c.inputChunk->setAlignment(16);
|
2018-06-28 23:22:40 +08:00
|
|
|
|
2018-04-06 11:25:49 +08:00
|
|
|
maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table",
|
2018-02-06 09:58:26 +08:00
|
|
|
"__guard_fids_count");
|
|
|
|
|
2018-02-14 04:32:53 +08:00
|
|
|
// Add the longjmp target table unless the user told us not to.
|
|
|
|
if (config->guardCF == GuardCFLevel::Full)
|
2018-04-06 11:25:49 +08:00
|
|
|
maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table",
|
2018-02-14 04:32:53 +08:00
|
|
|
"__guard_longjmp_count");
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Set __guard_flags, which will be used in the load config to indicate that
|
|
|
|
// /guard:cf was enabled.
|
|
|
|
uint32_t guardFlags = uint32_t(coff_guard_flags::CFInstrumented) |
|
|
|
|
uint32_t(coff_guard_flags::HasFidTable);
|
2018-02-14 04:32:53 +08:00
|
|
|
if (config->guardCF == GuardCFLevel::Full)
|
|
|
|
guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable);
|
2018-02-06 09:58:26 +08:00
|
|
|
Symbol *flagSym = symtab->findUnderscore("__guard_flags");
|
|
|
|
cast<DefinedAbsolute>(flagSym)->setVA(guardFlags);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Take a list of input sections containing symbol table indices and add those
|
2020-10-02 02:27:32 +08:00
|
|
|
// symbols to an RVA table. The challenge is that symbol RVAs are not known and
|
2018-02-06 09:58:26 +08:00
|
|
|
// depend on the table size, so we can't directly build a set of integers.
|
2020-10-02 02:27:32 +08:00
|
|
|
void Writer::markSymbolsForRVATable(ObjFile *file,
|
2018-02-06 09:58:26 +08:00
|
|
|
ArrayRef<SectionChunk *> symIdxChunks,
|
2020-10-02 02:27:32 +08:00
|
|
|
SymbolRVASet &tableSymbols) {
|
2018-02-06 09:58:26 +08:00
|
|
|
for (SectionChunk *c : symIdxChunks) {
|
|
|
|
// Skip sections discarded by linker GC. This comes up when a .gfids section
|
|
|
|
// is associated with something like a vtable and the vtable is discarded.
|
|
|
|
// In this case, the associated gfids section is discarded, and we don't
|
|
|
|
// mark the virtual member functions as address-taken by the vtable.
|
2018-08-31 15:45:20 +08:00
|
|
|
if (!c->live)
|
2018-02-06 09:58:26 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// Validate that the contents look like symbol table indices.
|
|
|
|
ArrayRef<uint8_t> data = c->getContents();
|
|
|
|
if (data.size() % 4 != 0) {
|
|
|
|
warn("ignoring " + c->getSectionName() +
|
|
|
|
" symbol table index section in object " + toString(file));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read each symbol table index and check if that symbol was included in the
|
2020-10-02 02:27:32 +08:00
|
|
|
// final link. If so, add it to the table symbol set.
|
2018-02-06 09:58:26 +08:00
|
|
|
ArrayRef<ulittle32_t> symIndices(
|
|
|
|
reinterpret_cast<const ulittle32_t *>(data.data()), data.size() / 4);
|
|
|
|
ArrayRef<Symbol *> objSymbols = file->getSymbols();
|
|
|
|
for (uint32_t symIndex : symIndices) {
|
|
|
|
if (symIndex >= objSymbols.size()) {
|
|
|
|
warn("ignoring invalid symbol table index in section " +
|
|
|
|
c->getSectionName() + " in object " + toString(file));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (Symbol *s = objSymbols[symIndex]) {
|
|
|
|
if (s->isLive())
|
2020-10-02 02:27:32 +08:00
|
|
|
addSymbolToRVASet(tableSymbols, cast<Defined>(s));
|
2018-02-06 09:58:26 +08:00
|
|
|
}
|
|
|
|
}
|
2017-11-08 07:24:10 +08:00
|
|
|
}
|
2018-02-06 09:58:26 +08:00
|
|
|
}
|
2017-11-08 07:24:10 +08:00
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
// Replace the absolute table symbol with a synthetic symbol pointing to
|
2019-07-16 16:26:38 +08:00
|
|
|
// tableChunk so that we can emit base relocations for it and resolve section
|
2018-02-06 09:58:26 +08:00
|
|
|
// relative relocations.
|
2018-04-06 11:25:49 +08:00
|
|
|
void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
|
|
|
|
StringRef countSym) {
|
2018-02-06 09:58:26 +08:00
|
|
|
if (tableSymbols.empty())
|
2017-11-08 07:24:10 +08:00
|
|
|
return;
|
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
RVATableChunk *tableChunk = make<RVATableChunk>(std::move(tableSymbols));
|
2018-04-06 11:25:49 +08:00
|
|
|
rdataSec->addChunk(tableChunk);
|
2017-11-08 07:24:10 +08:00
|
|
|
|
2018-02-06 09:58:26 +08:00
|
|
|
Symbol *t = symtab->findUnderscore(tableSym);
|
|
|
|
Symbol *c = symtab->findUnderscore(countSym);
|
|
|
|
replaceSymbol<DefinedSynthetic>(t, t->getName(), tableChunk);
|
|
|
|
cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / 4);
|
2015-07-25 07:51:14 +08:00
|
|
|
}
|
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW CRT startup code
has a custom framework of its own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR64, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data don't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair (represented by an
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bits of
the target address); on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
// MinGW specific. Gather all relocations that are imported from a DLL even
|
|
|
|
// though the code didn't expect it to, produce the table that the runtime
|
|
|
|
// uses for fixing them up, and provide the synthetic symbols that the
|
|
|
|
// runtime uses for finding the table.
|
|
|
|
void Writer::createRuntimePseudoRelocs() {
|
|
|
|
std::vector<RuntimePseudoReloc> rels;
|
|
|
|
|
|
|
|
for (Chunk *c : symtab->getChunks()) {
|
|
|
|
auto *sc = dyn_cast<SectionChunk>(c);
|
2018-08-31 15:45:20 +08:00
|
|
|
if (!sc || !sc->live)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
sc->getRuntimePseudoRelocs(rels);
|
|
|
|
}
|
|
|
|
|
2020-04-26 05:49:44 +08:00
|
|
|
if (!config->pseudoRelocs) {
|
|
|
|
// Not writing any pseudo relocs; if some were needed, error out and
|
|
|
|
// indicate what required them.
|
|
|
|
for (const RuntimePseudoReloc &rpr : rels)
|
|
|
|
error("automatic dllimport of " + rpr.sym->getName() + " in " +
|
|
|
|
toString(rpr.target->file) + " requires pseudo relocations");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
if (!rels.empty())
|
|
|
|
log("Writing " + Twine(rels.size()) + " runtime pseudo relocations");
|
|
|
|
PseudoRelocTableChunk *table = make<PseudoRelocTableChunk>(rels);
|
|
|
|
rdataSec->addChunk(table);
|
|
|
|
EmptyChunk *endOfList = make<EmptyChunk>();
|
|
|
|
rdataSec->addChunk(endOfList);
|
|
|
|
|
|
|
|
Symbol *headSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__");
|
|
|
|
Symbol *endSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__");
|
|
|
|
replaceSymbol<DefinedSynthetic>(headSym, headSym->getName(), table);
|
|
|
|
replaceSymbol<DefinedSynthetic>(endSym, endSym->getName(), endOfList);
|
|
|
|
}
|
|
|
|
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses this kind of list terminator symbol for traversing
the constructor/destructor lists. These list terminators are
actual pointer entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
// MinGW specific.
|
|
|
|
// The MinGW .ctors and .dtors lists have sentinels at each end;
|
|
|
|
// a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end.
|
|
|
|
// There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__
|
|
|
|
// and __DTOR_LIST__ respectively.
|
|
|
|
void Writer::insertCtorDtorSymbols() {
|
|
|
|
AbsolutePointerChunk *ctorListHead = make<AbsolutePointerChunk>(-1);
|
|
|
|
AbsolutePointerChunk *ctorListEnd = make<AbsolutePointerChunk>(0);
|
|
|
|
AbsolutePointerChunk *dtorListHead = make<AbsolutePointerChunk>(-1);
|
|
|
|
AbsolutePointerChunk *dtorListEnd = make<AbsolutePointerChunk>(0);
|
|
|
|
ctorsSec->insertChunkAtStart(ctorListHead);
|
|
|
|
ctorsSec->addChunk(ctorListEnd);
|
|
|
|
dtorsSec->insertChunkAtStart(dtorListHead);
|
|
|
|
dtorsSec->addChunk(dtorListEnd);
|
2019-07-11 13:40:30 +08:00
|
|
|
|
[COFF] Provide __CTOR_LIST__ and __DTOR_LIST__ symbols for MinGW
MinGW uses these kind of list terminator symbols for traversing
the constructor/destructor lists. These list terminators are
actual pointers entries in the lists, with the values 0 and
(uintptr_t)-1 (instead of just symbols pointing to the start/end
of the list).
(This mechanism exists in both the mingw-w64 crt startup code and
in libgcc; normally the mingw-w64 one is used, but a DLL build of
libgcc uses the libgcc one. Therefore it's not trivial to change
the mechanism without lots of cross-project synchronization and
potentially invalidating some combinations of old/new versions
of them.)
When mingw-w64 has been used with lld so far, the CRT startup object
files have so far provided these symbols, ending up with different,
incompatible builds of the CRT startup object files depending on
whether binutils or lld are going to be used.
In order to avoid the need of different configuration of the CRT startup
object files depending on what linker to be used, provide these symbols
in lld instead. (Mingw-w64 checks at build time whether the linker
provides these symbols or not.) This unifies this particular detail
between the two linkers.
This does disallow the use of the very latest lld with older versions
of mingw-w64 (the configure check for the list was added recently;
earlier it simply checked whether the CRT was built with gcc or clang),
and requires rebuilding the mingw-w64 CRT. But the number of users of
lld+mingw still is low enough that such a change should be tolerable,
and unifies this aspect of the toolchains, easing interoperability
between the toolchains for the future.
The actual test for this feature is added in ctors_dtors_priority.s,
but a number of other tests that checked absolute output addresses
are updated.
Differential Revision: https://reviews.llvm.org/D52053
llvm-svn: 342294
2018-09-15 06:26:59 +08:00
|
|
|
Symbol *ctorListSym = symtab->findUnderscore("__CTOR_LIST__");
|
|
|
|
Symbol *dtorListSym = symtab->findUnderscore("__DTOR_LIST__");
|
|
|
|
replaceSymbol<DefinedSynthetic>(ctorListSym, ctorListSym->getName(),
|
|
|
|
ctorListHead);
|
|
|
|
replaceSymbol<DefinedSynthetic>(dtorListSym, dtorListSym->getName(),
|
|
|
|
dtorListHead);
|
|
|
|
}
|
|
|
|
|
2016-06-20 11:39:39 +08:00
|
|
|
// Handles /section options to allow users to overwrite
|
|
|
|
// section attributes.
|
|
|
|
void Writer::setSectionPermissions() {
|
|
|
|
for (auto &p : config->section) {
|
|
|
|
StringRef name = p.first;
|
|
|
|
uint32_t perm = p.second;
|
2018-04-21 05:10:33 +08:00
|
|
|
for (OutputSection *sec : outputSections)
|
|
|
|
if (sec->name == name)
|
|
|
|
sec->setPermissions(perm);
|
2016-06-20 11:39:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Write section contents to a mmap'ed file.
|
|
|
|
void Writer::writeSections() {
|
2018-02-18 04:41:38 +08:00
|
|
|
// Record the number of sections to apply section index relocations
|
|
|
|
// against absolute symbols. See applySecIdx in Chunks.cpp..
|
|
|
|
DefinedAbsolute::numOutputSections = outputSections.size();
|
2017-06-23 07:33:04 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
uint8_t *buf = buffer->getBufferStart();
|
2015-06-04 00:44:00 +08:00
|
|
|
for (OutputSection *sec : outputSections) {
|
2015-08-14 11:30:59 +08:00
|
|
|
uint8_t *secBuf = buf + sec->getFileOff();
|
2015-05-29 03:09:30 +08:00
|
|
|
// Fill gaps between functions in .text with INT3 instructions
|
|
|
|
// instead of leaving as NUL bytes (which can be interpreted as
|
|
|
|
// ADD instructions).
|
2018-04-20 05:48:37 +08:00
|
|
|
if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE)
|
2015-08-14 11:30:59 +08:00
|
|
|
memset(secBuf, 0xCC, sec->getRawSize());
|
2019-05-10 05:21:22 +08:00
|
|
|
parallelForEach(sec->chunks, [&](Chunk *c) {
|
|
|
|
c->writeTo(secBuf + c->getRVA() - sec->getRVA());
|
|
|
|
});
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
void Writer::writeBuildId() {
|
2018-03-09 03:33:47 +08:00
|
|
|
// There are two important parts to the build ID.
|
|
|
|
// 1) If building with debug info, the COFF debug directory contains a
|
|
|
|
// timestamp as well as a Guid and Age of the PDB.
|
|
|
|
// 2) In all cases, the PE COFF file header also contains a timestamp.
|
|
|
|
// For reproducibility, instead of a timestamp we want to use a hash of the
|
2018-09-16 02:37:22 +08:00
|
|
|
// PE contents.
|
2018-03-09 03:33:47 +08:00
|
|
|
if (config->debug) {
|
|
|
|
assert(buildId && "BuildId is not set!");
|
2018-09-16 02:37:22 +08:00
|
|
|
// BuildId->BuildId was filled in when the PDB was written.
|
2018-03-09 03:33:47 +08:00
|
|
|
}
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
|
2018-03-09 03:33:47 +08:00
|
|
|
// At this point the only fields in the COFF file which remain unset are the
|
|
|
|
// "timestamp" in the COFF file header, and the ones in the coff debug
|
|
|
|
// directory. Now we can hash the file and write that hash to the various
|
|
|
|
// timestamp fields in the file.
|
|
|
|
StringRef outputFileData(
|
|
|
|
reinterpret_cast<const char *>(buffer->getBufferStart()),
|
|
|
|
buffer->getBufferSize());
|
2018-03-08 02:13:41 +08:00
|
|
|
|
2018-05-17 23:11:01 +08:00
|
|
|
uint32_t timestamp = config->timestamp;
|
2018-11-27 17:20:55 +08:00
|
|
|
uint64_t hash = 0;
|
|
|
|
bool generateSyntheticBuildId =
|
|
|
|
config->mingw && config->debug && config->pdbPath.empty();
|
|
|
|
|
|
|
|
if (config->repro || generateSyntheticBuildId)
|
|
|
|
hash = xxHash64(outputFileData);
|
|
|
|
|
2018-05-17 23:11:01 +08:00
|
|
|
if (config->repro)
|
2018-11-27 17:20:55 +08:00
|
|
|
timestamp = static_cast<uint32_t>(hash);
|
|
|
|
|
|
|
|
if (generateSyntheticBuildId) {
|
|
|
|
// For MinGW builds without a PDB file, we still generate a build id
|
|
|
|
// to allow associating a crash dump to the executable.
|
|
|
|
buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70;
|
|
|
|
buildId->buildId->PDB70.Age = 1;
|
|
|
|
memcpy(buildId->buildId->PDB70.Signature, &hash, 8);
|
|
|
|
// xxhash only gives us 8 bytes, so put some fixed data in the other half.
|
|
|
|
memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8);
|
|
|
|
}
|
2018-03-09 03:33:47 +08:00
|
|
|
|
|
|
|
if (debugDirectory)
|
2018-05-17 23:11:01 +08:00
|
|
|
debugDirectory->setTimeDateStamp(timestamp);
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
|
2018-03-09 03:33:47 +08:00
|
|
|
uint8_t *buf = buffer->getBufferStart();
|
|
|
|
buf += dosStubSize + sizeof(PEMagic);
|
|
|
|
object::coff_file_header *coffHeader =
|
|
|
|
reinterpret_cast<coff_file_header *>(buf);
|
2018-05-17 23:11:01 +08:00
|
|
|
coffHeader->TimeDateStamp = timestamp;
|
[LLD COFF/PDB] Incrementally update the build id.
Previously, our algorithm to compute a build id involved hashing the
executable and storing that as the GUID in the CV Debug Record chunk,
and setting the age to 1.
This breaks down in one very obvious case: a user adds some newlines to
a file, rebuilds, but changes nothing else. This causes new line
information and new file checksums to get written to the PDB, meaning
that the debug info is different, but the generated code would be the
same, so we would write the same build over again with an age of 1.
Anyone using a symbol cache would have a problem now, because the
debugger would open the executable, look at the age and guid, find a
matching PDB in the symbol cache and then load it. It would never copy
the new PDB to the symbol cache.
This patch implements the canonical Windows algorithm for updating
a build id, which is to check the existing executable first, and
re-use an existing GUID while bumping the age if it already
exists.
Differential Revision: https://reviews.llvm.org/D36758
llvm-svn: 310961
2017-08-16 05:31:41 +08:00
|
|
|
}
|
|
|
|
|
2015-06-21 12:00:54 +08:00
|
|
|
// Sort .pdata section contents according to PE/COFF spec 5.5.
|
|
|
|
void Writer::sortExceptionTable() {
|
2018-04-07 08:46:55 +08:00
|
|
|
if (!firstPdata)
|
2015-08-06 11:45:27 +08:00
|
|
|
return;
|
|
|
|
// We assume .pdata contains function table entries only.
|
2018-04-07 08:46:55 +08:00
|
|
|
auto bufAddr = [&](Chunk *c) {
|
2019-05-25 02:25:49 +08:00
|
|
|
OutputSection *os = c->getOutputSection();
|
|
|
|
return buffer->getBufferStart() + os->getFileOff() + c->getRVA() -
|
|
|
|
os->getRVA();
|
2018-04-07 08:46:55 +08:00
|
|
|
};
|
|
|
|
uint8_t *begin = bufAddr(firstPdata);
|
|
|
|
uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize();
|
2015-08-06 11:45:27 +08:00
|
|
|
if (config->machine == AMD64) {
|
2015-06-21 12:00:54 +08:00
|
|
|
struct Entry { ulittle32_t begin, end, unwind; };
|
2020-07-09 20:54:53 +08:00
|
|
|
if ((end - begin) % sizeof(Entry) != 0) {
|
|
|
|
fatal("unexpected .pdata size: " + Twine(end - begin) +
|
|
|
|
" is not a multiple of " + Twine(sizeof(Entry)));
|
|
|
|
}
|
2019-02-28 04:53:50 +08:00
|
|
|
parallelSort(
|
|
|
|
MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
|
|
|
|
[](const Entry &a, const Entry &b) { return a.begin < b.begin; });
|
2015-08-06 11:45:27 +08:00
|
|
|
return;
|
|
|
|
}
|
2017-12-14 16:56:29 +08:00
|
|
|
if (config->machine == ARMNT || config->machine == ARM64) {
|
2015-08-06 11:45:27 +08:00
|
|
|
struct Entry { ulittle32_t begin, unwind; };
|
2020-07-09 20:54:53 +08:00
|
|
|
if ((end - begin) % sizeof(Entry) != 0) {
|
|
|
|
fatal("unexpected .pdata size: " + Twine(end - begin) +
|
|
|
|
" is not a multiple of " + Twine(sizeof(Entry)));
|
|
|
|
}
|
2019-02-28 04:53:50 +08:00
|
|
|
parallelSort(
|
|
|
|
MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
|
|
|
|
[](const Entry &a, const Entry &b) { return a.begin < b.begin; });
|
2015-08-06 11:45:27 +08:00
|
|
|
return;
|
2015-06-21 12:00:54 +08:00
|
|
|
}
|
Make it possible to redirect not only errs() but also outs()
This change is for those who use lld as a library. Context:
https://reviews.llvm.org/D70287
This patch adds a new parmeter to lld::*::link() so that we can pass
an raw_ostream object representing stdout. Previously, lld::*::link()
took only an stderr object.
Justification for making stdoutOS and stderrOS mandatory: I wanted to
make link() functions to take stdout and stderr in that order.
However, if we change the function signature from
bool link(ArrayRef<const char *> args, bool canExitEarly,
raw_ostream &stderrOS = llvm::errs());
to
bool link(ArrayRef<const char *> args, bool canExitEarly,
raw_ostream &stdoutOS = llvm::outs(),
raw_ostream &stderrOS = llvm::errs());
, then the meaning of existing code that passes stderrOS silently
changes (stderrOS would be interpreted as stdoutOS). So, I chose to
make existing code not to compile, so that developers can fix their
code.
Differential Revision: https://reviews.llvm.org/D70292
2019-11-15 13:06:57 +08:00
|
|
|
lld::errs() << "warning: don't know how to handle .pdata.\n";
|
2015-06-21 12:00:54 +08:00
|
|
|
}
|
|
|
|
|
2018-10-05 20:56:46 +08:00
|
|
|
// The CRT section contains, among other things, the array of function
|
|
|
|
// pointers that initialize every global variable that is not trivially
|
|
|
|
// constructed. The CRT calls them one after the other prior to invoking
|
|
|
|
// main().
|
|
|
|
//
|
|
|
|
// As per C++ spec, 3.6.2/2.3,
|
|
|
|
// "Variables with ordered initialization defined within a single
|
|
|
|
// translation unit shall be initialized in the order of their definitions
|
|
|
|
// in the translation unit"
|
|
|
|
//
|
|
|
|
// It is therefore critical to sort the chunks containing the function
|
|
|
|
// pointers in the order that they are listed in the object file (top to
|
|
|
|
// bottom), otherwise global objects might not be initialized in the
|
|
|
|
// correct order.
|
|
|
|
void Writer::sortCRTSectionChunks(std::vector<Chunk *> &chunks) {
|
|
|
|
auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) {
|
|
|
|
auto sa = dyn_cast<SectionChunk>(a);
|
|
|
|
auto sb = dyn_cast<SectionChunk>(b);
|
|
|
|
assert(sa && sb && "Non-section chunks in CRT section!");
|
|
|
|
|
|
|
|
StringRef sAObj = sa->file->mb.getBufferIdentifier();
|
|
|
|
StringRef sBObj = sb->file->mb.getBufferIdentifier();
|
|
|
|
|
|
|
|
return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber();
|
|
|
|
};
|
2019-04-23 10:42:06 +08:00
|
|
|
llvm::stable_sort(chunks, sectionChunkOrder);
|
2018-10-05 20:56:46 +08:00
|
|
|
|
|
|
|
if (config->verbose) {
|
|
|
|
for (auto &c : chunks) {
|
|
|
|
auto sc = dyn_cast<SectionChunk>(c);
|
|
|
|
log(" " + sc->file->mb.getBufferIdentifier().str() +
|
|
|
|
", SectionID: " + Twine(sc->getSectionNumber()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Returns the first output section called `name`, or nullptr when there is
// no such section.
OutputSection *Writer::findSection(StringRef name) {
  OutputSection *match = nullptr;
  for (OutputSection *candidate : outputSections) {
    if (candidate->name == name) {
      match = candidate;
      break;
    }
  }
  return match;
}
|
|
|
|
|
|
|
|
uint32_t Writer::getSizeOfInitializedData() {
|
|
|
|
uint32_t res = 0;
|
2015-06-04 00:44:00 +08:00
|
|
|
for (OutputSection *s : outputSections)
|
2018-04-20 05:48:37 +08:00
|
|
|
if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
|
2015-05-29 03:09:30 +08:00
|
|
|
res += s->getRawSize();
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2018-04-06 11:25:49 +08:00
|
|
|
// Add base relocations to .reloc section.
|
|
|
|
void Writer::addBaserels() {
|
|
|
|
if (!config->relocatable)
|
|
|
|
return;
|
2018-09-25 18:59:29 +08:00
|
|
|
relocSec->chunks.clear();
|
2015-07-25 09:44:32 +08:00
|
|
|
std::vector<Baserel> v;
|
2015-06-15 09:23:58 +08:00
|
|
|
for (OutputSection *sec : outputSections) {
|
2018-07-19 12:25:22 +08:00
|
|
|
if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
|
2015-06-15 09:23:58 +08:00
|
|
|
continue;
|
|
|
|
// Collect all locations for base relocations.
|
2018-09-25 18:59:29 +08:00
|
|
|
for (Chunk *c : sec->chunks)
|
2015-07-25 06:58:44 +08:00
|
|
|
c->getBaserels(&v);
|
2015-06-15 09:23:58 +08:00
|
|
|
// Add the addresses to .reloc section.
|
|
|
|
if (!v.empty())
|
2018-04-06 11:25:49 +08:00
|
|
|
addBaserelBlocks(v);
|
2015-06-15 09:23:58 +08:00
|
|
|
v.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add addresses to .reloc section. Note that addresses are grouped by page.
|
2018-04-06 11:25:49 +08:00
|
|
|
void Writer::addBaserelBlocks(std::vector<Baserel> &v) {
|
2015-06-15 09:23:58 +08:00
|
|
|
const uint32_t mask = ~uint32_t(pageSize - 1);
|
2015-07-25 09:44:32 +08:00
|
|
|
uint32_t page = v[0].rva & mask;
|
2015-06-15 09:23:58 +08:00
|
|
|
size_t i = 0, j = 1;
|
|
|
|
for (size_t e = v.size(); j < e; ++j) {
|
2015-07-25 09:44:32 +08:00
|
|
|
uint32_t p = v[j].rva & mask;
|
2015-06-15 09:23:58 +08:00
|
|
|
if (p == page)
|
|
|
|
continue;
|
2018-04-06 11:25:49 +08:00
|
|
|
relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
|
2015-06-15 09:23:58 +08:00
|
|
|
i = j;
|
|
|
|
page = p;
|
|
|
|
}
|
|
|
|
if (i == j)
|
|
|
|
return;
|
2018-04-06 11:25:49 +08:00
|
|
|
relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
|
2015-06-15 09:23:58 +08:00
|
|
|
}
|
2019-01-28 09:45:35 +08:00
|
|
|
|
|
|
|
// Returns the partial section registered under (name, outChars), creating
// and registering a new one first when none exists yet.
PartialSection *Writer::createPartialSection(StringRef name,
                                             uint32_t outChars) {
  // operator[] yields a reference to the stored pointer, inserting a null
  // slot for a new key, so a freshly made section is stored in place.
  PartialSection *&slot = partialSections[{name, outChars}];
  if (!slot)
    slot = make<PartialSection>(name, outChars);
  return slot;
}
|
|
|
|
|
|
|
|
// Looks up the partial section registered under (name, outChars). Returns
// nullptr when there is no such entry; nothing is inserted.
PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) {
  auto pos = partialSections.find({name, outChars});
  if (pos == partialSections.end())
    return nullptr;
  return pos->second;
}
|