COFF: Support base relocations.

PE/COFF executables and DLLs usually contain data called base
relocations. Base relocations are a list of addresses that the
loader needs to fix up if load-time relocation is needed.

Base relocations are stored in the .reloc section.

We emit one base relocation entry for each IMAGE_REL_AMD64_ADDR64
relocation.

In order to save disk space, base relocations are grouped by page.
Each group is called a block. A block starts with a 32-bit page
address and a 32-bit block size, followed by 16-bit entries that
hold offsets within the page. That is a more efficient
representation of addresses than just an array of 32-bit addresses.

llvm-svn: 239710
This commit is contained in:
Rui Ueyama 2015-06-15 01:23:58 +00:00
parent 0bee1d7ff1
commit 588e832d0a
9 changed files with 355 additions and 9 deletions

View File

@ -21,7 +21,6 @@ using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::COFF;
using llvm::RoundUpToAlignment;
namespace lld {
namespace coff {
@ -114,6 +113,25 @@ void SectionChunk::applyReloc(uint8_t *Buf, const coff_relocation *Rel) {
}
}
// Windows-specific.
// Collect all locations that contain absolute 64-bit addresses,
// which need to be fixed by the loader if load-time relocation is needed.
// Only called when base relocation is enabled.
void SectionChunk::getBaserels(std::vector<uint32_t> *Res, Defined *ImageBase) {
for (const auto &I : getSectionRef().relocations()) {
// ADDR64 relocations contain absolute addresses.
// Symbol __ImageBase is special -- it's an absolute symbol, but its
// address never changes even if image is relocated.
const coff_relocation *Rel = File->getCOFFObj()->getCOFFRelocation(I);
if (Rel->Type != IMAGE_REL_AMD64_ADDR64)
continue;
SymbolBody *Body = File->getSymbolBody(Rel->SymbolTableIndex);
if (Body == ImageBase)
continue;
Res->push_back(RVA + Rel->VirtualAddress);
}
}
// Returns true unless the section header carries the
// IMAGE_SCN_CNT_UNINITIALIZED_DATA flag.
bool SectionChunk::hasData() const {
  const bool IsUninitialized =
      Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA;
  return !IsUninitialized;
}
@ -173,5 +191,25 @@ void ImportThunkChunk::writeTo(uint8_t *Buf) {
write32le(Buf + FileOff + 2, Operand);
}
// Windows-specific.
// Builds the contents of one block of the .reloc section.
//
// Page is the RVA of the page the block describes; [Begin, End) is the range
// of RVAs within that page that need a base relocation entry.
BaserelChunk::BaserelChunk(uint32_t Page, uint32_t *Begin, uint32_t *End) {
  // The block header is a 4 byte page RVA followed by a 4 byte block size.
  // Each entry after the header is 2 bytes.
  enum { HeaderSize = 8, EntrySize = 2 };

  // The block size is rounded up to a multiple of 4, so the last entry may be
  // padding (left as zero by resize()).
  Data.resize(RoundUpToAlignment((End - Begin) * EntrySize + HeaderSize, 4));

  uint8_t *Out = Data.data();
  write32le(Out, Page);
  write32le(Out + 4, Data.size());
  Out += HeaderSize;

  // Each entry holds the relocation type in the bits above bit 11 and the
  // offset from the page start in the low bits.
  for (uint32_t *Addr = Begin; Addr != End; ++Addr, Out += EntrySize)
    write16le(Out, (IMAGE_REL_BASED_DIR64 << 12) | (*Addr - Page));
}
// Copies the pre-built block contents into the output buffer at this chunk's
// file offset.
void BaserelChunk::writeTo(uint8_t *Buf) {
  uint8_t *Dest = Buf + FileOff;
  memcpy(Dest, Data.data(), Data.size());
}
} // namespace coff
} // namespace lld

View File

@ -92,6 +92,10 @@ public:
void setOutputSection(OutputSection *O) { Out = O; }
OutputSection *getOutputSection() { return Out; }
// Windows-specific.
// Collect all locations that contain absolute addresses for base relocations.
// Implementations append the RVA of each such location to Res. ImageBase is
// the __ImageBase symbol; references to it are not collected.
// The default implementation collects nothing.
virtual void getBaserels(std::vector<uint32_t> *Res, Defined *ImageBase) {}
protected:
// The RVA of this chunk in the output. The writer sets a value.
uint64_t RVA = 0;
@ -123,6 +127,7 @@ public:
StringRef getSectionName() const override { return SectionName; }
void printDiscardedMessage() override;
bool isCOMDAT() const override;
void getBaserels(std::vector<uint32_t> *Res, Defined *ImageBase) override;
// Adds COMDAT associative sections to this COMDAT section. A chunk
// and its children are treated as a group by the garbage collector.
@ -183,6 +188,19 @@ private:
Defined *ImpSymbol;
};
// Windows-specific.
// A chunk holding one block of the .reloc section: the base relocation
// entries for a single page. The block bytes are built by the constructor
// and copied out unchanged by writeTo().
// See the PE/COFF spec 5.6 for details.
class BaserelChunk : public Chunk {
public:
  // Page is the page RVA; [Begin, End) are the RVAs to relocate in that page.
  BaserelChunk(uint32_t Page, uint32_t *Begin, uint32_t *End);

  void writeTo(uint8_t *Buf) override;

  // Size in bytes of the block, including its header and any padding.
  size_t getSize() const override { return Data.size(); }

private:
  // Serialized block contents.
  std::vector<uint8_t> Data;
};
} // namespace coff
} // namespace lld

View File

@ -31,6 +31,7 @@ public:
StringRef EntryName;
std::string OutputFile;
bool DoGC = true;
bool Relocatable = true;
// Symbols in this set are considered as live by the garbage collector.
std::set<StringRef> GCRoots;

View File

@ -225,6 +225,10 @@ bool LinkerDriver::link(int Argc, const char *Argv[]) {
if (auto *Arg = Args->getLastArg(OPT_entry))
Config->EntryName = Arg->getValue();
// Handle /fixed
if (Args->hasArg(OPT_fixed))
Config->Relocatable = false;
// Handle /machine
auto MTOrErr = getMachineType(Args.get());
if (auto EC = MTOrErr.getError()) {

View File

@ -222,12 +222,8 @@ Glossary
locations containing addresses. The loader adds a difference between
RVA and actual load address to all locations listed there.
Note 1: This run-time relocation mechanism is very simple compared
to ELF. There's no PLT or GOT. Images are relocated as a whole just
Note that this run-time relocation mechanism is much simpler than ELF.
There's no PLT or GOT. Images are relocated as a whole just
by shifting entire images in memory by some offsets. Although doing
this breaks text sharing, I think this mechanism is not actually bad
on today's computers.
Note 2: We do not support base relocations yet. But if you were
wondering how Windows manages to load two images having conflicting
addresses into the same memory space, this is how it works.

View File

@ -42,6 +42,8 @@ std::error_code Writer::write(StringRef OutputPath) {
markLive();
createSections();
createImportTables();
if (Config->Relocatable)
createSection(".reloc");
assignAddresses();
removeEmptySections();
if (auto EC = openFile(OutputPath))
@ -192,6 +194,8 @@ void Writer::assignAddresses() {
uint64_t RVA = 0x1000; // The first page is kept unmapped.
uint64_t FileOff = SizeOfHeaders;
for (OutputSection *Sec : OutputSections) {
if (Sec->getName() == ".reloc")
addBaserels(Sec);
Sec->setRVA(RVA);
Sec->setFileOffset(FileOff);
RVA += RoundUpToAlignment(Sec->getVirtualSize(), PageSize);
@ -238,8 +242,9 @@ void Writer::writeHeader() {
COFF->Machine = MachineType;
COFF->NumberOfSections = OutputSections.size();
COFF->Characteristics =
(IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_RELOCS_STRIPPED |
IMAGE_FILE_LARGE_ADDRESS_AWARE);
(IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE);
if (!Config->Relocatable)
COFF->Characteristics = COFF->Characteristics | IMAGE_FILE_RELOCS_STRIPPED;
COFF->SizeOfOptionalHeader =
sizeof(pe32plus_header) + sizeof(data_directory) * NumberfOfDataDirectory;
@ -265,6 +270,8 @@ void Writer::writeHeader() {
PE->SizeOfStackCommit = Config->StackCommit;
PE->SizeOfHeapReserve = Config->HeapReserve;
PE->SizeOfHeapCommit = Config->HeapCommit;
if (Config->Relocatable)
PE->DLLCharacteristics = IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
PE->NumberOfRvaAndSize = NumberfOfDataDirectory;
if (OutputSection *Text = findSection(".text")) {
PE->BaseOfCode = Text->getRVA();
@ -285,6 +292,10 @@ void Writer::writeHeader() {
DataDirectory[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA();
DataDirectory[RESOURCE_TABLE].Size = Sec->getRawSize();
}
if (OutputSection *Sec = findSection(".reloc")) {
DataDirectory[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA();
DataDirectory[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize();
}
// Section table
// Name field in the section table is 8 byte long. Longer names need
@ -368,6 +379,7 @@ OutputSection *Writer::createSection(StringRef Name) {
const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA;
const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA;
const auto CODE = IMAGE_SCN_CNT_CODE;
const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE;
const auto R = IMAGE_SCN_MEM_READ;
const auto W = IMAGE_SCN_MEM_WRITE;
const auto X = IMAGE_SCN_MEM_EXECUTE;
@ -377,6 +389,7 @@ OutputSection *Writer::createSection(StringRef Name) {
.Case(".didat", DATA | R)
.Case(".idata", DATA | R)
.Case(".rdata", DATA | R)
.Case(".reloc", DATA | DISCARDABLE | R)
.Case(".text", CODE | R | X)
.Default(0);
if (!Perms)
@ -388,5 +401,42 @@ OutputSection *Writer::createSection(StringRef Name) {
return Sec;
}
// Fills Dest, the .reloc section, with base relocation blocks describing
// every other output section.
void Writer::addBaserels(OutputSection *Dest) {
  Defined *ImageBase = cast<Defined>(Symtab->find("__ImageBase"));
  // Scratch list of relocation sites, reused for each section.
  std::vector<uint32_t> Addrs;
  for (OutputSection *Sec : OutputSections) {
    if (Sec != Dest) {
      // Gather the locations needing base relocations in this section.
      for (Chunk *C : Sec->getChunks())
        C->getBaserels(&Addrs, ImageBase);
      // Emit them into .reloc as page-grouped blocks.
      if (!Addrs.empty())
        addBaserelBlocks(Dest, Addrs);
      Addrs.clear();
    }
  }
}
// Add addresses to .reloc section. Note that addresses are grouped by page:
// each maximal run of consecutive elements of V that fall in the same page
// becomes one BaserelChunk appended to Dest.
//
// Dest is the .reloc output section. V holds the RVAs to relocate; it is
// only read. An empty V adds nothing.
void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<uint32_t> &V) {
  // Guard against empty input; V[0] below would otherwise be out of bounds.
  if (V.empty())
    return;
  const uint32_t Mask = ~uint32_t(PageSize - 1);
  const size_t E = V.size();
  uint32_t Page = V[0] & Mask;
  // I is the index of the first address of the block being accumulated.
  size_t I = 0;
  for (size_t J = 1; J < E; ++J) {
    uint32_t P = V[J] & Mask;
    if (P == Page)
      continue;
    // V[J] starts a new page: flush [I, J) as one block.
    BaserelChunk *Buf = BAlloc.Allocate();
    Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J));
    I = J;
    Page = P;
  }
  // Flush the final block. It is never empty because I < E always holds here.
  BaserelChunk *Buf = BAlloc.Allocate();
  Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + E));
}
} // namespace coff
} // namespace lld

View File

@ -87,6 +87,8 @@ private:
OutputSection *findSection(StringRef Name);
OutputSection *createSection(StringRef Name);
void addBaserels(OutputSection *Dest);
void addBaserelBlocks(OutputSection *Dest, std::vector<uint32_t> &V);
uint32_t getSizeOfInitializedData();
std::map<StringRef, std::vector<DefinedImportData *>> binImports();
@ -94,6 +96,7 @@ private:
SymbolTable *Symtab;
std::unique_ptr<llvm::FileOutputBuffer> Buffer;
llvm::SpecificBumpPtrAllocator<OutputSection> CAlloc;
llvm::SpecificBumpPtrAllocator<BaserelChunk> BAlloc;
std::vector<OutputSection *> OutputSections;
std::unique_ptr<IdataContents> Idata;

View File

@ -0,0 +1,164 @@
---
header:
Machine: IMAGE_FILE_MACHINE_AMD64
Characteristics: [ ]
sections:
- Name: .text
Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
Alignment: 4096
SectionData: B800000000000000005068000000000000000068000000000000000050E8000000000000000050E8000000000000000050E80000000000000000
Relocations:
- VirtualAddress: 0
SymbolName: abs_symbol
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 7
SymbolName: caption
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 12
SymbolName: message
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 18
SymbolName: MessageBoxA
Type: IMAGE_REL_AMD64_REL32
- VirtualAddress: 24
SymbolName: ExitProcess
Type: IMAGE_REL_AMD64_REL32
- VirtualAddress: 30
SymbolName: __ImageBase
Type: IMAGE_REL_AMD64_ADDR64
- Name: .text2
Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
Alignment: 4096
SectionData: B800000000000000005068000000000000000068000000000000000050E8000000000000000050E8000000000000000050E80000000000000000
Relocations:
- VirtualAddress: 0
SymbolName: abs_symbol
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 7
SymbolName: caption
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 12
SymbolName: message
Type: IMAGE_REL_AMD64_ADDR64
- VirtualAddress: 18
SymbolName: MessageBoxA
Type: IMAGE_REL_AMD64_REL32
- VirtualAddress: 24
SymbolName: ExitProcess
Type: IMAGE_REL_AMD64_REL32
- VirtualAddress: 30
SymbolName: __ImageBase
Type: IMAGE_REL_AMD64_ADDR64
- Name: .data
Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
Alignment: 4
SectionData: 48656C6C6F0048656C6C6F20576F726C6400
- Name: .drectve
Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ]
Alignment: 2147483648
SectionData: 2F454E5452593A6D61696E20
symbols:
- Name: "@comp.id"
Value: 10394907
SectionNumber: 65535
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
- Name: .text
Value: 0
SectionNumber: 1
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 28
NumberOfRelocations: 6
NumberOfLinenumbers: 0
CheckSum: 0
Number: 0
- Name: .text2
Value: 0
SectionNumber: 1
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 28
NumberOfRelocations: 6
NumberOfLinenumbers: 0
CheckSum: 0
Number: 0
- Name: .data
Value: 0
SectionNumber: 3
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 18
NumberOfRelocations: 0
NumberOfLinenumbers: 0
CheckSum: 0
Number: 0
- Name: MessageBoxA
Value: 0
SectionNumber: 0
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
- Name: ExitProcess
Value: 0
SectionNumber: 0
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
- Name: message
Value: 6
SectionNumber: 2
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
- Name: mainCRTStartup
Value: 0
SectionNumber: 1
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
- Name: caption
Value: 0
SectionNumber: 2
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
- Name: .drectve
Value: 0
SectionNumber: 3
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 12
NumberOfRelocations: 0
NumberOfLinenumbers: 0
CheckSum: 0
Number: 0
- Name: .file
Value: 0
SectionNumber: 65534
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
File: "hello.c"
- Name: abs_symbol
Value: 0xDEADBEEF
SectionNumber: -1
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
- Name: __ImageBase
Value: 0
SectionNumber: 0
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
...

View File

@ -0,0 +1,72 @@
# RUN: yaml2obj %p/Inputs/baserel.obj.yaml > %t.obj
#
# RUN: lld -flavor link2 /out:%t.exe %t.obj %p/Inputs/std64.lib
# RUN: llvm-readobj -coff-basereloc %t.exe | FileCheck %s --check-prefix=BASEREL
#
# RUN: lld -flavor link2 /out:%t.exe /fixed %t.obj %p/Inputs/std64.lib
# RUN: llvm-readobj -coff-basereloc %t.exe | FileCheck %s --check-prefix=NOBASEREL
BASEREL: BaseReloc [
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x2000
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x2007
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x200C
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: ABSOLUTE
BASEREL-NEXT: Address: 0x2000
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x3000
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x3007
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: DIR64
BASEREL-NEXT: Address: 0x300C
BASEREL-NEXT: }
BASEREL-NEXT: Entry {
BASEREL-NEXT: Type: ABSOLUTE
BASEREL-NEXT: Address: 0x3000
BASEREL-NEXT: }
NOBASEREL: BaseReloc [
NOBASEREL-NEXT: ]
# RUN: lld -flavor link2 /out:%t.exe %t.obj %p/Inputs/std64.lib
# RUN: llvm-readobj -file-headers -sections %t.exe | FileCheck %s \
# RUN: --check-prefix=BASEREL-HEADER
#
# RUN: lld -flavor link2 /out:%t.exe /fixed %t.obj %p/Inputs/std64.lib
# RUN: llvm-readobj -file-headers %t.exe | FileCheck %s \
# RUN: --check-prefix=NOBASEREL-HEADER
BASEREL-HEADER-NOT: IMAGE_FILE_RELOCS_STRIPPED
NOBASEREL-HEADER: IMAGE_FILE_RELOCS_STRIPPED
BASEREL-HEADER: BaseRelocationTableRVA: 0x5000
BASEREL-HEADER: BaseRelocationTableSize: 0x20
BASEREL-HEADER: Name: .reloc (2E 72 65 6C 6F 63 00 00)
BASEREL-HEADER-NEXT: VirtualSize: 0x20
BASEREL-HEADER-NEXT: VirtualAddress: 0x5000
BASEREL-HEADER-NEXT: RawDataSize: 512
BASEREL-HEADER-NEXT: PointerToRawData: 0x1800
BASEREL-HEADER-NEXT: PointerToRelocations: 0x0
BASEREL-HEADER-NEXT: PointerToLineNumbers: 0x0
BASEREL-HEADER-NEXT: RelocationCount: 0
BASEREL-HEADER-NEXT: LineNumberCount: 0
BASEREL-HEADER-NEXT: Characteristics [ (0x40000040)
BASEREL-HEADER-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40)
BASEREL-HEADER-NEXT: IMAGE_SCN_MEM_READ (0x40000000)
BASEREL-HEADER-NEXT: ]