From 588e832d0aa608ab5e8f6a30248fdc39207f2a84 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Mon, 15 Jun 2015 01:23:58 +0000 Subject: [PATCH] COFF: Support base relocations. PE/COFF executables/DLLs usually contain data which is called base relocations. Base relocations are a list of addresses that need to be fixed by the loader if load-time relocation is needed. Base relocations are stored in the .reloc section. We emit one base relocation entry for each IMAGE_REL_AMD64_ADDR64 relocation. In order to save disk space, base relocations are grouped by page. Each group is called a block. A block starts with a 32-bit page address followed by 16-bit offsets in the page. That is a more efficient representation of addresses than just an array of 32-bit addresses. llvm-svn: 239710 --- lld/COFF/Chunks.cpp | 40 ++++++- lld/COFF/Chunks.h | 18 +++ lld/COFF/Config.h | 1 + lld/COFF/Driver.cpp | 4 + lld/COFF/README.md | 8 +- lld/COFF/Writer.cpp | 54 ++++++++- lld/COFF/Writer.h | 3 + lld/test/COFF/Inputs/baserel.obj.yaml | 164 ++++++++++++++++++++++++++ lld/test/COFF/baserel.test | 72 +++++++++++ 9 files changed, 355 insertions(+), 9 deletions(-) create mode 100644 lld/test/COFF/Inputs/baserel.obj.yaml create mode 100644 lld/test/COFF/baserel.test diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 72fd1e9711d2..e6403c2fc50a 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -21,7 +21,6 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::COFF; -using llvm::RoundUpToAlignment; namespace lld { namespace coff { @@ -114,6 +113,25 @@ void SectionChunk::applyReloc(uint8_t *Buf, const coff_relocation *Rel) { } } +// Windows-specific. +// Collect all locations that contain absolute 64-bit addresses, +// which need to be fixed by the loader if load-time relocation is needed. +// Only called when base relocation is enabled. 
+void SectionChunk::getBaserels(std::vector *Res, Defined *ImageBase) { + for (const auto &I : getSectionRef().relocations()) { + // ADDR64 relocations contain absolute addresses. + // Symbol __ImageBase is special -- it's an absolute symbol, but its + // address never changes even if image is relocated. + const coff_relocation *Rel = File->getCOFFObj()->getCOFFRelocation(I); + if (Rel->Type != IMAGE_REL_AMD64_ADDR64) + continue; + SymbolBody *Body = File->getSymbolBody(Rel->SymbolTableIndex); + if (Body == ImageBase) + continue; + Res->push_back(RVA + Rel->VirtualAddress); + } +} + bool SectionChunk::hasData() const { return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); } @@ -173,5 +191,25 @@ void ImportThunkChunk::writeTo(uint8_t *Buf) { write32le(Buf + FileOff + 2, Operand); } +// Windows-specific. +// This class represents a block in .reloc section. +BaserelChunk::BaserelChunk(uint32_t Page, uint32_t *Begin, uint32_t *End) { + // Block header consists of 4 byte page RVA and 4 byte block size. + // Each entry is 2 byte. Last entry may be padding. + Data.resize(RoundUpToAlignment((End - Begin) * 2 + 8, 4)); + uint8_t *P = Data.data(); + write32le(P, Page); + write32le(P + 4, Data.size()); + P += 8; + for (uint32_t *I = Begin; I != End; ++I) { + write16le(P, (IMAGE_REL_BASED_DIR64 << 12) | (*I - Page)); + P += 2; + } +} + +void BaserelChunk::writeTo(uint8_t *Buf) { + memcpy(Buf + FileOff, Data.data(), Data.size()); +} + } // namespace coff } // namespace lld diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 10ad33298c2a..88d944c7eab2 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -92,6 +92,10 @@ public: void setOutputSection(OutputSection *O) { Out = O; } OutputSection *getOutputSection() { return Out; } + // Windows-specific. + // Collect all locations that contain absolute addresses for base relocations. + virtual void getBaserels(std::vector *Res, Defined *ImageBase) {} + protected: // The RVA of this chunk in the output. 
The writer sets a value. uint64_t RVA = 0; @@ -123,6 +127,7 @@ public: StringRef getSectionName() const override { return SectionName; } void printDiscardedMessage() override; bool isCOMDAT() const override; + void getBaserels(std::vector *Res, Defined *ImageBase) override; // Adds COMDAT associative sections to this COMDAT section. A chunk // and its children are treated as a group by the garbage collector. @@ -183,6 +188,19 @@ private: Defined *ImpSymbol; }; +// Windows-specific. +// This class represents a block in .reloc section. +// See the PE/COFF spec 5.6 for details. +class BaserelChunk : public Chunk { +public: + BaserelChunk(uint32_t Page, uint32_t *Begin, uint32_t *End); + size_t getSize() const override { return Data.size(); } + void writeTo(uint8_t *Buf) override; + +private: + std::vector Data; +}; + } // namespace coff } // namespace lld diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 8f19df91eb47..d40fdaaf271f 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -31,6 +31,7 @@ public: StringRef EntryName; std::string OutputFile; bool DoGC = true; + bool Relocatable = true; // Symbols in this set are considered as live by the garbage collector. std::set GCRoots; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index a9ef480e48fb..8e74a6557716 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -225,6 +225,10 @@ bool LinkerDriver::link(int Argc, const char *Argv[]) { if (auto *Arg = Args->getLastArg(OPT_entry)) Config->EntryName = Arg->getValue(); + // Handle /fixed + if (Args->hasArg(OPT_fixed)) + Config->Relocatable = false; + // Handle /machine auto MTOrErr = getMachineType(Args.get()); if (auto EC = MTOrErr.getError()) { diff --git a/lld/COFF/README.md b/lld/COFF/README.md index 512da8aedaa9..1b2f1375de69 100644 --- a/lld/COFF/README.md +++ b/lld/COFF/README.md @@ -222,12 +222,8 @@ Glossary locations containing addresses. The loader adds a difference between RVA and actual load address to all locations listed there. 
- Note 1: This run-time relocation mechanism is very simple compared - to ELF. There's no PLT or GOT. Images are relocated as a whole just + Note that this run-time relocation mechanism is much simpler than ELF. + There's no PLT or GOT. Images are relocated as a whole just by shifting entire images in memory by some offsets. Although doing this breaks text sharing, I think this mechanism is not actually bad on today's computers. - - Note 2: We do not support base relocations yet. But if you were - wondering how Windows manages to load two images having conflicting - addresses into the same memory space, this is how it works. diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 109a15ccdd55..576e3c856ae6 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -42,6 +42,8 @@ std::error_code Writer::write(StringRef OutputPath) { markLive(); createSections(); createImportTables(); + if (Config->Relocatable) + createSection(".reloc"); assignAddresses(); removeEmptySections(); if (auto EC = openFile(OutputPath)) @@ -192,6 +194,8 @@ void Writer::assignAddresses() { uint64_t RVA = 0x1000; // The first page is kept unmapped. 
uint64_t FileOff = SizeOfHeaders; for (OutputSection *Sec : OutputSections) { + if (Sec->getName() == ".reloc") + addBaserels(Sec); Sec->setRVA(RVA); Sec->setFileOffset(FileOff); RVA += RoundUpToAlignment(Sec->getVirtualSize(), PageSize); @@ -238,8 +242,9 @@ void Writer::writeHeader() { COFF->Machine = MachineType; COFF->NumberOfSections = OutputSections.size(); COFF->Characteristics = - (IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_RELOCS_STRIPPED | - IMAGE_FILE_LARGE_ADDRESS_AWARE); + (IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE); + if (!Config->Relocatable) + COFF->Characteristics = COFF->Characteristics | IMAGE_FILE_RELOCS_STRIPPED; COFF->SizeOfOptionalHeader = sizeof(pe32plus_header) + sizeof(data_directory) * NumberfOfDataDirectory; @@ -265,6 +270,8 @@ void Writer::writeHeader() { PE->SizeOfStackCommit = Config->StackCommit; PE->SizeOfHeapReserve = Config->HeapReserve; PE->SizeOfHeapCommit = Config->HeapCommit; + if (Config->Relocatable) + PE->DLLCharacteristics = IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; PE->NumberOfRvaAndSize = NumberfOfDataDirectory; if (OutputSection *Text = findSection(".text")) { PE->BaseOfCode = Text->getRVA(); @@ -285,6 +292,10 @@ void Writer::writeHeader() { DataDirectory[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); DataDirectory[RESOURCE_TABLE].Size = Sec->getRawSize(); } + if (OutputSection *Sec = findSection(".reloc")) { + DataDirectory[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + DataDirectory[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + } // Section table // Name field in the section table is 8 byte long. 
Longer names need @@ -368,6 +379,7 @@ OutputSection *Writer::createSection(StringRef Name) { const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const auto CODE = IMAGE_SCN_CNT_CODE; + const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; const auto R = IMAGE_SCN_MEM_READ; const auto W = IMAGE_SCN_MEM_WRITE; const auto X = IMAGE_SCN_MEM_EXECUTE; @@ -377,6 +389,7 @@ OutputSection *Writer::createSection(StringRef Name) { .Case(".didat", DATA | R) .Case(".idata", DATA | R) .Case(".rdata", DATA | R) + .Case(".reloc", DATA | DISCARDABLE | R) .Case(".text", CODE | R | X) .Default(0); if (!Perms) @@ -388,5 +401,42 @@ OutputSection *Writer::createSection(StringRef Name) { return Sec; } +// Dest is .reloc section. Add contents to that section. +void Writer::addBaserels(OutputSection *Dest) { + std::vector V; + Defined *ImageBase = cast(Symtab->find("__ImageBase")); + for (OutputSection *Sec : OutputSections) { + if (Sec == Dest) + continue; + // Collect all locations for base relocations. + for (Chunk *C : Sec->getChunks()) + C->getBaserels(&V, ImageBase); + // Add the addresses to .reloc section. + if (!V.empty()) + addBaserelBlocks(Dest, V); + V.clear(); + } +} + +// Add addresses to .reloc section. Note that addresses are grouped by page. 
+void Writer::addBaserelBlocks(OutputSection *Dest, std::vector &V) { + const uint32_t Mask = ~uint32_t(PageSize - 1); + uint32_t Page = V[0] & Mask; + size_t I = 0, J = 1; + for (size_t E = V.size(); J < E; ++J) { + uint32_t P = V[J] & Mask; + if (P == Page) + continue; + BaserelChunk *Buf = BAlloc.Allocate(); + Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + I = J; + Page = P; + } + if (I == J) + return; + BaserelChunk *Buf = BAlloc.Allocate(); + Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); +} + } // namespace coff } // namespace lld diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h index d67ce2979a9c..0db88b7ed22d 100644 --- a/lld/COFF/Writer.h +++ b/lld/COFF/Writer.h @@ -87,6 +87,8 @@ private: OutputSection *findSection(StringRef Name); OutputSection *createSection(StringRef Name); + void addBaserels(OutputSection *Dest); + void addBaserelBlocks(OutputSection *Dest, std::vector &V); uint32_t getSizeOfInitializedData(); std::map> binImports(); @@ -94,6 +96,7 @@ private: SymbolTable *Symtab; std::unique_ptr Buffer; llvm::SpecificBumpPtrAllocator CAlloc; + llvm::SpecificBumpPtrAllocator BAlloc; std::vector OutputSections; std::unique_ptr Idata; diff --git a/lld/test/COFF/Inputs/baserel.obj.yaml b/lld/test/COFF/Inputs/baserel.obj.yaml new file mode 100644 index 000000000000..9a31dbbfc599 --- /dev/null +++ b/lld/test/COFF/Inputs/baserel.obj.yaml @@ -0,0 +1,164 @@ +--- +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4096 + SectionData: B800000000000000005068000000000000000068000000000000000050E8000000000000000050E8000000000000000050E80000000000000000 + Relocations: + - VirtualAddress: 0 + SymbolName: abs_symbol + Type: IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 7 + SymbolName: caption + Type: IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 12 + SymbolName: message + Type: 
IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 18 + SymbolName: MessageBoxA + Type: IMAGE_REL_AMD64_REL32 + - VirtualAddress: 24 + SymbolName: ExitProcess + Type: IMAGE_REL_AMD64_REL32 + - VirtualAddress: 30 + SymbolName: __ImageBase + Type: IMAGE_REL_AMD64_ADDR64 + - Name: .text2 + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4096 + SectionData: B800000000000000005068000000000000000068000000000000000050E8000000000000000050E8000000000000000050E80000000000000000 + Relocations: + - VirtualAddress: 0 + SymbolName: abs_symbol + Type: IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 7 + SymbolName: caption + Type: IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 12 + SymbolName: message + Type: IMAGE_REL_AMD64_ADDR64 + - VirtualAddress: 18 + SymbolName: MessageBoxA + Type: IMAGE_REL_AMD64_REL32 + - VirtualAddress: 24 + SymbolName: ExitProcess + Type: IMAGE_REL_AMD64_REL32 + - VirtualAddress: 30 + SymbolName: __ImageBase + Type: IMAGE_REL_AMD64_ADDR64 + - Name: .data + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 4 + SectionData: 48656C6C6F0048656C6C6F20576F726C6400 + - Name: .drectve + Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ] + Alignment: 2147483648 + SectionData: 2F454E5452593A6D61696E20 +symbols: + - Name: "@comp.id" + Value: 10394907 + SectionNumber: 65535 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 28 + NumberOfRelocations: 6 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .text2 + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 28 + NumberOfRelocations: 6 + 
NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .data + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 18 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: MessageBoxA + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: ExitProcess + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: message + Value: 6 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: mainCRTStartup + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: caption + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: .drectve + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 12 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: .file + Value: 0 + SectionNumber: 65534 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + File: "hello.c" + - Name: abs_symbol + Value: 0xDEADBEEF + SectionNumber: -1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __ImageBase + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
diff --git a/lld/test/COFF/baserel.test b/lld/test/COFF/baserel.test new file mode 100644 index 000000000000..7313854fcd99 --- /dev/null +++ b/lld/test/COFF/baserel.test @@ -0,0 +1,72 @@ +# RUN: yaml2obj %p/Inputs/baserel.obj.yaml > %t.obj +# +# RUN: lld -flavor link2 /out:%t.exe %t.obj %p/Inputs/std64.lib +# RUN: llvm-readobj -coff-basereloc %t.exe | FileCheck %s --check-prefix=BASEREL +# +# RUN: lld -flavor link2 /out:%t.exe /fixed %t.obj %p/Inputs/std64.lib +# RUN: llvm-readobj -coff-basereloc %t.exe | FileCheck %s --check-prefix=NOBASEREL + +BASEREL: BaseReloc [ +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x2000 +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x2007 +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x200C +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: ABSOLUTE +BASEREL-NEXT: Address: 0x2000 +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x3000 +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x3007 +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: DIR64 +BASEREL-NEXT: Address: 0x300C +BASEREL-NEXT: } +BASEREL-NEXT: Entry { +BASEREL-NEXT: Type: ABSOLUTE +BASEREL-NEXT: Address: 0x3000 +BASEREL-NEXT: } + +NOBASEREL: BaseReloc [ +NOBASEREL-NEXT: ] + +# RUN: lld -flavor link2 /out:%t.exe %t.obj %p/Inputs/std64.lib +# RUN: llvm-readobj -file-headers -sections %t.exe | FileCheck %s \ +# RUN: --check-prefix=BASEREL-HEADER +# +# RN: lld -flavor link2 /out:%t.exe /fixed %t.obj %p/Inputs/std64.lib +# RN: llvm-readobj -file-headers %t.exe | FileCheck %s \ +# RN: --check-prefix=NOBASEREL-HEADER + +BASEREL-HEADER-NOT: IMAGE_FILE_RELOCS_STRIPPED + +NOBASEREL-HEADER: IMAGE_FILE_RELOCS_STRIPPED + +BASEREL-HEADER: BaseRelocationTableRVA: 0x5000 +BASEREL-HEADER: BaseRelocationTableSize: 0x20 +BASEREL-HEADER: Name: .reloc (2E 72 65 6C 6F 63 00 
00) +BASEREL-HEADER-NEXT: VirtualSize: 0x20 +BASEREL-HEADER-NEXT: VirtualAddress: 0x5000 +BASEREL-HEADER-NEXT: RawDataSize: 512 +BASEREL-HEADER-NEXT: PointerToRawData: 0x1800 +BASEREL-HEADER-NEXT: PointerToRelocations: 0x0 +BASEREL-HEADER-NEXT: PointerToLineNumbers: 0x0 +BASEREL-HEADER-NEXT: RelocationCount: 0 +BASEREL-HEADER-NEXT: LineNumberCount: 0 +BASEREL-HEADER-NEXT: Characteristics [ (0x40000040) +BASEREL-HEADER-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40) +BASEREL-HEADER-NEXT: IMAGE_SCN_MEM_READ (0x40000000) +BASEREL-HEADER-NEXT: ]