From 05384080df67096cb43885df900745c932b2fd3c Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 12 Oct 2016 22:36:31 +0000 Subject: [PATCH] Support GNU-style ZLIB-compressed input sections. Previously, we supported only SHF_COMPRESSED sections because it's new and it's the ELF standard. But there are object files compressed in the GNU style out there, so we have to support them as well. Sections compressed in the GNU style start with ".zdebug_" and contain different headers than the ELF standard's one. In this patch, getRawCompressedData is responsible for handling them. A tricky thing about GNU-style compressed sections is that we have to rename them when creating output sections. ".zdebug_" prefix implies the section is compressed. We need to rename ".zdebug_" to ".debug_" because our output sections are not compressed. We do that in this patch. llvm-svn: 284068 --- lld/ELF/InputSection.cpp | 78 ++++++++++++++----- lld/ELF/InputSection.h | 9 ++- lld/ELF/LinkerScript.cpp | 2 +- lld/ELF/Writer.cpp | 10 ++- lld/ELF/Writer.h | 9 +-- lld/test/ELF/compressed-debug-input.s | 105 +++++++++++++++----------- 6 files changed, 143 insertions(+), 70 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 28431fa10376..6ec654ed4054 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -23,6 +23,7 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; @@ -40,12 +41,19 @@ static ArrayRef getSectionContents(elf::ObjectFile *File, return check(File->getObj().getSectionContents(Hdr)); } +// ELF supports ZLIB-compressed section. Returns true if the section +// is compressed. 
+template +static bool isCompressed(const typename ELFT::Shdr *Hdr, StringRef Name) { + return (Hdr->sh_flags & SHF_COMPRESSED) || Name.startswith(".zdebug"); +} + template InputSectionBase::InputSectionBase(elf::ObjectFile *File, const Elf_Shdr *Hdr, StringRef Name, Kind SectionKind) : InputSectionData(SectionKind, Name, getSectionContents(File, Hdr), - Hdr->sh_flags & SHF_COMPRESSED, !Config->GcSections), + isCompressed(Hdr, Name), !Config->GcSections), Header(Hdr), File(File), Repl(this) { // The ELF spec states that a value of 0 means the section has // no alignment constraits. @@ -100,30 +108,62 @@ typename ELFT::uint InputSectionBase::getOffset(uintX_t Offset) const { llvm_unreachable("invalid section kind"); } +// Returns compressed data and its size when uncompressed. +template +std::pair, uint64_t> +InputSectionBase::getElfCompressedData(ArrayRef Data) { + // Compressed section with Elf_Chdr is the ELF standard. + if (Data.size() < sizeof(Elf_Chdr)) + fatal(getName(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast(Data.data()); + if (Hdr->ch_type != ELFCOMPRESS_ZLIB) + fatal(getName(this) + ": unsupported compression type"); + return {Data.slice(sizeof(*Hdr)), Hdr->ch_size}; +} + +// Returns compressed data and its size when uncompressed. +template +std::pair, uint64_t> +InputSectionBase::getRawCompressedData(ArrayRef Data) { + // Compressed sections without Elf_Chdr header contain this header + // instead. This is a GNU extension. 
+ struct ZlibHeader { + char magic[4]; // should be "ZLIB" + char Size[8]; // Uncompressed size in big-endian + }; + + if (Data.size() < sizeof(ZlibHeader)) + fatal(getName(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast(Data.data()); + if (memcmp(Hdr->magic, "ZLIB", 4)) + fatal(getName(this) + ": broken ZLIB-compressed section"); + return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)}; +} + template void InputSectionBase::uncompress() { if (!zlib::isAvailable()) fatal(getName(this) + ": build lld with zlib to enable compressed sections support"); - // A compressed section consists of a header of Elf_Chdr type - // followed by compressed data. - if (Data.size() < sizeof(Elf_Chdr)) - fatal("corrupt compressed section"); + // This section is compressed. Here we decompress it. Ideally, all + // compressed sections have SHF_COMPRESSED bit and their contents + // start with headers of Elf_Chdr type. However, sections whose + // names start with ".zdebug_" don't have the bit and contains a raw + // ZLIB-compressed data (which is a bad thing because section names + // shouldn't be significant in ELF.) We need to be able to read both. + ArrayRef Buf; // Compressed data + size_t Size; // Uncompressed size + if (Header->sh_flags & SHF_COMPRESSED) + std::tie(Buf, Size) = getElfCompressedData(Data); + else + std::tie(Buf, Size) = getRawCompressedData(Data); - auto *Hdr = reinterpret_cast(Data.data()); - Data = Data.slice(sizeof(Elf_Chdr)); - - if (Hdr->ch_type != ELFCOMPRESS_ZLIB) - fatal(getName(this) + ": unsupported compression type"); - - StringRef Buf((const char *)Data.data(), Data.size()); - size_t UncompressedDataSize = Hdr->ch_size; - UncompressedData.reset(new char[UncompressedDataSize]); - if (zlib::uncompress(Buf, UncompressedData.get(), UncompressedDataSize) != - zlib::StatusOK) - fatal(getName(this) + ": error uncompressing section"); - Data = ArrayRef((uint8_t *)UncompressedData.get(), - UncompressedDataSize); + // Uncompress Buf. 
+ UncompressedData.reset(new uint8_t[Size]); + if (zlib::uncompress(StringRef((const char *)Buf.data(), Buf.size()), + (char *)UncompressedData.get(), Size) != zlib::StatusOK) + fatal(getName(this) + ": error while uncompressing section"); + Data = ArrayRef(UncompressedData.get(), Size); } template diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 3ee169712215..3fd89bd8ec89 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -67,7 +67,7 @@ public: ArrayRef getData(const SectionPiece &P) const; // If a section is compressed, this has the uncompressed section data. - std::unique_ptr UncompressedData; + std::unique_ptr UncompressedData; std::vector Relocations; }; @@ -118,6 +118,13 @@ public: void uncompress(); void relocate(uint8_t *Buf, uint8_t *BufEnd); + +private: + std::pair, uint64_t> + getElfCompressedData(ArrayRef Data); + + std::pair, uint64_t> + getRawCompressedData(ArrayRef Data); }; template InputSectionBase InputSectionBase::Discarded; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index f1fe241af12b..1a612b278e83 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -351,7 +351,7 @@ void LinkerScript::createSections(OutputSectionFactory &Factory) { for (ObjectFile *F : Symtab::X->getObjectFiles()) for (InputSectionBase *S : F->getSections()) if (!isDiscarded(S) && !S->OutSec) - addSection(Factory, S, getOutputSectionName(S->Name)); + addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc)); } // Sets value of a section-defined symbol. 
Two kinds of diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index d50deaea164e..ade3ead70592 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -91,7 +91,7 @@ private: }; } // anonymous namespace -StringRef elf::getOutputSectionName(StringRef Name) { +StringRef elf::getOutputSectionName(StringRef Name, BumpPtrAllocator &Alloc) { if (Config->Relocatable) return Name; @@ -103,6 +103,11 @@ StringRef elf::getOutputSectionName(StringRef Name) { if (Name.startswith(V) || Name == Prefix) return Prefix; } + + // ".zdebug_" is a prefix for ZLIB-compressed sections. + // Because we decompressed input sections, we want to remove 'z'. + if (Name.startswith(".zdebug_")) + return StringSaver(Alloc).save(Twine(".") + Name.substr(2)); return Name; } @@ -699,7 +704,8 @@ template void Writer::createSections() { } OutputSectionBase *Sec; bool IsNew; - std::tie(Sec, IsNew) = Factory.create(IS, getOutputSectionName(IS->Name)); + StringRef OutsecName = getOutputSectionName(IS->Name, Alloc); + std::tie(Sec, IsNew) = Factory.create(IS, OutsecName); if (IsNew) OutputSections.push_back(Sec); Sec->addSection(IS); diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h index cf1c56019b8f..3a380a980bd5 100644 --- a/lld/ELF/Writer.h +++ b/lld/ELF/Writer.h @@ -10,13 +10,11 @@ #ifndef LLD_ELF_WRITER_H #define LLD_ELF_WRITER_H +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" #include #include -namespace llvm { - class StringRef; -} - namespace lld { namespace elf { template class OutputSectionBase; @@ -41,7 +39,8 @@ struct PhdrEntry { bool HasLMA = false; }; -llvm::StringRef getOutputSectionName(llvm::StringRef Name); +llvm::StringRef getOutputSectionName(llvm::StringRef Name, + llvm::BumpPtrAllocator &Alloc); template void reportDiscarded(InputSectionBase *IS); diff --git a/lld/test/ELF/compressed-debug-input.s b/lld/test/ELF/compressed-debug-input.s index 7339833dbb7f..5104a102ba30 100644 --- a/lld/test/ELF/compressed-debug-input.s +++ 
b/lld/test/ELF/compressed-debug-input.s @@ -1,52 +1,73 @@ # REQUIRES: zlib # RUN: llvm-mc -compress-debug-sections=zlib -filetype=obj -triple=x86_64-unknown-linux %s -o %t -# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=COMPRESSED %s +# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=ZLIB %s +# ZLIB: Section { +# ZLIB: Index: 2 +# ZLIB: Name: .debug_str +# ZLIB-NEXT: Type: SHT_PROGBITS +# ZLIB-NEXT: Flags [ +# ZLIB-NEXT: SHF_COMPRESSED (0x800) +# ZLIB-NEXT: SHF_MERGE (0x10) +# ZLIB-NEXT: SHF_STRINGS (0x20) +# ZLIB-NEXT: ] +# ZLIB-NEXT: Address: +# ZLIB-NEXT: Offset: +# ZLIB-NEXT: Size: +# ZLIB-NEXT: Link: +# ZLIB-NEXT: Info: +# ZLIB-NEXT: AddressAlignment: 1 +# ZLIB-NEXT: EntrySize: 1 +# ZLIB-NEXT: } -# COMPRESSED: Section { -# COMPRESSED: Index: 2 -# COMPRESSED: Name: .debug_str -# COMPRESSED-NEXT: Type: SHT_PROGBITS -# COMPRESSED-NEXT: Flags [ -# COMPRESSED-NEXT: SHF_COMPRESSED (0x800) -# COMPRESSED-NEXT: SHF_MERGE (0x10) -# COMPRESSED-NEXT: SHF_STRINGS (0x20) -# COMPRESSED-NEXT: ] -# COMPRESSED-NEXT: Address: -# COMPRESSED-NEXT: Offset: -# COMPRESSED-NEXT: Size: 66 -# COMPRESSED-NEXT: Link: -# COMPRESSED-NEXT: Info: -# COMPRESSED-NEXT: AddressAlignment: 1 -# COMPRESSED-NEXT: EntrySize: 1 -# COMPRESSED-NEXT: } +# RUN: llvm-mc -compress-debug-sections=zlib-gnu -filetype=obj -triple=x86_64-unknown-linux %s -o %t2 +# RUN: llvm-readobj -sections %t2 | FileCheck -check-prefix=GNU %s +# GNU: Section { +# GNU: Index: 2 +# GNU: Name: .zdebug_str +# GNU-NEXT: Type: SHT_PROGBITS +# GNU-NEXT: Flags [ +# GNU-NEXT: SHF_MERGE (0x10) +# GNU-NEXT: SHF_STRINGS (0x20) +# GNU-NEXT: ] +# GNU-NEXT: Address: +# GNU-NEXT: Offset: +# GNU-NEXT: Size: +# GNU-NEXT: Link: +# GNU-NEXT: Info: +# GNU-NEXT: AddressAlignment: 1 +# GNU-NEXT: EntrySize: 1 +# GNU-NEXT: } # RUN: ld.lld %t -o %t.so -shared -# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=UNCOMPRESSED %s +# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=DATA 
%s -# UNCOMPRESSED: Section { -# UNCOMPRESSED: Index: 6 -# UNCOMPRESSED: Name: .debug_str -# UNCOMPRESSED-NEXT: Type: SHT_PROGBITS -# UNCOMPRESSED-NEXT: Flags [ -# UNCOMPRESSED-NEXT: SHF_MERGE (0x10) -# UNCOMPRESSED-NEXT: SHF_STRINGS (0x20) -# UNCOMPRESSED-NEXT: ] -# UNCOMPRESSED-NEXT: Address: 0x0 -# UNCOMPRESSED-NEXT: Offset: 0x1060 -# UNCOMPRESSED-NEXT: Size: 69 -# UNCOMPRESSED-NEXT: Link: 0 -# UNCOMPRESSED-NEXT: Info: 0 -# UNCOMPRESSED-NEXT: AddressAlignment: 1 -# UNCOMPRESSED-NEXT: EntrySize: 1 -# UNCOMPRESSED-NEXT: SectionData ( -# UNCOMPRESSED-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i| -# UNCOMPRESSED-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.| -# UNCOMPRESSED-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| -# UNCOMPRESSED-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned | -# UNCOMPRESSED-NEXT: 0040: 63686172 00 |char.| -# UNCOMPRESSED-NEXT: ) -# UNCOMPRESSED-NEXT: } +# RUN: ld.lld %t2 -o %t2.so -shared +# RUN: llvm-readobj -sections -section-data %t2.so | FileCheck -check-prefix=DATA %s + +# DATA: Section { +# DATA: Index: 6 +# DATA: Name: .debug_str +# DATA-NEXT: Type: SHT_PROGBITS +# DATA-NEXT: Flags [ +# DATA-NEXT: SHF_MERGE (0x10) +# DATA-NEXT: SHF_STRINGS (0x20) +# DATA-NEXT: ] +# DATA-NEXT: Address: 0x0 +# DATA-NEXT: Offset: 0x1060 +# DATA-NEXT: Size: 69 +# DATA-NEXT: Link: 0 +# DATA-NEXT: Info: 0 +# DATA-NEXT: AddressAlignment: 1 +# DATA-NEXT: EntrySize: 1 +# DATA-NEXT: SectionData ( +# DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i| +# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.| +# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| +# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned | +# DATA-NEXT: 0040: 63686172 00 |char.| +# DATA-NEXT: ) +# DATA-NEXT: } .section .debug_str,"MS",@progbits,1 .LASF2: