2015-09-22 08:01:39 +08:00
|
|
|
//===- InputSection.cpp ---------------------------------------------------===//
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
|
|
|
// The LLVM Linker
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-09-22 08:01:39 +08:00
|
|
|
#include "InputSection.h"
|
2015-09-26 03:24:57 +08:00
|
|
|
#include "Config.h"
|
2016-05-24 10:55:45 +08:00
|
|
|
#include "EhFrame.h"
|
2015-08-06 23:08:23 +08:00
|
|
|
#include "Error.h"
|
2015-08-28 07:15:56 +08:00
|
|
|
#include "InputFiles.h"
|
2016-07-12 16:50:42 +08:00
|
|
|
#include "LinkerScript.h"
|
2015-09-22 06:01:00 +08:00
|
|
|
#include "OutputSections.h"
|
2015-09-23 02:19:46 +08:00
|
|
|
#include "Target.h"
|
2016-07-09 00:10:27 +08:00
|
|
|
#include "Thunks.h"
|
2015-09-22 06:01:00 +08:00
|
|
|
|
2016-06-24 19:18:44 +08:00
|
|
|
#include "llvm/Support/Compression.h"
|
2016-02-26 05:33:56 +08:00
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::ELF;
|
2015-09-22 06:01:00 +08:00
|
|
|
using namespace llvm::object;
|
2016-10-13 06:36:31 +08:00
|
|
|
using namespace llvm::support;
|
2016-02-26 05:33:56 +08:00
|
|
|
using namespace llvm::support::endian;
|
2015-07-25 05:03:07 +08:00
|
|
|
|
|
|
|
using namespace lld;
|
2016-02-28 08:25:54 +08:00
|
|
|
using namespace lld::elf;
|
2015-07-25 05:03:07 +08:00
|
|
|
|
2016-09-12 21:13:53 +08:00
|
|
|
template <class ELFT>
|
|
|
|
static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File,
|
|
|
|
const typename ELFT::Shdr *Hdr) {
|
|
|
|
if (!File || Hdr->sh_type == SHT_NOBITS)
|
2016-10-26 08:54:03 +08:00
|
|
|
return makeArrayRef<uint8_t>(nullptr, Hdr->sh_size);
|
2016-09-12 21:13:53 +08:00
|
|
|
return check(File->getObj().getSectionContents(Hdr));
|
|
|
|
}
|
|
|
|
|
2016-10-13 06:36:31 +08:00
|
|
|
// ELF supports ZLIB-compressed section. Returns true if the section
|
|
|
|
// is compressed.
|
|
|
|
template <class ELFT>
|
2016-10-26 08:54:03 +08:00
|
|
|
static bool isCompressed(typename ELFT::uint Flags, StringRef Name) {
|
|
|
|
return (Flags & SHF_COMPRESSED) || Name.startswith(".zdebug");
|
2016-10-13 06:36:31 +08:00
|
|
|
}
|
|
|
|
|
2015-10-20 05:00:02 +08:00
|
|
|
template <class ELFT>
|
2016-03-12 02:46:51 +08:00
|
|
|
InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
|
2016-10-26 08:54:03 +08:00
|
|
|
uintX_t Flags, uint32_t Type,
|
|
|
|
uintX_t Entsize, uint32_t Link,
|
|
|
|
uint32_t Info, uintX_t Addralign,
|
|
|
|
ArrayRef<uint8_t> Data, StringRef Name,
|
2016-09-08 22:06:08 +08:00
|
|
|
Kind SectionKind)
|
2016-10-26 08:54:03 +08:00
|
|
|
: InputSectionData(SectionKind, Name, Data, isCompressed<ELFT>(Flags, Name),
|
|
|
|
!Config->GcSections || !(Flags & SHF_ALLOC)),
|
|
|
|
File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link),
|
|
|
|
Info(Info), Repl(this) {
|
2016-02-24 08:38:18 +08:00
|
|
|
// The ELF spec states that a value of 0 means the section has
|
2016-10-08 03:54:57 +08:00
|
|
|
// no alignment constraits.
|
2016-10-26 08:54:03 +08:00
|
|
|
uint64_t V = std::max<uint64_t>(Addralign, 1);
|
2016-10-07 20:27:45 +08:00
|
|
|
if (!isPowerOf2_64(V))
|
|
|
|
fatal(getFilename(File) + ": section sh_addralign is not a power of 2");
|
2016-10-08 03:54:57 +08:00
|
|
|
|
|
|
|
// We reject object files having insanely large alignments even though
|
|
|
|
// they are allowed by the spec. I think 4GB is a reasonable limitation.
|
|
|
|
// We might want to relax this in the future.
|
2016-10-07 20:27:45 +08:00
|
|
|
if (V > UINT32_MAX)
|
2016-10-03 18:04:38 +08:00
|
|
|
fatal(getFilename(File) + ": section sh_addralign is too large");
|
2016-10-07 20:27:45 +08:00
|
|
|
Alignment = V;
|
2016-02-24 08:23:15 +08:00
|
|
|
}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
2016-10-26 08:54:03 +08:00
|
|
|
template <class ELFT>
|
|
|
|
InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
|
|
|
|
const Elf_Shdr *Hdr, StringRef Name,
|
|
|
|
Kind SectionKind)
|
|
|
|
: InputSectionBase(File, Hdr->sh_flags, Hdr->sh_type, Hdr->sh_entsize,
|
|
|
|
Hdr->sh_link, Hdr->sh_info, Hdr->sh_addralign,
|
2016-11-01 17:17:50 +08:00
|
|
|
getSectionContents(File, Hdr), Name, SectionKind) {
|
|
|
|
this->Offset = Hdr->sh_offset;
|
|
|
|
}
|
2016-10-26 08:54:03 +08:00
|
|
|
|
2016-11-08 22:47:16 +08:00
|
|
|
// Returns the size of this section. For a regular InputSection that has a
// non-zero total thunk size, that is the thunk offset plus the thunk size;
// in every other case it is the size of Data.
template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const {
  const auto *IS = dyn_cast<InputSection<ELFT>>(this);
  if (IS && IS->getThunksSize() > 0)
    return IS->getThunkOff() + IS->getThunksSize();
  return Data.size();
}
|
|
|
|
|
2016-08-03 12:39:42 +08:00
|
|
|
// Builds the "<file name>(<section name>)" string used to identify a section
// in error messages.
template <class SectionT> static std::string getName(SectionT *Sec) {
  auto *Owner = Sec->getFile();
  return (Owner->getName() + "(" + Sec->Name + ")").str();
}
|
|
|
|
|
2015-11-12 00:50:37 +08:00
|
|
|
template <class ELFT>
|
2016-06-23 12:33:42 +08:00
|
|
|
typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const {
|
2016-09-08 20:33:41 +08:00
|
|
|
switch (kind()) {
|
2015-11-12 00:50:37 +08:00
|
|
|
case Regular:
|
|
|
|
return cast<InputSection<ELFT>>(this)->OutSecOff + Offset;
|
2015-11-12 03:54:14 +08:00
|
|
|
case EHFrame:
|
2016-07-21 04:19:58 +08:00
|
|
|
// The file crtbeginT.o has relocations pointing to the start of an empty
|
|
|
|
// .eh_frame that is known to be the first in the link. It does that to
|
|
|
|
// identify the start of the output .eh_frame.
|
|
|
|
return Offset;
|
2015-11-12 00:50:37 +08:00
|
|
|
case Merge:
|
|
|
|
return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset);
|
2015-12-20 18:57:34 +08:00
|
|
|
case MipsReginfo:
|
2016-05-04 18:07:38 +08:00
|
|
|
case MipsOptions:
|
2016-08-12 14:28:49 +08:00
|
|
|
case MipsAbiFlags:
|
|
|
|
// MIPS .reginfo, .MIPS.options, and .MIPS.abiflags sections are consumed
|
|
|
|
// by the linker, and the linker produces a single output section. It is
|
|
|
|
// possible that input files contain section symbol points to the
|
|
|
|
// corresponding input section. Redirect it to the produced output section.
|
2016-05-27 04:46:01 +08:00
|
|
|
if (Offset != 0)
|
2016-08-03 12:39:42 +08:00
|
|
|
fatal(getName(this) + ": unsupported reference to the middle of '" +
|
2016-09-08 22:06:08 +08:00
|
|
|
Name + "' section");
|
2016-05-27 04:46:01 +08:00
|
|
|
return this->OutSec->getVA();
|
2015-11-12 00:50:37 +08:00
|
|
|
}
|
2016-03-12 16:31:34 +08:00
|
|
|
llvm_unreachable("invalid section kind");
|
2015-11-12 00:50:37 +08:00
|
|
|
}
|
|
|
|
|
2016-10-13 06:36:31 +08:00
|
|
|
// Returns compressed data and its size when uncompressed.
|
|
|
|
template <class ELFT>
|
|
|
|
std::pair<ArrayRef<uint8_t>, uint64_t>
|
|
|
|
InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) {
|
|
|
|
// Compressed section with Elf_Chdr is the ELF standard.
|
2016-07-07 11:55:55 +08:00
|
|
|
if (Data.size() < sizeof(Elf_Chdr))
|
2016-10-13 06:36:31 +08:00
|
|
|
fatal(getName(this) + ": corrupted compressed section");
|
2016-07-07 11:55:55 +08:00
|
|
|
auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data());
|
|
|
|
if (Hdr->ch_type != ELFCOMPRESS_ZLIB)
|
2016-08-03 12:39:42 +08:00
|
|
|
fatal(getName(this) + ": unsupported compression type");
|
2016-10-13 06:36:31 +08:00
|
|
|
return {Data.slice(sizeof(*Hdr)), Hdr->ch_size};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns compressed data and its size when uncompressed.
|
|
|
|
template <class ELFT>
|
|
|
|
std::pair<ArrayRef<uint8_t>, uint64_t>
|
|
|
|
InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) {
|
|
|
|
// Compressed sections without Elf_Chdr header contain this header
|
|
|
|
// instead. This is a GNU extension.
|
|
|
|
struct ZlibHeader {
|
2016-10-13 07:22:59 +08:00
|
|
|
char Magic[4]; // Should be "ZLIB"
|
2016-10-13 06:36:31 +08:00
|
|
|
char Size[8]; // Uncompressed size in big-endian
|
|
|
|
};
|
|
|
|
|
|
|
|
if (Data.size() < sizeof(ZlibHeader))
|
|
|
|
fatal(getName(this) + ": corrupted compressed section");
|
|
|
|
auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data());
|
2016-10-13 07:22:59 +08:00
|
|
|
if (memcmp(Hdr->Magic, "ZLIB", 4))
|
2016-10-13 06:36:31 +08:00
|
|
|
fatal(getName(this) + ": broken ZLIB-compressed section");
|
|
|
|
return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Decompresses this section's contents into a freshly allocated buffer owned
// by UncompressedData and repoints Data at that buffer. Fatal if zlib support
// is unavailable, if the compression header is malformed, or if
// decompression fails.
template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
  if (!zlib::isAvailable())
    fatal(getName(this) +
          ": build lld with zlib to enable compressed sections support");

  // This section is compressed. Ideally, all compressed sections have the
  // SHF_COMPRESSED bit and their contents start with a header of Elf_Chdr
  // type. However, sections whose names start with ".zdebug_" don't have the
  // bit and contain raw ZLIB-compressed data (which is a bad thing because
  // section names shouldn't be significant in ELF). We need to be able to
  // read both.
  std::pair<ArrayRef<uint8_t>, uint64_t> Compressed =
      (Flags & SHF_COMPRESSED) ? getElfCompressedData(Data)
                               : getRawCompressedData(Data);
  ArrayRef<uint8_t> Buf = Compressed.first; // Compressed data
  size_t Size = Compressed.second;          // Uncompressed size

  // Uncompress Buf. Size is passed as an lvalue and read again below when
  // Data is rebuilt.
  UncompressedData.reset(new uint8_t[Size]);
  char *Dest = reinterpret_cast<char *>(UncompressedData.get());
  if (zlib::uncompress(toStringRef(Buf), Dest, Size) != zlib::StatusOK)
    fatal(getName(this) + ": error while uncompressing section");

  Data = ArrayRef<uint8_t>(UncompressedData.get(), Size);
}
|
|
|
|
|
2015-10-20 05:00:02 +08:00
|
|
|
template <class ELFT>
|
2016-04-04 22:04:16 +08:00
|
|
|
typename ELFT::uint
|
2016-06-23 12:33:42 +08:00
|
|
|
InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const {
|
2016-04-04 22:04:16 +08:00
|
|
|
return getOffset(Sym.Value);
|
2015-10-20 05:00:02 +08:00
|
|
|
}
|
|
|
|
|
2016-10-20 16:36:42 +08:00
|
|
|
template <class ELFT>
|
|
|
|
InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const {
|
2016-10-26 20:36:56 +08:00
|
|
|
if ((Flags & SHF_LINK_ORDER) && Link != 0)
|
|
|
|
return getFile()->getSections()[Link];
|
2016-10-10 17:39:26 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2016-10-26 08:54:03 +08:00
|
|
|
template <class ELFT>
|
|
|
|
InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type,
|
2016-10-28 01:45:40 +08:00
|
|
|
uintX_t Addralign, ArrayRef<uint8_t> Data,
|
|
|
|
StringRef Name)
|
2016-10-26 08:54:03 +08:00
|
|
|
: InputSectionBase<ELFT>(nullptr, Flags, Type,
|
|
|
|
/*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign,
|
2016-10-28 01:45:40 +08:00
|
|
|
Data, Name, Base::Regular) {}
|
2016-10-26 08:54:03 +08:00
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
template <class ELFT>
|
2016-03-12 02:46:51 +08:00
|
|
|
InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F,
|
2016-09-08 22:06:08 +08:00
|
|
|
const Elf_Shdr *Header, StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool InputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == Base::Regular;
|
2015-10-20 05:00:02 +08:00
|
|
|
}
|
2015-07-25 05:03:07 +08:00
|
|
|
|
2016-02-25 16:23:37 +08:00
|
|
|
template <class ELFT>
|
|
|
|
InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() {
|
2016-10-26 20:36:56 +08:00
|
|
|
assert(this->Type == SHT_RELA || this->Type == SHT_REL);
|
2016-02-25 16:23:37 +08:00
|
|
|
ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections();
|
2016-10-26 20:36:56 +08:00
|
|
|
return Sections[this->Info];
|
2016-02-25 16:23:37 +08:00
|
|
|
}
|
|
|
|
|
2016-10-20 16:36:42 +08:00
|
|
|
// Appends T to the list of thunks attached to this section.
template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) {
  this->Thunks.push_back(T);
}
|
|
|
|
|
|
|
|
// Returns the offset at which thunks start, which is the size of Data.
template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const {
  const uint64_t EndOfData = this->Data.size();
  return EndOfData;
}
|
|
|
|
|
|
|
|
// Returns the sum of size() over all thunks attached to this section
// (0 when there are none).
template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const {
  uint64_t Sum = 0;
  for (size_t I = 0, E = Thunks.size(); I != E; ++I)
    Sum += Thunks[I]->size();
  return Sum;
}
|
|
|
|
|
2016-02-25 16:23:37 +08:00
|
|
|
// This is used for -r. We can't use memcpy to copy relocations because we need
|
|
|
|
// to update symbol table offset and section index for each relocation. So we
|
|
|
|
// copy relocations one by one.
|
|
|
|
template <class ELFT>
|
2016-03-13 13:06:50 +08:00
|
|
|
template <class RelTy>
|
2016-04-05 22:47:28 +08:00
|
|
|
void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
2016-02-25 16:23:37 +08:00
|
|
|
InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection();
|
|
|
|
|
2016-03-13 13:06:50 +08:00
|
|
|
for (const RelTy &Rel : Rels) {
|
2016-02-25 16:23:37 +08:00
|
|
|
uint32_t Type = Rel.getType(Config->Mips64EL);
|
2016-04-27 07:52:44 +08:00
|
|
|
SymbolBody &Body = this->File->getRelocTargetSym(Rel);
|
2016-02-25 16:23:37 +08:00
|
|
|
|
2016-08-02 16:49:57 +08:00
|
|
|
Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf);
|
2016-03-13 13:06:50 +08:00
|
|
|
Buf += sizeof(RelTy);
|
2016-02-25 16:23:37 +08:00
|
|
|
|
2016-08-02 16:49:57 +08:00
|
|
|
if (Config->Rela)
|
|
|
|
P->r_addend = getAddend<ELFT>(Rel);
|
2016-02-25 16:23:37 +08:00
|
|
|
P->r_offset = RelocatedSection->getOffset(Rel.r_offset);
|
2016-03-11 20:06:30 +08:00
|
|
|
P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL);
|
2016-02-25 16:23:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-13 09:40:19 +08:00
|
|
|
// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
static uint64_t getAArch64Page(uint64_t Expr) {
  const uint64_t LowBits = 0xFFF;
  const uint64_t PageMask = ~LowBits;
  return Expr & PageMask;
}
|
|
|
|
|
2015-09-22 06:01:00 +08:00
|
|
|
template <class ELFT>
|
2016-07-12 11:49:41 +08:00
|
|
|
static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A,
|
|
|
|
typename ELFT::uint P,
|
|
|
|
const SymbolBody &Body, RelExpr Expr) {
|
2016-04-13 09:40:19 +08:00
|
|
|
switch (Expr) {
|
2016-05-04 22:44:22 +08:00
|
|
|
case R_HINT:
|
2016-10-20 17:59:26 +08:00
|
|
|
case R_TLSDESC_CALL:
|
2016-05-04 22:44:22 +08:00
|
|
|
llvm_unreachable("cannot relocate hint relocs");
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_TLSLD:
|
2016-09-01 07:01:13 +08:00
|
|
|
return Out<ELFT>::Got->getTlsIndexOff() + A - Out<ELFT>::Got->getSize();
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_TLSLD_PC:
|
|
|
|
return Out<ELFT>::Got->getTlsIndexVA() + A - P;
|
2016-07-09 00:10:27 +08:00
|
|
|
case R_THUNK_ABS:
|
|
|
|
return Body.getThunkVA<ELFT>() + A;
|
|
|
|
case R_THUNK_PC:
|
|
|
|
case R_THUNK_PLT_PC:
|
|
|
|
return Body.getThunkVA<ELFT>() + A - P;
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_PPC_TOC:
|
|
|
|
return getPPC64TocBase() + A;
|
|
|
|
case R_TLSGD:
|
2016-04-18 20:07:13 +08:00
|
|
|
return Out<ELFT>::Got->getGlobalDynOffset(Body) + A -
|
2016-09-01 07:01:13 +08:00
|
|
|
Out<ELFT>::Got->getSize();
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_TLSGD_PC:
|
|
|
|
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A - P;
|
2016-06-03 03:49:53 +08:00
|
|
|
case R_TLSDESC:
|
|
|
|
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A;
|
|
|
|
case R_TLSDESC_PAGE:
|
|
|
|
return getAArch64Page(Out<ELFT>::Got->getGlobalDynAddr(Body) + A) -
|
|
|
|
getAArch64Page(P);
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_PLT:
|
|
|
|
return Body.getPltVA<ELFT>() + A;
|
|
|
|
case R_PLT_PC:
|
|
|
|
case R_PPC_PLT_OPD:
|
|
|
|
return Body.getPltVA<ELFT>() + A - P;
|
|
|
|
case R_SIZE:
|
|
|
|
return Body.getSize<ELFT>() + A;
|
2016-04-18 20:07:13 +08:00
|
|
|
case R_GOTREL:
|
|
|
|
return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA();
|
2016-09-01 07:24:11 +08:00
|
|
|
case R_GOTREL_FROM_END:
|
|
|
|
return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA() -
|
|
|
|
Out<ELFT>::Got->getSize();
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE_END:
|
2016-04-18 20:07:13 +08:00
|
|
|
case R_GOT_FROM_END:
|
2016-09-01 07:01:13 +08:00
|
|
|
return Body.getGotOffset<ELFT>() + A - Out<ELFT>::Got->getSize();
|
2016-06-05 07:33:31 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE_ABS:
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_GOT:
|
|
|
|
return Body.getGotVA<ELFT>() + A;
|
2016-06-05 07:33:31 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_GOT_PAGE_PC:
|
|
|
|
return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P);
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE:
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_GOT_PC:
|
|
|
|
return Body.getGotVA<ELFT>() + A - P;
|
2016-04-18 20:07:13 +08:00
|
|
|
case R_GOTONLY_PC:
|
|
|
|
return Out<ELFT>::Got->getVA() + A - P;
|
2016-09-01 07:24:11 +08:00
|
|
|
case R_GOTONLY_PC_FROM_END:
|
|
|
|
return Out<ELFT>::Got->getVA() + A - P + Out<ELFT>::Got->getSize();
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_LD_TO_LE:
|
|
|
|
case R_RELAX_TLS_IE_TO_LE:
|
|
|
|
case R_RELAX_TLS_GD_TO_LE:
|
2016-04-18 20:07:13 +08:00
|
|
|
case R_TLS:
|
2016-09-24 02:47:50 +08:00
|
|
|
// A weak undefined TLS symbol resolves to the base of the TLS
|
|
|
|
// block, i.e. gets a value of zero. If we pass --gc-sections to
|
|
|
|
// lld and .tbss is not referenced, it gets reclaimed and we don't
|
|
|
|
// create a TLS program header. Therefore, we resolve this
|
|
|
|
// statically to zero.
|
|
|
|
if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) &&
|
|
|
|
Body.symbol()->isWeak())
|
|
|
|
return 0;
|
2016-05-21 01:41:09 +08:00
|
|
|
if (Target->TcbSize)
|
|
|
|
return Body.getVA<ELFT>(A) +
|
|
|
|
alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align);
|
2016-04-18 20:44:33 +08:00
|
|
|
return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz;
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_LE_NEG:
|
2016-04-18 20:07:13 +08:00
|
|
|
case R_NEG_TLS:
|
|
|
|
return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A);
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_ABS:
|
[ELF] - Implemented support for test/binop relaxations from latest ABI.
Patch implements next relaxation from latest ABI:
"Convert memory operand of test and binop into immediate operand, where binop is one of adc, add, and, cmp, or,
sbb, sub, xor instructions, when position-independent code is disabled."
It is described in System V Application Binary Interface AMD64 Architecture Processor
Supplement Draft Version 0.99.8 (https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-r249.pdf,
B.2 "B.2 Optimize GOTPCRELX Relocations").
Differential revision: http://reviews.llvm.org/D20793
llvm-svn: 271405
2016-06-02 00:45:30 +08:00
|
|
|
case R_RELAX_GOT_PC_NOPIC:
|
2016-04-13 09:40:19 +08:00
|
|
|
return Body.getVA<ELFT>(A);
|
2016-04-20 06:46:03 +08:00
|
|
|
case R_GOT_OFF:
|
|
|
|
return Body.getGotOffset<ELFT>() + A;
|
2016-05-16 02:13:50 +08:00
|
|
|
case R_MIPS_GOT_LOCAL_PAGE:
|
2016-03-13 23:37:38 +08:00
|
|
|
// If relocation against MIPS local symbol requires GOT entry, this entry
|
|
|
|
// should be initialized by 'page address'. This address is high 16-bits
|
2016-03-30 20:45:58 +08:00
|
|
|
// of sum the symbol's value and the addend.
|
2016-04-20 06:46:03 +08:00
|
|
|
return Out<ELFT>::Got->getMipsLocalPageOffset(Body.getVA<ELFT>(A));
|
2016-06-20 05:39:37 +08:00
|
|
|
case R_MIPS_GOT_OFF:
|
2016-10-21 15:22:30 +08:00
|
|
|
case R_MIPS_GOT_OFF32:
|
2016-06-20 05:39:37 +08:00
|
|
|
// In case of MIPS if a GOT relocation has non-zero addend this addend
|
|
|
|
// should be applied to the GOT entry content not to the GOT entry offset.
|
|
|
|
// That is why we use separate expression type.
|
|
|
|
return Out<ELFT>::Got->getMipsGotOffset(Body, A);
|
2016-06-23 23:26:31 +08:00
|
|
|
case R_MIPS_TLSGD:
|
|
|
|
return Out<ELFT>::Got->getGlobalDynOffset(Body) +
|
|
|
|
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
|
|
|
|
case R_MIPS_TLSLD:
|
|
|
|
return Out<ELFT>::Got->getTlsIndexOff() +
|
|
|
|
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_PPC_OPD: {
|
|
|
|
uint64_t SymVA = Body.getVA<ELFT>(A);
|
|
|
|
// If we have an undefined weak symbol, we might get here with a symbol
|
|
|
|
// address of zero. That could overflow, but the code must be unreachable,
|
|
|
|
// so don't bother doing anything at all.
|
|
|
|
if (!SymVA)
|
|
|
|
return 0;
|
|
|
|
if (Out<ELF64BE>::Opd) {
|
|
|
|
// If this is a local call, and we currently have the address of a
|
|
|
|
// function-descriptor, get the underlying code address instead.
|
|
|
|
uint64_t OpdStart = Out<ELF64BE>::Opd->getVA();
|
|
|
|
uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize();
|
|
|
|
bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd;
|
|
|
|
if (InOpd)
|
|
|
|
SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]);
|
|
|
|
}
|
|
|
|
return SymVA - P;
|
|
|
|
}
|
|
|
|
case R_PC:
|
2016-05-25 22:31:37 +08:00
|
|
|
case R_RELAX_GOT_PC:
|
2016-04-13 09:40:19 +08:00
|
|
|
return Body.getVA<ELFT>(A) - P;
|
2016-06-05 03:11:14 +08:00
|
|
|
case R_PLT_PAGE_PC:
|
2016-04-13 09:40:19 +08:00
|
|
|
case R_PAGE_PC:
|
|
|
|
return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P);
|
|
|
|
}
|
|
|
|
llvm_unreachable("Invalid expression");
|
2016-03-13 23:37:38 +08:00
|
|
|
}
|
|
|
|
|
2016-04-29 02:42:04 +08:00
|
|
|
// This function applies relocations to sections without SHF_ALLOC bit.
|
|
|
|
// Such sections are never mapped to memory at runtime. Debug sections are
|
|
|
|
// an example. Relocations in non-alloc sections are much easier to
|
|
|
|
// handle than in allocated sections because it will never need complex
|
|
|
|
// treatement such as GOT or PLT (because at runtime no one refers them).
|
|
|
|
// So, we handle relocations for non-alloc sections directly in this
|
|
|
|
// function as a performance optimization.
|
|
|
|
template <class ELFT>
|
|
|
|
template <class RelTy>
|
|
|
|
void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
|
|
|
for (const RelTy &Rel : Rels) {
|
|
|
|
uint32_t Type = Rel.getType(Config->Mips64EL);
|
2016-05-22 03:48:54 +08:00
|
|
|
uintX_t Offset = this->getOffset(Rel.r_offset);
|
|
|
|
uint8_t *BufLoc = Buf + Offset;
|
2016-04-29 11:21:08 +08:00
|
|
|
uintX_t Addend = getAddend<ELFT>(Rel);
|
|
|
|
if (!RelTy::IsRela)
|
2016-05-22 03:48:54 +08:00
|
|
|
Addend += Target->getImplicitAddend(BufLoc, Type);
|
2016-04-29 02:42:04 +08:00
|
|
|
|
2016-04-29 11:21:08 +08:00
|
|
|
SymbolBody &Sym = this->File->getRelocTargetSym(Rel);
|
2016-04-29 02:42:04 +08:00
|
|
|
if (Target->getRelExpr(Type, Sym) != R_ABS) {
|
2016-08-03 12:39:42 +08:00
|
|
|
error(getName(this) + " has non-ABS reloc");
|
2016-04-29 02:42:04 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-04-29 11:21:08 +08:00
|
|
|
uintX_t AddrLoc = this->OutSec->getVA() + Offset;
|
2016-11-02 04:11:01 +08:00
|
|
|
uint64_t SymVA = 0;
|
|
|
|
if (!Sym.isTls() || Out<ELFT>::TlsPhdr)
|
|
|
|
SymVA = SignExtend64<sizeof(uintX_t) * 8>(
|
|
|
|
getSymVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS));
|
2016-04-29 02:42:04 +08:00
|
|
|
Target->relocateOne(BufLoc, Type, SymVA);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-19 02:11:26 +08:00
|
|
|
template <class ELFT>
|
2016-04-13 09:40:19 +08:00
|
|
|
void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) {
|
2016-04-29 02:42:04 +08:00
|
|
|
// scanReloc function in Writer.cpp constructs Relocations
|
|
|
|
// vector only for SHF_ALLOC'ed sections. For other sections,
|
|
|
|
// we handle relocations directly here.
|
|
|
|
auto *IS = dyn_cast<InputSection<ELFT>>(this);
|
2016-10-26 20:36:56 +08:00
|
|
|
if (IS && !(IS->Flags & SHF_ALLOC)) {
|
2016-04-29 02:42:04 +08:00
|
|
|
for (const Elf_Shdr *RelSec : IS->RelocSections) {
|
|
|
|
if (RelSec->sh_type == SHT_RELA)
|
2016-11-04 03:07:44 +08:00
|
|
|
IS->relocateNonAlloc(Buf, check(IS->File->getObj().relas(RelSec)));
|
2016-04-29 02:42:04 +08:00
|
|
|
else
|
2016-11-04 03:07:44 +08:00
|
|
|
IS->relocateNonAlloc(Buf, check(IS->File->getObj().rels(RelSec)));
|
2016-04-29 02:42:04 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-04-13 09:40:19 +08:00
|
|
|
const unsigned Bits = sizeof(uintX_t) * 8;
|
2016-09-08 04:37:34 +08:00
|
|
|
for (const Relocation &Rel : Relocations) {
|
2016-08-19 23:46:28 +08:00
|
|
|
uintX_t Offset = getOffset(Rel.Offset);
|
2016-03-19 02:11:26 +08:00
|
|
|
uint8_t *BufLoc = Buf + Offset;
|
2016-04-13 09:40:19 +08:00
|
|
|
uint32_t Type = Rel.Type;
|
|
|
|
uintX_t A = Rel.Addend;
|
|
|
|
|
2016-03-19 02:11:26 +08:00
|
|
|
uintX_t AddrLoc = OutSec->getVA() + Offset;
|
2016-04-13 09:40:19 +08:00
|
|
|
RelExpr Expr = Rel.Expr;
|
2016-07-12 11:49:41 +08:00
|
|
|
uint64_t SymVA =
|
|
|
|
SignExtend64<Bits>(getSymVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr));
|
2016-03-19 02:11:26 +08:00
|
|
|
|
2016-05-21 05:14:06 +08:00
|
|
|
switch (Expr) {
|
2016-05-25 22:31:37 +08:00
|
|
|
case R_RELAX_GOT_PC:
|
[ELF] - Implemented support for test/binop relaxations from latest ABI.
Patch implements next relaxation from latest ABI:
"Convert memory operand of test and binop into immediate operand, where binop is one of adc, add, and, cmp, or,
sbb, sub, xor instructions, when position-independent code is disabled."
It is described in System V Application Binary Interface AMD64 Architecture Processor
Supplement Draft Version 0.99.8 (https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-r249.pdf,
B.2 "B.2 Optimize GOTPCRELX Relocations").
Differential revision: http://reviews.llvm.org/D20793
llvm-svn: 271405
2016-06-02 00:45:30 +08:00
|
|
|
case R_RELAX_GOT_PC_NOPIC:
|
2016-05-25 22:31:37 +08:00
|
|
|
Target->relaxGot(BufLoc, SymVA);
|
|
|
|
break;
|
2016-05-21 05:14:06 +08:00
|
|
|
case R_RELAX_TLS_IE_TO_LE:
|
|
|
|
Target->relaxTlsIeToLe(BufLoc, Type, SymVA);
|
|
|
|
break;
|
|
|
|
case R_RELAX_TLS_LD_TO_LE:
|
|
|
|
Target->relaxTlsLdToLe(BufLoc, Type, SymVA);
|
|
|
|
break;
|
|
|
|
case R_RELAX_TLS_GD_TO_LE:
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_LE_NEG:
|
2016-05-21 05:14:06 +08:00
|
|
|
Target->relaxTlsGdToLe(BufLoc, Type, SymVA);
|
|
|
|
break;
|
|
|
|
case R_RELAX_TLS_GD_TO_IE:
|
2016-06-05 07:33:31 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE_ABS:
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
2016-06-05 07:22:34 +08:00
|
|
|
case R_RELAX_TLS_GD_TO_IE_END:
|
2016-05-21 05:14:06 +08:00
|
|
|
Target->relaxTlsGdToIe(BufLoc, Type, SymVA);
|
|
|
|
break;
|
2016-05-24 20:17:11 +08:00
|
|
|
case R_PPC_PLT_OPD:
|
|
|
|
// Patch a nop (0x60000000) to a ld.
|
|
|
|
if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000)
|
|
|
|
write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1)
|
2016-10-20 16:36:42 +08:00
|
|
|
// fallthrough
|
2016-05-21 05:14:06 +08:00
|
|
|
default:
|
|
|
|
Target->relocateOne(BufLoc, Type, SymVA);
|
|
|
|
break;
|
|
|
|
}
|
2015-09-22 06:01:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-08 03:18:16 +08:00
|
|
|
// Writes this section into the output buffer at Buf + OutSecOff.
//
// SHT_NOBITS sections write nothing. SHT_RELA/SHT_REL sections (possible
// with -r) are copied record by record via copyRelocations(). Every other
// section has its bytes copied, its relocations applied, and any attached
// thunks written right after the section data.
template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) {
  switch (this->Type) {
  case SHT_NOBITS:
    return;
  // If -r is given, then an InputSection may be a relocation section.
  case SHT_RELA:
    copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>());
    return;
  case SHT_REL:
    copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>());
    return;
  default:
    break;
  }

  // Copy section contents from source object file to output file.
  ArrayRef<uint8_t> Contents = this->Data;
  memcpy(Buf + OutSecOff, Contents.data(), Contents.size());

  // Iterate over all relocation sections that apply to this section.
  uint8_t *BufEnd = Buf + OutSecOff + Contents.size();
  this->relocate(Buf, BufEnd);

  // The section might have a data/code generated by the linker and need
  // to be written after the section. Usually these are thunks - small piece
  // of code used to jump between "incompatible" functions like PIC and non-PIC
  // or if the jump target too far and its address does not fit to the short
  // jump instruction.
  if (Thunks.empty())
    return;

  uint8_t *ThunkBuf = Buf + (OutSecOff + getThunkOff());
  for (const Thunk<ELFT> *T : Thunks) {
    T->writeTo(ThunkBuf);
    ThunkBuf += T->size();
  }
}
|
|
|
|
|
2016-02-26 02:43:51 +08:00
|
|
|
template <class ELFT>
|
|
|
|
void InputSection<ELFT>::replace(InputSection<ELFT> *Other) {
|
2016-09-14 08:09:50 +08:00
|
|
|
assert(Other->Alignment <= this->Alignment);
|
2016-02-26 02:43:51 +08:00
|
|
|
Other->Repl = this->Repl;
|
|
|
|
Other->Live = false;
|
|
|
|
}
|
|
|
|
|
2015-11-12 03:54:14 +08:00
|
|
|
template <class ELFT>
|
2016-05-24 12:19:20 +08:00
|
|
|
EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F,
|
2016-09-08 22:06:08 +08:00
|
|
|
const Elf_Shdr *Header, StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) {
|
2015-12-24 18:08:54 +08:00
|
|
|
// Mark .eh_frame sections as live by default because there are
|
|
|
|
// usually no relocations that point to .eh_frames. Otherwise,
|
2016-02-18 23:17:01 +08:00
|
|
|
// the garbage collector would drop all .eh_frame sections.
|
2015-12-24 18:08:54 +08:00
|
|
|
this->Live = true;
|
|
|
|
}
|
2015-11-12 03:54:14 +08:00
|
|
|
|
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool EhInputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == InputSectionBase<ELFT>::EHFrame;
|
2015-11-12 03:54:14 +08:00
|
|
|
}
|
|
|
|
|
2016-07-22 04:18:30 +08:00
|
|
|
// Returns the index of the first relocation that points to a region between
|
|
|
|
// Begin and Begin+Size.
|
|
|
|
template <class IntTy, class RelTy>
|
|
|
|
static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels,
|
|
|
|
unsigned &RelocI) {
|
|
|
|
// Start search from RelocI for fast access. That works because the
|
|
|
|
// relocations are sorted in .eh_frame.
|
|
|
|
for (unsigned N = Rels.size(); RelocI < N; ++RelocI) {
|
|
|
|
const RelTy &Rel = Rels[RelocI];
|
|
|
|
if (Rel.r_offset < Begin)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (Rel.r_offset < Begin + Size)
|
|
|
|
return RelocI;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2016-05-23 07:53:00 +08:00
|
|
|
// .eh_frame is a sequence of CIE or FDE records.
|
|
|
|
// This function splits an input section into records and returns them.
|
2016-10-20 16:36:42 +08:00
|
|
|
template <class ELFT> void EhInputSection<ELFT>::split() {
|
2016-07-22 04:18:30 +08:00
|
|
|
// Early exit if already split.
|
|
|
|
if (!this->Pieces.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (RelocSection) {
|
2016-11-04 04:44:50 +08:00
|
|
|
ELFFile<ELFT> Obj = this->File->getObj();
|
2016-07-22 04:18:30 +08:00
|
|
|
if (RelocSection->sh_type == SHT_RELA)
|
2016-11-04 03:07:44 +08:00
|
|
|
split(check(Obj.relas(RelocSection)));
|
2016-07-22 04:18:30 +08:00
|
|
|
else
|
2016-11-04 03:07:44 +08:00
|
|
|
split(check(Obj.rels(RelocSection)));
|
2016-07-22 04:18:30 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class ELFT>
|
|
|
|
template <class RelTy>
|
|
|
|
void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) {
|
2016-09-12 21:13:53 +08:00
|
|
|
ArrayRef<uint8_t> Data = this->Data;
|
2016-07-22 04:18:30 +08:00
|
|
|
unsigned RelI = 0;
|
2016-05-23 07:53:00 +08:00
|
|
|
for (size_t Off = 0, End = Data.size(); Off != End;) {
|
2016-05-24 10:55:45 +08:00
|
|
|
size_t Size = readEhRecordSize<ELFT>(Data.slice(Off));
|
2016-07-22 04:18:30 +08:00
|
|
|
this->Pieces.emplace_back(Off, Data.slice(Off, Size),
|
|
|
|
getReloc(Off, Size, Rels, RelI));
|
2016-05-23 07:53:00 +08:00
|
|
|
// The empty record is the end marker.
|
|
|
|
if (Size == 4)
|
|
|
|
break;
|
|
|
|
Off += Size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-22 09:15:32 +08:00
|
|
|
// Returns the byte offset of the first entry of width EntSize in A whose
// bytes are all zero, or StringRef::npos if there is no such entry.
// Entries are examined at offsets 0, EntSize, 2 * EntSize, ...
static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) {
  StringRef S(reinterpret_cast<const char *>(A.data()), A.size());

  // Optimize the common case.
  if (EntSize == 1)
    return S.find(0);

  // The index is a size_t so that it cannot wrap on large inputs, and the
  // loop stops as soon as fewer than EntSize bytes remain. A trailing
  // partial entry is therefore never read, and the search terminates even
  // when the size is not a multiple of EntSize.
  for (size_t I = 0, N = S.size(); N - I >= EntSize; I += EntSize) {
    const char *B = S.begin() + I;
    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
      return I;
  }
  return StringRef::npos;
}
|
|
|
|
|
2016-05-23 08:40:24 +08:00
|
|
|
// Split SHF_STRINGS section. Such section is a sequence of
|
|
|
|
// null-terminated strings.
|
2016-08-03 12:39:42 +08:00
|
|
|
template <class ELFT>
|
|
|
|
std::vector<SectionPiece>
|
|
|
|
MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) {
|
2016-05-23 08:40:24 +08:00
|
|
|
std::vector<SectionPiece> V;
|
|
|
|
size_t Off = 0;
|
2016-10-26 20:36:56 +08:00
|
|
|
bool IsAlloca = this->Flags & SHF_ALLOC;
|
2016-05-23 08:40:24 +08:00
|
|
|
while (!Data.empty()) {
|
|
|
|
size_t End = findNull(Data, EntSize);
|
|
|
|
if (End == StringRef::npos)
|
2016-08-03 12:39:42 +08:00
|
|
|
fatal(getName(this) + ": string is not null terminated");
|
2016-05-23 08:40:24 +08:00
|
|
|
size_t Size = End + EntSize;
|
2016-10-20 18:55:58 +08:00
|
|
|
V.emplace_back(Off, !IsAlloca);
|
|
|
|
Hashes.push_back(hash_value(toStringRef(Data.slice(0, Size))));
|
2016-05-23 08:40:24 +08:00
|
|
|
Data = Data.slice(Size);
|
|
|
|
Off += Size;
|
2016-04-23 06:09:35 +08:00
|
|
|
}
|
2016-05-23 08:40:24 +08:00
|
|
|
return V;
|
|
|
|
}
|
2016-04-23 06:09:35 +08:00
|
|
|
|
2016-10-20 18:55:58 +08:00
|
|
|
template <class ELFT>
|
|
|
|
ArrayRef<uint8_t> MergeInputSection<ELFT>::getData(
|
|
|
|
std::vector<SectionPiece>::const_iterator I) const {
|
|
|
|
auto Next = I + 1;
|
|
|
|
size_t End = Next == Pieces.end() ? this->Data.size() : Next->InputOff;
|
|
|
|
return this->Data.slice(I->InputOff, End - I->InputOff);
|
|
|
|
}
|
|
|
|
|
2016-05-23 08:40:24 +08:00
|
|
|
// Split non-SHF_STRINGS section. Such section is a sequence of
|
|
|
|
// fixed size records.
|
2016-08-03 12:39:42 +08:00
|
|
|
template <class ELFT>
|
|
|
|
std::vector<SectionPiece>
|
|
|
|
MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data,
|
|
|
|
size_t EntSize) {
|
2016-05-23 08:40:24 +08:00
|
|
|
std::vector<SectionPiece> V;
|
2016-04-23 06:09:35 +08:00
|
|
|
size_t Size = Data.size();
|
|
|
|
assert((Size % EntSize) == 0);
|
2016-10-26 20:36:56 +08:00
|
|
|
bool IsAlloca = this->Flags & SHF_ALLOC;
|
2016-10-20 18:55:58 +08:00
|
|
|
for (unsigned I = 0, N = Size; I != N; I += EntSize) {
|
|
|
|
Hashes.push_back(hash_value(toStringRef(Data.slice(I, EntSize))));
|
|
|
|
V.emplace_back(I, !IsAlloca);
|
|
|
|
}
|
2016-05-23 08:40:24 +08:00
|
|
|
return V;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class ELFT>
|
|
|
|
MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F,
|
2016-09-08 22:06:08 +08:00
|
|
|
const Elf_Shdr *Header,
|
|
|
|
StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {}
|
2016-05-24 00:55:43 +08:00
|
|
|
|
|
|
|
// Populates this->Pieces from the section data, splitting it either as
// null-terminated strings (SHF_STRINGS) or as fixed-size records. When
// section garbage collection is enabled and the section is SHF_ALLOC, the
// pieces containing the offsets recorded in LiveOffsets are then marked
// live.
template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() {
  ArrayRef<uint8_t> Contents = this->Data;
  uintX_t EntSize = this->Entsize;

  const bool IsStrings = this->Flags & SHF_STRINGS;
  this->Pieces = IsStrings ? splitStrings(Contents, EntSize)
                           : splitNonStrings(Contents, EntSize);

  // Liveness marking applies only under GC and only to allocated sections.
  if (!Config->GcSections || !(this->Flags & SHF_ALLOC))
    return;
  for (uintX_t Off : LiveOffsets)
    this->getSectionPiece(Off)->Live = true;
}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool MergeInputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == InputSectionBase<ELFT>::Merge;
|
2015-10-20 05:00:02 +08:00
|
|
|
}
|
|
|
|
|
Avoid doing binary search.
MergeInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
That is usually the case when creating an executable with debug info,
so it is pretty common.
The reason why it is slow is that it has to do fairly complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contiguous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with a hash table lookup. It seems to be working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up linking by about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
// Do binary search to get a section piece at a given input offset.
|
2015-10-20 05:00:02 +08:00
|
|
|
template <class ELFT>
|
2016-07-21 21:32:37 +08:00
|
|
|
SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
|
|
|
|
auto *This = static_cast<const MergeInputSection<ELFT> *>(this);
|
2016-06-23 12:33:42 +08:00
|
|
|
return const_cast<SectionPiece *>(This->getSectionPiece(Offset));
|
|
|
|
}
|
|
|
|
|
2016-10-19 22:17:36 +08:00
|
|
|
// Binary search equivalent to std::upper_bound: returns the first position
// in [First, Last) whose element E satisfies Comp(Value, E), or Last if
// there is none. The range must be partitioned with respect to that
// predicate, and It must support `It + size_t`.
//
// The loop halves the candidate window unconditionally on every iteration;
// each step only selects which half to keep, so the number of iterations
// depends solely on the size of the range.
template <class It, class T, class Compare>
static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) {
  size_t Size = std::distance(First, Last);

  // An empty range holds no element greater than Value. Returning Last
  // matches std::upper_bound and avoids dereferencing Last below.
  if (Size == 0)
    return Last;

  // Invariant: the answer lies in [First, First + Size].
  while (Size != 1) {
    size_t H = Size / 2;
    const It MI = First + H;
    Size -= H;
    First = Comp(Value, *MI) ? First : First + H;
  }

  // One candidate is left: it is the answer if it compares greater than
  // Value, otherwise the answer is the position right after it.
  return Comp(Value, *First) ? First : First + 1;
}
|
|
|
|
|
2016-06-23 12:33:42 +08:00
|
|
|
template <class ELFT>
|
|
|
|
const SectionPiece *
|
2016-07-21 21:32:37 +08:00
|
|
|
MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const {
|
2016-09-12 21:13:53 +08:00
|
|
|
uintX_t Size = this->Data.size();
|
2015-10-25 06:51:01 +08:00
|
|
|
if (Offset >= Size)
|
2016-08-03 12:39:42 +08:00
|
|
|
fatal(getName(this) + ": entry is past the end of the section");
|
2015-10-25 06:51:01 +08:00
|
|
|
|
|
|
|
// Find the element this offset points to.
|
2016-10-19 22:17:36 +08:00
|
|
|
auto I = fastUpperBound(
|
2016-05-22 08:13:04 +08:00
|
|
|
Pieces.begin(), Pieces.end(), Offset,
|
|
|
|
[](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; });
|
2015-10-25 06:51:01 +08:00
|
|
|
--I;
|
2016-05-22 08:41:38 +08:00
|
|
|
return &*I;
|
2015-11-12 03:54:14 +08:00
|
|
|
}
|
|
|
|
|
Avoid doing binary search.
MergeInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
That is usually the case when creating an executable with debug info,
so it is pretty common.
The reason why it is slow is that it has to do fairly complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contiguous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with a hash table lookup. It seems to be working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up linking by about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
// Returns the offset in an output section for a given input offset.
|
|
|
|
// Because contents of a mergeable section is not contiguous in output,
|
|
|
|
// it is not just an addition to a base output offset.
|
2015-11-12 03:54:14 +08:00
|
|
|
template <class ELFT>
|
2016-06-23 12:33:42 +08:00
|
|
|
typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const {
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
auto It = OffsetMap.find(Offset);
|
|
|
|
if (It != OffsetMap.end())
|
|
|
|
return It->second;
|
|
|
|
|
2016-09-30 14:48:09 +08:00
|
|
|
if (!this->Live)
|
|
|
|
return 0;
|
|
|
|
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
// If Offset is not at beginning of a section piece, it is not in the map.
|
|
|
|
// In that case we need to search from the original section piece vector.
|
2016-06-23 12:33:42 +08:00
|
|
|
const SectionPiece &Piece = *this->getSectionPiece(Offset);
|
2016-09-30 14:37:29 +08:00
|
|
|
if (!Piece.Live)
|
|
|
|
return 0;
|
|
|
|
|
2016-05-22 08:13:04 +08:00
|
|
|
uintX_t Addend = Offset - Piece.InputOff;
|
2016-05-29 02:40:38 +08:00
|
|
|
return Piece.OutputOff + Addend;
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create a map from input offsets to output offsets for all section pieces.
|
|
|
|
// It is called after finalize().
|
2016-10-20 16:36:42 +08:00
|
|
|
template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() {
|
2016-10-18 23:31:23 +08:00
|
|
|
OffsetMap.reserve(this->Pieces.size());
|
2016-10-20 18:55:58 +08:00
|
|
|
auto HashI = Hashes.begin();
|
|
|
|
for (auto I = Pieces.begin(), E = Pieces.end(); I != E; ++I) {
|
|
|
|
uint32_t Hash = *HashI;
|
|
|
|
++HashI;
|
|
|
|
SectionPiece &Piece = *I;
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
if (!Piece.Live)
|
|
|
|
continue;
|
2016-10-20 18:55:58 +08:00
|
|
|
if (Piece.OutputOff == -1) {
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
// Offsets of tail-merged strings are computed lazily.
|
|
|
|
auto *OutSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
|
2016-10-20 18:55:58 +08:00
|
|
|
ArrayRef<uint8_t> D = this->getData(I);
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
StringRef S((const char *)D.data(), D.size());
|
2016-10-20 18:55:58 +08:00
|
|
|
CachedHashStringRef V(S, Hash);
|
2016-10-06 03:36:02 +08:00
|
|
|
Piece.OutputOff = OutSec->getOffset(V);
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
}
|
|
|
|
OffsetMap[Piece.InputOff] = Piece.OutputOff;
|
|
|
|
}
|
2015-08-14 03:18:30 +08:00
|
|
|
}
|
|
|
|
|
2015-12-20 18:57:34 +08:00
|
|
|
template <class ELFT>
|
2016-03-12 02:46:51 +08:00
|
|
|
MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(elf::ObjectFile<ELFT> *F,
|
2016-09-08 22:06:08 +08:00
|
|
|
const Elf_Shdr *Hdr,
|
|
|
|
StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Hdr, Name,
|
|
|
|
InputSectionBase<ELFT>::MipsReginfo) {
|
2016-09-12 21:13:53 +08:00
|
|
|
ArrayRef<uint8_t> Data = this->Data;
|
2016-01-07 06:42:43 +08:00
|
|
|
// Initialize this->Reginfo.
|
2016-09-12 21:13:53 +08:00
|
|
|
if (Data.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
|
2016-08-03 12:39:42 +08:00
|
|
|
error(getName(this) + ": invalid size of .reginfo section");
|
2016-05-04 18:07:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2016-09-12 21:13:53 +08:00
|
|
|
Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(Data.data());
|
2016-10-04 16:24:25 +08:00
|
|
|
if (Config->Relocatable && Reginfo->ri_gp_value)
|
2016-09-29 20:58:48 +08:00
|
|
|
error(getName(this) + ": unsupported non-zero ri_gp_value");
|
2015-12-25 21:02:13 +08:00
|
|
|
}
|
|
|
|
|
2015-12-20 18:57:34 +08:00
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool MipsReginfoInputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == InputSectionBase<ELFT>::MipsReginfo;
|
2015-12-20 18:57:34 +08:00
|
|
|
}
|
|
|
|
|
2016-05-04 18:07:38 +08:00
|
|
|
template <class ELFT>
|
|
|
|
MipsOptionsInputSection<ELFT>::MipsOptionsInputSection(elf::ObjectFile<ELFT> *F,
|
2016-09-08 22:06:08 +08:00
|
|
|
const Elf_Shdr *Hdr,
|
|
|
|
StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Hdr, Name,
|
|
|
|
InputSectionBase<ELFT>::MipsOptions) {
|
2016-05-04 18:07:38 +08:00
|
|
|
// Find ODK_REGINFO option in the section's content.
|
2016-09-12 21:13:53 +08:00
|
|
|
ArrayRef<uint8_t> D = this->Data;
|
2016-05-04 18:07:38 +08:00
|
|
|
while (!D.empty()) {
|
|
|
|
if (D.size() < sizeof(Elf_Mips_Options<ELFT>)) {
|
2016-08-03 12:39:42 +08:00
|
|
|
error(getName(this) + ": invalid size of .MIPS.options section");
|
2016-05-04 18:07:38 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(D.data());
|
|
|
|
if (O->kind == ODK_REGINFO) {
|
|
|
|
Reginfo = &O->getRegInfo();
|
2016-10-04 16:24:25 +08:00
|
|
|
if (Config->Relocatable && Reginfo->ri_gp_value)
|
2016-09-29 20:58:48 +08:00
|
|
|
error(getName(this) + ": unsupported non-zero ri_gp_value");
|
2016-05-04 18:07:38 +08:00
|
|
|
break;
|
|
|
|
}
|
2016-10-04 18:23:07 +08:00
|
|
|
if (!O->size)
|
|
|
|
fatal(getName(this) + ": zero option descriptor size");
|
2016-05-04 18:07:38 +08:00
|
|
|
D = D.slice(O->size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool MipsOptionsInputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == InputSectionBase<ELFT>::MipsOptions;
|
2016-05-04 18:07:38 +08:00
|
|
|
}
|
|
|
|
|
2016-08-12 14:28:49 +08:00
|
|
|
template <class ELFT>
|
|
|
|
MipsAbiFlagsInputSection<ELFT>::MipsAbiFlagsInputSection(
|
2016-09-08 22:06:08 +08:00
|
|
|
elf::ObjectFile<ELFT> *F, const Elf_Shdr *Hdr, StringRef Name)
|
|
|
|
: InputSectionBase<ELFT>(F, Hdr, Name,
|
|
|
|
InputSectionBase<ELFT>::MipsAbiFlags) {
|
2016-08-12 14:28:49 +08:00
|
|
|
// Initialize this->Flags.
|
2016-09-12 21:13:53 +08:00
|
|
|
ArrayRef<uint8_t> Data = this->Data;
|
|
|
|
if (Data.size() != sizeof(Elf_Mips_ABIFlags<ELFT>)) {
|
2016-08-12 14:28:49 +08:00
|
|
|
error("invalid size of .MIPS.abiflags section");
|
|
|
|
return;
|
|
|
|
}
|
2016-09-12 21:13:53 +08:00
|
|
|
Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Data.data());
|
2016-08-12 14:28:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <class ELFT>
|
2016-10-27 02:44:57 +08:00
|
|
|
bool MipsAbiFlagsInputSection<ELFT>::classof(const InputSectionData *S) {
|
2016-09-08 20:33:41 +08:00
|
|
|
return S->kind() == InputSectionBase<ELFT>::MipsAbiFlags;
|
2016-08-12 14:28:49 +08:00
|
|
|
}
|
|
|
|
|
2016-02-28 08:25:54 +08:00
|
|
|
// Explicit instantiations of every section class template defined in this
// file, one per ELF flavour (32/64-bit, little/big endian).

// Common base class.
template class elf::InputSectionBase<ELF32LE>;
template class elf::InputSectionBase<ELF32BE>;
template class elf::InputSectionBase<ELF64LE>;
template class elf::InputSectionBase<ELF64BE>;

// Regular input sections.
template class elf::InputSection<ELF32LE>;
template class elf::InputSection<ELF32BE>;
template class elf::InputSection<ELF64LE>;
template class elf::InputSection<ELF64BE>;

// Exception-handling frame sections.
template class elf::EhInputSection<ELF32LE>;
template class elf::EhInputSection<ELF32BE>;
template class elf::EhInputSection<ELF64LE>;
template class elf::EhInputSection<ELF64BE>;

// Mergeable sections.
template class elf::MergeInputSection<ELF32LE>;
template class elf::MergeInputSection<ELF32BE>;
template class elf::MergeInputSection<ELF64LE>;
template class elf::MergeInputSection<ELF64BE>;

// MIPS .reginfo sections.
template class elf::MipsReginfoInputSection<ELF32LE>;
template class elf::MipsReginfoInputSection<ELF32BE>;
template class elf::MipsReginfoInputSection<ELF64LE>;
template class elf::MipsReginfoInputSection<ELF64BE>;

// MIPS .MIPS.options sections.
template class elf::MipsOptionsInputSection<ELF32LE>;
template class elf::MipsOptionsInputSection<ELF32BE>;
template class elf::MipsOptionsInputSection<ELF64LE>;
template class elf::MipsOptionsInputSection<ELF64BE>;

// MIPS .MIPS.abiflags sections.
template class elf::MipsAbiFlagsInputSection<ELF32LE>;
template class elf::MipsAbiFlagsInputSection<ELF32BE>;
template class elf::MipsAbiFlagsInputSection<ELF64LE>;
template class elf::MipsAbiFlagsInputSection<ELF64BE>;
|