2015-09-22 08:01:39 +08:00
|
|
|
|
//===- InputSection.cpp ---------------------------------------------------===//
|
2015-07-25 05:03:07 +08:00
|
|
|
|
//
|
|
|
|
|
// The LLVM Linker
|
|
|
|
|
//
|
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2015-09-22 08:01:39 +08:00
|
|
|
|
#include "InputSection.h"
|
2015-09-26 03:24:57 +08:00
|
|
|
|
#include "Config.h"
|
2016-05-24 10:55:45 +08:00
|
|
|
|
#include "EhFrame.h"
|
2015-08-28 07:15:56 +08:00
|
|
|
|
#include "InputFiles.h"
|
2016-07-12 16:50:42 +08:00
|
|
|
|
#include "LinkerScript.h"
|
2015-09-22 06:01:00 +08:00
|
|
|
|
#include "OutputSections.h"
|
2016-11-23 18:07:46 +08:00
|
|
|
|
#include "Relocations.h"
|
2018-07-18 07:16:02 +08:00
|
|
|
|
#include "SymbolTable.h"
|
2017-12-10 00:56:18 +08:00
|
|
|
|
#include "Symbols.h"
|
2016-11-10 17:48:29 +08:00
|
|
|
|
#include "SyntheticSections.h"
|
2015-09-23 02:19:46 +08:00
|
|
|
|
#include "Target.h"
|
2016-07-09 00:10:27 +08:00
|
|
|
|
#include "Thunks.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-29 04:39:17 +08:00
|
|
|
|
#include "lld/Common/Memory.h"
|
2017-01-12 18:53:31 +08:00
|
|
|
|
#include "llvm/Object/Decompressor.h"
|
2017-06-09 04:16:21 +08:00
|
|
|
|
#include "llvm/Support/Compiler.h"
|
2016-06-24 19:18:44 +08:00
|
|
|
|
#include "llvm/Support/Compression.h"
|
2016-02-26 05:33:56 +08:00
|
|
|
|
#include "llvm/Support/Endian.h"
|
2017-05-25 02:31:48 +08:00
|
|
|
|
#include "llvm/Support/Threading.h"
|
2017-10-01 05:28:49 +08:00
|
|
|
|
#include "llvm/Support/xxhash.h"
|
2018-07-18 07:16:02 +08:00
|
|
|
|
#include <algorithm>
|
Parallelize uncompress() and splitIntoPieces().
Uncompressing section contents and splitting mergeable section contents
into smaller chunks are heavy tasks. They scan entire section contents
and do CPU-intensive tasks such as uncompressing zlib-compressed data
or computing a hash value for each section piece.
Luckily, these tasks are independent of each other, so we can do that
in parallel_for_each. The number of input sections is large (as opposed
to the number of output sections), so there's a large parallelism here.
Actually the current design to call uncompress() and splitIntoPieces()
in batch was chosen with doing this in mind. Basically what we need to
do here is to replace `for` with `parallel_for_each`.
It seems this patch improves latency significantly if linked programs
contain debug info (which in turn contain lots of mergeable strings.)
For example, the latency to link Clang (debug build) improved by 20% on
my machine as shown below. Note that ld.gold took 19.2 seconds to do
the same thing.
Before:
30801.782712 task-clock (msec) # 3.652 CPUs utilized ( +- 2.59% )
104,084 context-switches # 0.003 M/sec ( +- 1.02% )
5,063 cpu-migrations # 0.164 K/sec ( +- 13.66% )
2,528,130 page-faults # 0.082 M/sec ( +- 0.47% )
85,317,809,130 cycles # 2.770 GHz ( +- 2.62% )
67,352,463,373 stalled-cycles-frontend # 78.94% frontend cycles idle ( +- 3.06% )
<not supported> stalled-cycles-backend
44,295,945,493 instructions # 0.52 insns per cycle
# 1.52 stalled cycles per insn ( +- 0.44% )
8,572,384,877 branches # 278.308 M/sec ( +- 0.66% )
141,806,726 branch-misses # 1.65% of all branches ( +- 0.13% )
8.433424003 seconds time elapsed ( +- 1.20% )
After:
35523.764575 task-clock (msec) # 5.265 CPUs utilized ( +- 2.67% )
159,107 context-switches # 0.004 M/sec ( +- 0.48% )
8,123 cpu-migrations # 0.229 K/sec ( +- 23.34% )
2,372,483 page-faults # 0.067 M/sec ( +- 0.36% )
98,395,342,152 cycles # 2.770 GHz ( +- 2.62% )
79,294,670,125 stalled-cycles-frontend # 80.59% frontend cycles idle ( +- 3.03% )
<not supported> stalled-cycles-backend
46,274,151,813 instructions # 0.47 insns per cycle
# 1.71 stalled cycles per insn ( +- 0.47% )
8,987,621,670 branches # 253.003 M/sec ( +- 0.60% )
148,900,624 branch-misses # 1.66% of all branches ( +- 0.27% )
6.747548004 seconds time elapsed ( +- 0.40% )
llvm-svn: 287946
2016-11-26 04:05:08 +08:00
|
|
|
|
#include <mutex>
|
2018-07-18 07:16:02 +08:00
|
|
|
|
#include <set>
|
|
|
|
|
#include <vector>
|
2016-02-26 05:33:56 +08:00
|
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace llvm::ELF;
|
2015-09-22 06:01:00 +08:00
|
|
|
|
using namespace llvm::object;
|
2016-10-13 06:36:31 +08:00
|
|
|
|
using namespace llvm::support;
|
2016-02-26 05:33:56 +08:00
|
|
|
|
using namespace llvm::support::endian;
|
2017-03-31 03:13:47 +08:00
|
|
|
|
using namespace llvm::sys;
|
2015-07-25 05:03:07 +08:00
|
|
|
|
|
|
|
|
|
using namespace lld;
|
2016-02-28 08:25:54 +08:00
|
|
|
|
using namespace lld::elf;
|
2015-07-25 05:03:07 +08:00
|
|
|
|
|
2017-02-27 10:32:08 +08:00
|
|
|
|
std::vector<InputSectionBase *> elf::InputSections;
|
|
|
|
|
|
2016-11-24 02:07:33 +08:00
|
|
|
|
// Returns a string to construct an error message.
|
2017-02-23 10:28:28 +08:00
|
|
|
|
std::string lld::toString(const InputSectionBase *Sec) {
|
2017-04-29 04:00:09 +08:00
|
|
|
|
return (toString(Sec->File) + ":(" + Sec->Name + ")").str();
|
2016-11-24 02:07:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2016-09-12 21:13:53 +08:00
|
|
|
|
template <class ELFT>
|
2017-12-21 10:03:39 +08:00
|
|
|
|
static ArrayRef<uint8_t> getSectionContents(ObjFile<ELFT> &File,
|
|
|
|
|
const typename ELFT::Shdr &Hdr) {
|
|
|
|
|
if (Hdr.sh_type == SHT_NOBITS)
|
|
|
|
|
return makeArrayRef<uint8_t>(nullptr, Hdr.sh_size);
|
|
|
|
|
return check(File.getObj().getSectionContents(&Hdr));
|
2016-09-12 21:13:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-02-23 10:28:28 +08:00
|
|
|
|
// Constructs a section from already-decoded header fields.
//
// Reports an error for mergeable sections larger than 4 GiB and a fatal
// error when the alignment is not a power of two. An alignment of 0 is
// normalized to 1.
InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
                                   uint32_t Type, uint64_t Entsize,
                                   uint32_t Link, uint32_t Info,
                                   uint32_t Alignment, ArrayRef<uint8_t> Data,
                                   StringRef Name, Kind SectionKind)
    : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info,
                  Link),
      File(File), Data(Data) {
  // In order to reduce memory allocation, mergeable sections are assumed to
  // be smaller than 4 GiB.
  const bool IsMerge = SectionKind == SectionBase::Merge;
  if (IsMerge && Data.size() > UINT32_MAX)
    error(toString(this) + ": section too large");

  NumRelocations = 0;
  AreRelocsRela = false;

  // The ELF spec states that a value of 0 means the section has no
  // alignment constraints, so treat it the same as 1.
  uint32_t Normalized = Alignment == 0 ? 1 : Alignment;
  if (!isPowerOf2_64(Normalized))
    fatal(toString(File) + ": section sh_addralign is not a power of 2");
  this->Alignment = Normalized;
}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
|
2017-06-10 08:38:55 +08:00
|
|
|
|
// Drop SHF_GROUP bit unless we are producing a re-linkable object file.
|
|
|
|
|
// SHF_GROUP is a marker that a section belongs to some comdat group.
|
|
|
|
|
// That flag doesn't make sense in an executable.
|
|
|
|
|
static uint64_t getFlags(uint64_t Flags) {
|
|
|
|
|
Flags &= ~(uint64_t)SHF_INFO_LINK;
|
|
|
|
|
if (!Config->Relocatable)
|
|
|
|
|
Flags &= ~(uint64_t)SHF_GROUP;
|
|
|
|
|
return Flags;
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-23 09:00:41 +08:00
|
|
|
|
// GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of
|
|
|
|
|
// March 2017) fail to infer section types for sections starting with
|
|
|
|
|
// ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of
|
|
|
|
|
// SHF_INIT_ARRAY. As a result, the following assembler directive
|
|
|
|
|
// creates ".init_array.100" with SHT_PROGBITS, for example.
|
|
|
|
|
//
|
|
|
|
|
// .section .init_array.100, "aw"
|
|
|
|
|
//
|
|
|
|
|
// This function forces SHT_{INIT,FINI}_ARRAY so that we can handle
|
|
|
|
|
// incorrect inputs as if they were correct from the beginning.
|
|
|
|
|
static uint64_t getType(uint64_t Type, StringRef Name) {
|
|
|
|
|
if (Type == SHT_PROGBITS && Name.startswith(".init_array."))
|
|
|
|
|
return SHT_INIT_ARRAY;
|
|
|
|
|
if (Type == SHT_PROGBITS && Name.startswith(".fini_array."))
|
|
|
|
|
return SHT_FINI_ARRAY;
|
|
|
|
|
return Type;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-26 08:54:03 +08:00
|
|
|
|
template <class ELFT>
|
2017-12-21 10:03:39 +08:00
|
|
|
|
InputSectionBase::InputSectionBase(ObjFile<ELFT> &File,
|
|
|
|
|
const typename ELFT::Shdr &Hdr,
|
2017-02-23 10:28:28 +08:00
|
|
|
|
StringRef Name, Kind SectionKind)
|
2017-12-21 10:03:39 +08:00
|
|
|
|
: InputSectionBase(&File, getFlags(Hdr.sh_flags),
|
|
|
|
|
getType(Hdr.sh_type, Name), Hdr.sh_entsize, Hdr.sh_link,
|
|
|
|
|
Hdr.sh_info, Hdr.sh_addralign,
|
2017-03-23 09:00:41 +08:00
|
|
|
|
getSectionContents(File, Hdr), Name, SectionKind) {
|
2017-03-09 03:35:29 +08:00
|
|
|
|
// We reject object files having insanely large alignments even though
|
|
|
|
|
// they are allowed by the spec. I think 4GB is a reasonable limitation.
|
|
|
|
|
// We might want to relax this in the future.
|
2017-12-21 10:03:39 +08:00
|
|
|
|
if (Hdr.sh_addralign > UINT32_MAX)
|
|
|
|
|
fatal(toString(&File) + ": section sh_addralign is too large");
|
2016-11-01 17:17:50 +08:00
|
|
|
|
}
|
2016-10-26 08:54:03 +08:00
|
|
|
|
|
2017-03-08 23:44:30 +08:00
|
|
|
|
size_t InputSectionBase::getSize() const {
|
2017-02-27 10:56:02 +08:00
|
|
|
|
if (auto *S = dyn_cast<SyntheticSection>(this))
|
2016-11-10 17:48:29 +08:00
|
|
|
|
return S->getSize();
|
|
|
|
|
|
2016-11-08 22:47:16 +08:00
|
|
|
|
return Data.size();
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-08 23:57:17 +08:00
|
|
|
|
uint64_t InputSectionBase::getOffsetInFile() const {
|
2017-03-08 22:12:52 +08:00
|
|
|
|
const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart();
|
|
|
|
|
const uint8_t *SecStart = Data.begin();
|
|
|
|
|
return SecStart - FileStart;
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 06:36:28 +08:00
|
|
|
|
uint64_t SectionBase::getOffset(uint64_t Offset) const {
|
2016-09-08 20:33:41 +08:00
|
|
|
|
switch (kind()) {
|
2017-03-09 06:36:28 +08:00
|
|
|
|
case Output: {
|
|
|
|
|
auto *OS = cast<OutputSection>(this);
|
|
|
|
|
// For output sections we treat offset -1 as the end of the section.
|
|
|
|
|
return Offset == uint64_t(-1) ? OS->Size : Offset;
|
|
|
|
|
}
|
2015-11-12 00:50:37 +08:00
|
|
|
|
case Regular:
|
2018-04-20 00:54:30 +08:00
|
|
|
|
case Synthetic:
|
2018-04-20 02:00:46 +08:00
|
|
|
|
return cast<InputSection>(this)->getOffset(Offset);
|
2015-11-12 03:54:14 +08:00
|
|
|
|
case EHFrame:
|
2016-07-21 04:19:58 +08:00
|
|
|
|
// The file crtbeginT.o has relocations pointing to the start of an empty
|
|
|
|
|
// .eh_frame that is known to be the first in the link. It does that to
|
|
|
|
|
// identify the start of the output .eh_frame.
|
|
|
|
|
return Offset;
|
2015-11-12 00:50:37 +08:00
|
|
|
|
case Merge:
|
2017-03-07 04:23:56 +08:00
|
|
|
|
const MergeInputSection *MS = cast<MergeInputSection>(this);
|
2017-06-01 04:17:44 +08:00
|
|
|
|
if (InputSection *IS = MS->getParent())
|
2018-04-20 02:00:46 +08:00
|
|
|
|
return IS->getOffset(MS->getParentOffset(Offset));
|
2018-04-20 00:05:07 +08:00
|
|
|
|
return MS->getParentOffset(Offset);
|
2015-11-12 00:50:37 +08:00
|
|
|
|
}
|
2016-03-12 16:31:34 +08:00
|
|
|
|
llvm_unreachable("invalid section kind");
|
2015-11-12 00:50:37 +08:00
|
|
|
|
}
|
|
|
|
|
|
2018-03-24 08:35:11 +08:00
|
|
|
|
uint64_t SectionBase::getVA(uint64_t Offset) const {
|
|
|
|
|
const OutputSection *Out = getOutputSection();
|
|
|
|
|
return (Out ? Out->Addr : 0) + getOffset(Offset);
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 06:36:28 +08:00
|
|
|
|
// Returns the output section this section belongs to, or nullptr if there
// is none. Merge and EH-frame input sections are resolved through their
// enclosing section first; an OutputSection returns itself.
OutputSection *SectionBase::getOutputSection() {
  // Maps a possibly-null input section to its parent output section.
  auto ParentOf = [](InputSection *Sec) -> OutputSection * {
    return Sec ? Sec->getParent() : nullptr;
  };

  if (auto *IS = dyn_cast<InputSection>(this))
    return ParentOf(IS);
  if (auto *MS = dyn_cast<MergeInputSection>(this))
    return ParentOf(MS->getParent());
  if (auto *EH = dyn_cast<EhInputSection>(this))
    return ParentOf(EH->getParent());
  return cast<OutputSection>(this);
}
|
|
|
|
|
|
2018-02-13 05:56:14 +08:00
|
|
|
|
// Decompress section contents if required. Note that this function
|
2017-10-04 08:19:41 +08:00
|
|
|
|
// is called from parallelForEach, so it must be thread-safe.
|
2018-02-13 05:56:14 +08:00
|
|
|
|
void InputSectionBase::maybeDecompress() {
|
2018-02-13 06:32:57 +08:00
|
|
|
|
if (DecompressBuf)
|
|
|
|
|
return;
|
|
|
|
|
if (!(Flags & SHF_COMPRESSED) && !Name.startswith(".zdebug"))
|
2017-10-04 08:19:41 +08:00
|
|
|
|
return;
|
|
|
|
|
|
2018-02-13 06:25:45 +08:00
|
|
|
|
// Decompress a section.
|
2017-03-22 08:01:11 +08:00
|
|
|
|
Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data),
|
|
|
|
|
Config->IsLE, Config->Is64));
|
2017-01-12 18:53:31 +08:00
|
|
|
|
|
2017-01-13 05:44:20 +08:00
|
|
|
|
size_t Size = Dec.getDecompressedSize();
|
2018-02-13 06:25:45 +08:00
|
|
|
|
DecompressBuf.reset(new char[Size + Name.size()]());
|
2018-02-13 05:56:14 +08:00
|
|
|
|
if (Error E = Dec.decompress({DecompressBuf.get(), Size}))
|
2017-01-13 05:43:58 +08:00
|
|
|
|
fatal(toString(this) +
|
|
|
|
|
": decompress failed: " + llvm::toString(std::move(E)));
|
2017-08-17 08:27:55 +08:00
|
|
|
|
|
2018-02-13 05:56:14 +08:00
|
|
|
|
Data = makeArrayRef((uint8_t *)DecompressBuf.get(), Size);
|
2017-12-20 00:29:02 +08:00
|
|
|
|
Flags &= ~(uint64_t)SHF_COMPRESSED;
|
2018-02-13 06:25:45 +08:00
|
|
|
|
|
|
|
|
|
// A section name may have been altered if compressed. If that's
|
|
|
|
|
// the case, restore the original name. (i.e. ".zdebug_" -> ".debug_")
|
|
|
|
|
if (Name.startswith(".zdebug")) {
|
|
|
|
|
DecompressBuf[Size] = '.';
|
|
|
|
|
memcpy(&DecompressBuf[Size + 1], Name.data() + 2, Name.size() - 2);
|
|
|
|
|
Name = StringRef(&DecompressBuf[Size], Name.size() - 1);
|
|
|
|
|
}
|
2016-06-24 19:18:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-06-01 03:09:52 +08:00
|
|
|
|
// Returns the section referenced by this section's Link field. The caller
// must ensure that Link is non-zero and that SHF_LINK_ORDER is set; both
// are asserted.
InputSection *InputSectionBase::getLinkOrderDep() const {
  assert(Link);
  assert(Flags & SHF_LINK_ORDER);
  ArrayRef<InputSectionBase *> Sections = File->getSections();
  return cast<InputSection>(Sections[Link]);
}
|
|
|
|
|
|
2018-07-18 07:16:02 +08:00
|
|
|
|
// Find a function symbol that encloses a given location.
|
|
|
|
|
template <class ELFT>
|
|
|
|
|
Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) {
|
|
|
|
|
for (Symbol *B : File->getSymbols())
|
|
|
|
|
if (Defined *D = dyn_cast<Defined>(B))
|
2018-08-01 16:11:54 +08:00
|
|
|
|
if (D->Section == this && D->Type == STT_FUNC && D->Value <= Offset &&
|
|
|
|
|
Offset < D->Value + D->Size)
|
2018-07-18 07:16:02 +08:00
|
|
|
|
return D;
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-26 02:51:53 +08:00
|
|
|
|
// Returns a source location string. Used to construct an error message.
|
|
|
|
|
template <class ELFT>
|
2017-02-23 10:28:28 +08:00
|
|
|
|
std::string InputSectionBase::getLocation(uint64_t Offset) {
|
2017-03-14 16:33:45 +08:00
|
|
|
|
// We don't have file for synthetic sections.
|
|
|
|
|
if (getFile<ELFT>() == nullptr)
|
|
|
|
|
return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")")
|
|
|
|
|
.str();
|
|
|
|
|
|
2016-11-26 02:51:53 +08:00
|
|
|
|
// First check if we can get desired values from debugging information.
|
2018-07-16 23:29:35 +08:00
|
|
|
|
if (Optional<DILineInfo> Info = getFile<ELFT>()->getDILineInfo(this, Offset))
|
|
|
|
|
return Info->FileName + ":" + std::to_string(Info->Line);
|
2016-11-26 02:51:53 +08:00
|
|
|
|
|
|
|
|
|
// File->SourceFile contains STT_FILE symbol that contains a
|
|
|
|
|
// source file name. If it's missing, we use an object file name.
|
2017-02-23 10:28:28 +08:00
|
|
|
|
std::string SrcFile = getFile<ELFT>()->SourceFile;
|
2016-11-26 02:51:53 +08:00
|
|
|
|
if (SrcFile.empty())
|
|
|
|
|
SrcFile = toString(File);
|
|
|
|
|
|
2018-07-18 07:16:02 +08:00
|
|
|
|
if (Defined *D = getEnclosingFunction<ELFT>(Offset))
|
|
|
|
|
return SrcFile + ":(function " + toString(*D) + ")";
|
2016-11-26 02:51:53 +08:00
|
|
|
|
|
|
|
|
|
// If there's no symbol, print out the offset in the section.
|
|
|
|
|
return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-01 15:42:38 +08:00
|
|
|
|
// This function is intended to be used for constructing an error message.
|
|
|
|
|
// The returned message looks like this:
|
2017-03-31 03:13:47 +08:00
|
|
|
|
//
|
|
|
|
|
// foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
|
|
|
|
|
//
|
2017-11-01 15:42:38 +08:00
|
|
|
|
// Returns an empty string if there's no way to get line info.
|
2017-11-04 05:21:47 +08:00
|
|
|
|
std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
|
2017-12-24 01:21:39 +08:00
|
|
|
|
return File->getSrcMsg(Sym, *this, Offset);
|
2017-03-31 03:13:47 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns a filename string along with an optional section name. This
|
|
|
|
|
// function is intended to be used for constructing an error
|
|
|
|
|
// message. The returned message looks like this:
|
|
|
|
|
//
|
|
|
|
|
// path/to/foo.o:(function bar)
|
|
|
|
|
//
|
|
|
|
|
// or
|
|
|
|
|
//
|
|
|
|
|
// path/to/foo.o:(function bar) in archive path/to/bar.a
|
2017-10-27 11:13:54 +08:00
|
|
|
|
std::string InputSectionBase::getObjMsg(uint64_t Off) {
|
2017-07-05 22:55:43 +08:00
|
|
|
|
std::string Filename = File->getName();
|
2017-03-31 03:13:47 +08:00
|
|
|
|
|
|
|
|
|
std::string Archive;
|
|
|
|
|
if (!File->ArchiveName.empty())
|
2018-02-16 11:26:53 +08:00
|
|
|
|
Archive = " in archive " + File->ArchiveName;
|
2017-03-31 03:13:47 +08:00
|
|
|
|
|
|
|
|
|
// Find a symbol that encloses a given location.
|
2017-11-04 05:21:47 +08:00
|
|
|
|
for (Symbol *B : File->getSymbols())
|
2017-11-06 12:35:31 +08:00
|
|
|
|
if (auto *D = dyn_cast<Defined>(B))
|
2017-03-31 03:13:47 +08:00
|
|
|
|
if (D->Section == this && D->Value <= Off && Off < D->Value + D->Size)
|
|
|
|
|
return Filename + ":(" + toString(*D) + ")" + Archive;
|
|
|
|
|
|
|
|
|
|
// If there's no symbol, print out the offset in the section.
|
|
|
|
|
return (Filename + ":(" + Name + "+0x" + utohexstr(Off) + ")" + Archive)
|
|
|
|
|
.str();
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-21 10:11:51 +08:00
|
|
|
|
// Sentinel section object; section pointers are compared against its
// address to recognize discarded sections.
InputSection InputSection::Discarded(nullptr, 0, 0, 0, ArrayRef<uint8_t>(), "");

// Constructs a section from explicit fields. Entsize, Link and Info are not
// taken from the caller and are always passed to the base class as 0.
InputSection::InputSection(InputFile *F, uint64_t Flags, uint32_t Type,
                           uint32_t Alignment, ArrayRef<uint8_t> Data,
                           StringRef Name, Kind K)
    : InputSectionBase(F, Flags, Type,
                       /*Entsize*/ 0,
                       /*Link*/ 0,
                       /*Info*/ 0,
                       Alignment, Data, Name, K) {
}
|
2016-10-26 08:54:03 +08:00
|
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
|
template <class ELFT>
|
2017-12-21 10:03:39 +08:00
|
|
|
|
InputSection::InputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
|
2017-07-27 06:13:32 +08:00
|
|
|
|
StringRef Name)
|
2017-02-23 10:28:28 +08:00
|
|
|
|
: InputSectionBase(F, Header, Name, InputSectionBase::Regular) {}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
|
2017-03-09 06:36:28 +08:00
|
|
|
|
bool InputSection::classof(const SectionBase *S) {
|
|
|
|
|
return S->kind() == SectionBase::Regular ||
|
|
|
|
|
S->kind() == SectionBase::Synthetic;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-01 04:17:44 +08:00
|
|
|
|
// Returns Parent as an OutputSection, or nullptr when no parent is set.
OutputSection *InputSection::getParent() const {
  if (!Parent)
    return nullptr;
  return cast<OutputSection>(Parent);
}
|
|
|
|
|
|
2017-06-09 11:19:08 +08:00
|
|
|
|
// Copy SHT_GROUP section contents. Used only for the -r option.
|
|
|
|
|
template <class ELFT> void InputSection::copyShtGroup(uint8_t *Buf) {
|
|
|
|
|
// ELFT::Word is the 32-bit integral type in the target endianness.
|
|
|
|
|
typedef typename ELFT::Word u32;
|
|
|
|
|
ArrayRef<u32> From = getDataAs<u32>();
|
|
|
|
|
auto *To = reinterpret_cast<u32 *>(Buf);
|
|
|
|
|
|
|
|
|
|
// The first entry is not a section number but a flag.
|
2017-05-29 16:37:50 +08:00
|
|
|
|
*To++ = From[0];
|
|
|
|
|
|
2017-06-09 11:19:08 +08:00
|
|
|
|
// Adjust section numbers because section numbers in an input object
|
|
|
|
|
// files are different in the output.
|
2017-12-20 00:29:02 +08:00
|
|
|
|
ArrayRef<InputSectionBase *> Sections = File->getSections();
|
2017-06-09 11:19:08 +08:00
|
|
|
|
for (uint32_t Idx : From.slice(1))
|
|
|
|
|
*To++ = Sections[Idx]->getOutputSection()->SectionIndex;
|
2017-05-29 16:37:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-23 09:58:43 +08:00
|
|
|
|
// For a SHT_REL/SHT_RELA section that has an owning file, returns the
// section its Info field refers to. Returns nullptr for any other section.
InputSectionBase *InputSection::getRelocatedSection() const {
  const bool IsRelocSection = Type == SHT_RELA || Type == SHT_REL;
  if (File && IsRelocSection)
    return File->getSections()[Info];
  return nullptr;
}
|
|
|
|
|
|
2017-02-09 00:18:10 +08:00
|
|
|
|
// This is used for -r and --emit-relocs. We can't use memcpy to copy
|
|
|
|
|
// relocations because we need to update symbol table offset and section index
|
|
|
|
|
// for each relocation. So we copy relocations one by one.
|
2017-02-24 00:49:07 +08:00
|
|
|
|
template <class ELFT, class RelTy>
|
|
|
|
|
void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
2017-10-10 12:53:14 +08:00
|
|
|
|
InputSectionBase *Sec = getRelocatedSection();
|
|
|
|
|
|
2016-03-13 13:06:50 +08:00
|
|
|
|
for (const RelTy &Rel : Rels) {
|
2017-10-12 06:49:24 +08:00
|
|
|
|
RelType Type = Rel.getType(Config->IsMips64EL);
|
2017-12-20 00:29:02 +08:00
|
|
|
|
Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
|
2016-02-25 16:23:37 +08:00
|
|
|
|
|
2017-02-24 00:49:07 +08:00
|
|
|
|
auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf);
|
2016-03-13 13:06:50 +08:00
|
|
|
|
Buf += sizeof(RelTy);
|
2016-02-25 16:23:37 +08:00
|
|
|
|
|
2018-02-02 17:50:07 +08:00
|
|
|
|
if (RelTy::IsRela)
|
2016-08-02 16:49:57 +08:00
|
|
|
|
P->r_addend = getAddend<ELFT>(Rel);
|
2017-02-09 00:18:10 +08:00
|
|
|
|
|
2017-02-15 09:53:23 +08:00
|
|
|
|
// Output section VA is zero for -r, so r_offset is an offset within the
|
|
|
|
|
// section, but for --emit-relocs it is an virtual address.
|
2018-03-24 08:35:11 +08:00
|
|
|
|
P->r_offset = Sec->getVA(Rel.r_offset);
|
2017-11-04 08:31:04 +08:00
|
|
|
|
P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Sym), Type,
|
2017-03-18 07:29:01 +08:00
|
|
|
|
Config->IsMips64EL);
|
2017-02-15 09:53:23 +08:00
|
|
|
|
|
2017-11-04 08:31:04 +08:00
|
|
|
|
if (Sym.Type == STT_SECTION) {
|
2017-02-11 09:40:49 +08:00
|
|
|
|
// We combine multiple section symbols into only one per
|
|
|
|
|
// section. This means we have to update the addend. That is
|
|
|
|
|
// trivial for Elf_Rela, but for Elf_Rel we have to write to the
|
|
|
|
|
// section data. We do that by adding to the Relocation vector.
|
2017-02-15 08:59:50 +08:00
|
|
|
|
|
|
|
|
|
// .eh_frame is horribly special and can reference discarded sections. To
|
|
|
|
|
// avoid having to parse and recreate .eh_frame, we just replace any
|
|
|
|
|
// relocation in it pointing to discarded sections with R_*_NONE, which
|
|
|
|
|
// hopefully creates a frame that is ignored at runtime.
|
2017-11-30 14:18:31 +08:00
|
|
|
|
auto *D = dyn_cast<Defined>(&Sym);
|
|
|
|
|
if (!D) {
|
|
|
|
|
error("STT_SECTION symbol should be defined");
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
SectionBase *Section = D->Section;
|
2017-02-24 00:49:07 +08:00
|
|
|
|
if (Section == &InputSection::Discarded) {
|
2017-02-15 08:59:50 +08:00
|
|
|
|
P->setSymbolAndType(0, 0, false);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 23:34:06 +08:00
|
|
|
|
int64_t Addend = getAddend<ELFT>(Rel);
|
|
|
|
|
const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset;
|
|
|
|
|
if (!RelTy::IsRela)
|
|
|
|
|
Addend = Target->getImplicitAddend(BufLoc, Type);
|
|
|
|
|
|
|
|
|
|
if (Config->EMachine == EM_MIPS && Config->Relocatable &&
|
|
|
|
|
Target->getRelExpr(Type, Sym, BufLoc) == R_MIPS_GOTREL) {
|
|
|
|
|
// Some MIPS relocations depend on "gp" value. By default,
|
|
|
|
|
// this value has 0x7ff0 offset from a .got section. But
|
|
|
|
|
// relocatable files produced by a complier or a linker
|
|
|
|
|
// might redefine this default value and we must use it
|
|
|
|
|
// for a calculation of the relocation result. When we
|
|
|
|
|
// generate EXE or DSO it's trivial. Generating a relocatable
|
|
|
|
|
// output is more difficult case because the linker does
|
|
|
|
|
// not calculate relocations in this mode and loses
|
|
|
|
|
// individual "gp" values used by each input object file.
|
|
|
|
|
// As a workaround we add the "gp" value to the relocation
|
|
|
|
|
// addend and save it back to the file.
|
|
|
|
|
Addend += Sec->getFile<ELFT>()->MipsGp0;
|
2017-02-11 09:40:49 +08:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 23:34:06 +08:00
|
|
|
|
if (RelTy::IsRela)
|
|
|
|
|
P->r_addend = Sym.getVA(Addend) - Section->getOutputSection()->Addr;
|
|
|
|
|
else if (Config->Relocatable)
|
|
|
|
|
Sec->Relocations.push_back({R_ABS, Type, Rel.r_offset, Addend, &Sym});
|
|
|
|
|
}
|
2016-02-25 16:23:37 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// The ARM and AArch64 ABI handle pc-relative relocations to undefined weak
|
|
|
|
|
// references specially. The general rule is that the value of the symbol in
|
|
|
|
|
// this context is the address of the place P. A further special case is that
|
|
|
|
|
// branch relocations to an undefined weak reference resolve to the next
|
|
|
|
|
// instruction.
|
2017-10-12 06:49:24 +08:00
|
|
|
|
static uint32_t getARMUndefinedRelativeWeakVA(RelType Type, uint32_t A,
|
2016-11-09 18:22:29 +08:00
|
|
|
|
uint32_t P) {
|
|
|
|
|
switch (Type) {
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// Unresolved branch relocations to weak references resolve to next
|
|
|
|
|
// instruction, this will be either 2 or 4 bytes on from P.
|
2016-11-09 18:22:29 +08:00
|
|
|
|
case R_ARM_THM_JUMP11:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + 2 + A;
|
2016-11-09 18:22:29 +08:00
|
|
|
|
case R_ARM_CALL:
|
|
|
|
|
case R_ARM_JUMP24:
|
|
|
|
|
case R_ARM_PC24:
|
|
|
|
|
case R_ARM_PLT32:
|
|
|
|
|
case R_ARM_PREL31:
|
|
|
|
|
case R_ARM_THM_JUMP19:
|
|
|
|
|
case R_ARM_THM_JUMP24:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + 4 + A;
|
2016-11-09 18:22:29 +08:00
|
|
|
|
case R_ARM_THM_CALL:
|
|
|
|
|
// We don't want an interworking BLX to ARM
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + 5 + A;
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// Unresolved non branch pc-relative relocations
|
|
|
|
|
// R_ARM_TARGET2 which can be resolved relatively is not present as it never
|
|
|
|
|
// targets a weak-reference.
|
|
|
|
|
case R_ARM_MOVW_PREL_NC:
|
|
|
|
|
case R_ARM_MOVT_PREL:
|
|
|
|
|
case R_ARM_REL32:
|
|
|
|
|
case R_ARM_THM_MOVW_PREL_NC:
|
|
|
|
|
case R_ARM_THM_MOVT_PREL:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + A;
|
2016-11-09 18:22:29 +08:00
|
|
|
|
}
|
2017-06-19 17:43:43 +08:00
|
|
|
|
llvm_unreachable("ARM pc-relative relocation expected\n");
|
2016-11-09 18:22:29 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// The comment above getARMUndefinedRelativeWeakVA applies to this function.
|
2016-11-14 18:14:18 +08:00
|
|
|
|
static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A,
|
2016-11-09 18:22:29 +08:00
|
|
|
|
uint64_t P) {
|
|
|
|
|
switch (Type) {
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// Unresolved branch relocations to weak references resolve to next
|
|
|
|
|
// instruction, this is 4 bytes on from P.
|
2016-11-09 18:22:29 +08:00
|
|
|
|
case R_AARCH64_CALL26:
|
|
|
|
|
case R_AARCH64_CONDBR19:
|
|
|
|
|
case R_AARCH64_JUMP26:
|
|
|
|
|
case R_AARCH64_TSTBR14:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + 4 + A;
|
2017-06-19 17:43:43 +08:00
|
|
|
|
// Unresolved non branch pc-relative relocations
|
|
|
|
|
case R_AARCH64_PREL16:
|
|
|
|
|
case R_AARCH64_PREL32:
|
|
|
|
|
case R_AARCH64_PREL64:
|
|
|
|
|
case R_AARCH64_ADR_PREL_LO21:
|
2017-09-21 07:49:50 +08:00
|
|
|
|
case R_AARCH64_LD_PREL_LO19:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return P + A;
|
2016-11-09 18:22:29 +08:00
|
|
|
|
}
|
2017-06-19 17:43:43 +08:00
|
|
|
|
llvm_unreachable("AArch64 pc-relative relocation expected\n");
|
2016-11-09 18:22:29 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-05-18 17:12:21 +08:00
|
|
|
|
// ARM SBREL relocations are of the form S + A - B where B is the static base
|
|
|
|
|
// The ARM ABI defines base to be "addressing origin of the output segment
|
|
|
|
|
// defining the symbol S". We defined the "addressing origin"/static base to be
|
2017-11-04 08:31:04 +08:00
|
|
|
|
// the base of the PT_LOAD segment containing the Sym.
|
2017-05-18 17:12:21 +08:00
|
|
|
|
// The procedure call standard only defines a Read Write Position Independent
|
|
|
|
|
// RWPI variant so in practice we should expect the static base to be the base
|
|
|
|
|
// of the RW segment.
|
2017-11-04 08:31:04 +08:00
|
|
|
|
static uint64_t getARMStaticBase(const Symbol &Sym) {
|
|
|
|
|
OutputSection *OS = Sym.getOutputSection();
|
2017-09-07 19:01:10 +08:00
|
|
|
|
if (!OS || !OS->PtLoad || !OS->PtLoad->FirstSec)
|
2017-11-04 08:31:04 +08:00
|
|
|
|
fatal("SBREL relocation to " + Sym.getName() + " without static base");
|
2017-09-07 19:01:10 +08:00
|
|
|
|
return OS->PtLoad->FirstSec->Addr;
|
2017-05-18 17:12:21 +08:00
|
|
|
|
}
|
|
|
|
|
|
2018-08-10 01:59:56 +08:00
|
|
|
|
// For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually
|
|
|
|
|
// points the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA
|
|
|
|
|
// is calculated using PCREL_HI20's symbol.
|
|
|
|
|
//
|
|
|
|
|
// This function returns the R_RISCV_PCREL_HI20 relocation from
|
|
|
|
|
// R_RISCV_PCREL_LO12's symbol and addend.
|
|
|
|
|
Relocation *lld::elf::getRISCVPCRelHi20(const Symbol *Sym, uint64_t Addend) {
|
|
|
|
|
const Defined *D = cast<Defined>(Sym);
|
|
|
|
|
InputSection *IS = cast<InputSection>(D->Section);
|
|
|
|
|
|
|
|
|
|
if (Addend != 0)
|
|
|
|
|
warn("Non-zero addend in R_RISCV_PCREL_LO12 relocation to " +
|
|
|
|
|
IS->getObjMsg(D->Value) + " is ignored");
|
|
|
|
|
|
|
|
|
|
// Relocations are sorted by offset, so we can use std::equal_range to do
|
|
|
|
|
// binary search.
|
|
|
|
|
auto Range = std::equal_range(IS->Relocations.begin(), IS->Relocations.end(),
|
|
|
|
|
D->Value, RelocationOffsetComparator{});
|
|
|
|
|
for (auto It = std::get<0>(Range); It != std::get<1>(Range); ++It)
|
|
|
|
|
if (isRelExprOneOf<R_PC>(It->Expr))
|
|
|
|
|
return &*It;
|
|
|
|
|
|
|
|
|
|
error("R_RISCV_PCREL_LO12 relocation points to " + IS->getObjMsg(D->Value) +
|
|
|
|
|
" without an associated R_RISCV_PCREL_HI20 relocation");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-11 16:37:19 +08:00
|
|
|
|
static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
2018-06-11 15:24:31 +08:00
|
|
|
|
uint64_t P, const Symbol &Sym, RelExpr Expr) {
|
2016-04-13 09:40:19 +08:00
|
|
|
|
switch (Expr) {
|
2017-10-12 11:14:06 +08:00
|
|
|
|
case R_INVALID:
|
|
|
|
|
return 0;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_ABS:
|
2018-07-10 00:35:51 +08:00
|
|
|
|
case R_RELAX_TLS_LD_TO_LE_ABS:
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_GOT_PC_NOPIC:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A);
|
2018-02-16 18:01:17 +08:00
|
|
|
|
case R_ADDEND:
|
|
|
|
|
return A;
|
2017-05-18 17:12:21 +08:00
|
|
|
|
case R_ARM_SBREL:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) - getARMStaticBase(Sym);
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_GOT:
|
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_ABS:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getGotVA() + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_GOTONLY_PC:
|
2017-05-12 07:26:03 +08:00
|
|
|
|
return InX::Got->getVA() + A - P;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_GOTONLY_PC_FROM_END:
|
2017-05-12 07:26:03 +08:00
|
|
|
|
return InX::Got->getVA() + A - P + InX::Got->getSize();
|
2016-04-18 20:07:13 +08:00
|
|
|
|
case R_GOTREL:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) - InX::Got->getVA();
|
2016-09-01 07:24:11 +08:00
|
|
|
|
case R_GOTREL_FROM_END:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) - InX::Got->getVA() - InX::Got->getSize();
|
2016-04-18 20:07:13 +08:00
|
|
|
|
case R_GOT_FROM_END:
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_END:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getGotOffset() + A - InX::Got->getSize();
|
2018-06-27 21:55:41 +08:00
|
|
|
|
case R_TLSLD_GOT_OFF:
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_GOT_OFF:
|
[PPC64] Thread-local storage general-dynamic to initial-exec relaxation.
Patch adds support for relaxing the general-dynamic tls sequence to
initial-exec.
the relaxation performs the following transformation:
addis r3, r2, x@got@tlsgd@ha --> addis r3, r2, x@got@tprel@ha
addi r3, r3, x@got@tlsgd@l --> ld r3, x@got@tprel@l(r3)
bl __tls_get_addr(x@tlsgd) --> nop
nop --> add r3, r3, r13
and instead of emitting a DTPMOD64/DTPREL64 pair for x, we emit a single
R_PPC64_TPREL64.
Differential Revision: https://reviews.llvm.org/D48090
llvm-svn: 335651
2018-06-27 03:38:18 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getGotOffset() + A;
|
2016-04-13 09:40:19 +08:00
|
|
|
|
case R_GOT_PAGE_PC:
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P);
|
2016-04-13 09:40:19 +08:00
|
|
|
|
case R_GOT_PC:
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getGotVA() + A - P;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_MIPS_GOTREL:
|
2018-06-11 16:37:19 +08:00
|
|
|
|
return Sym.getVA(A) - InX::MipsGot->getGp(File);
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_MIPS_GOT_GP:
|
2018-06-11 16:37:19 +08:00
|
|
|
|
return InX::MipsGot->getGp(File) + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_MIPS_GOT_GP_PC: {
|
|
|
|
|
// R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iif the target
|
|
|
|
|
// is _gp_disp symbol. In that case we should use the following
|
|
|
|
|
// formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at
|
|
|
|
|
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
|
2017-11-09 20:10:14 +08:00
|
|
|
|
// microMIPS variants of these relocations use slightly different
|
|
|
|
|
// expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
|
|
|
|
|
// to correctly handle less-sugnificant bit of the microMIPS symbol.
|
2018-06-11 16:37:19 +08:00
|
|
|
|
uint64_t V = InX::MipsGot->getGp(File) + A - P;
|
2017-09-12 21:08:24 +08:00
|
|
|
|
if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16)
|
2017-03-26 12:10:43 +08:00
|
|
|
|
V += 4;
|
2017-11-09 20:10:14 +08:00
|
|
|
|
if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16)
|
|
|
|
|
V -= 1;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
return V;
|
|
|
|
|
}
|
2016-05-16 02:13:50 +08:00
|
|
|
|
case R_MIPS_GOT_LOCAL_PAGE:
|
2016-03-13 23:37:38 +08:00
|
|
|
|
// If relocation against MIPS local symbol requires GOT entry, this entry
|
|
|
|
|
// should be initialized by 'page address'. This address is high 16-bits
|
2016-03-30 20:45:58 +08:00
|
|
|
|
// of sum the symbol's value and the addend.
|
2018-06-11 15:24:31 +08:00
|
|
|
|
return InX::MipsGot->getVA() +
|
|
|
|
|
InX::MipsGot->getPageEntryOffset(File, Sym, A) -
|
2018-06-11 16:37:19 +08:00
|
|
|
|
InX::MipsGot->getGp(File);
|
2016-06-20 05:39:37 +08:00
|
|
|
|
case R_MIPS_GOT_OFF:
|
2016-10-21 15:22:30 +08:00
|
|
|
|
case R_MIPS_GOT_OFF32:
|
2016-06-20 05:39:37 +08:00
|
|
|
|
// In case of MIPS if a GOT relocation has non-zero addend this addend
|
|
|
|
|
// should be applied to the GOT entry content not to the GOT entry offset.
|
|
|
|
|
// That is why we use separate expression type.
|
2018-06-11 15:24:31 +08:00
|
|
|
|
return InX::MipsGot->getVA() +
|
|
|
|
|
InX::MipsGot->getSymEntryOffset(File, Sym, A) -
|
2018-06-11 16:37:19 +08:00
|
|
|
|
InX::MipsGot->getGp(File);
|
2016-06-23 23:26:31 +08:00
|
|
|
|
case R_MIPS_TLSGD:
|
2018-06-11 16:37:19 +08:00
|
|
|
|
return InX::MipsGot->getVA() + InX::MipsGot->getGlobalDynOffset(File, Sym) -
|
|
|
|
|
InX::MipsGot->getGp(File);
|
2016-06-23 23:26:31 +08:00
|
|
|
|
case R_MIPS_TLSLD:
|
2018-06-11 15:24:31 +08:00
|
|
|
|
return InX::MipsGot->getVA() + InX::MipsGot->getTlsIndexOffset(File) -
|
2018-06-11 16:37:19 +08:00
|
|
|
|
InX::MipsGot->getGp(File);
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_PAGE_PC:
|
2017-06-13 02:05:01 +08:00
|
|
|
|
case R_PLT_PAGE_PC: {
|
|
|
|
|
uint64_t Dest;
|
2017-11-04 08:31:04 +08:00
|
|
|
|
if (Sym.isUndefWeak())
|
2017-06-13 02:05:01 +08:00
|
|
|
|
Dest = getAArch64Page(A);
|
|
|
|
|
else
|
2017-11-04 08:31:04 +08:00
|
|
|
|
Dest = getAArch64Page(Sym.getVA(A));
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return Dest - getAArch64Page(P);
|
|
|
|
|
}
|
2018-08-10 01:59:56 +08:00
|
|
|
|
case R_RISCV_PC_INDIRECT: {
|
|
|
|
|
const Relocation *HiRel = getRISCVPCRelHi20(&Sym, A);
|
|
|
|
|
if (!HiRel)
|
|
|
|
|
return 0;
|
|
|
|
|
return getRelocTargetVA(File, HiRel->Type, HiRel->Addend, Sym.getVA(),
|
|
|
|
|
*HiRel->Sym, HiRel->Expr);
|
|
|
|
|
}
|
2017-06-13 02:05:01 +08:00
|
|
|
|
case R_PC: {
|
|
|
|
|
uint64_t Dest;
|
2017-11-04 08:31:04 +08:00
|
|
|
|
if (Sym.isUndefWeak()) {
|
2017-03-26 12:10:43 +08:00
|
|
|
|
// On ARM and AArch64 a branch to an undefined weak resolves to the
|
|
|
|
|
// next instruction, otherwise the place.
|
|
|
|
|
if (Config->EMachine == EM_ARM)
|
2017-06-13 02:05:01 +08:00
|
|
|
|
Dest = getARMUndefinedRelativeWeakVA(Type, A, P);
|
|
|
|
|
else if (Config->EMachine == EM_AARCH64)
|
|
|
|
|
Dest = getAArch64UndefinedRelativeWeakVA(Type, A, P);
|
|
|
|
|
else
|
2017-11-04 08:31:04 +08:00
|
|
|
|
Dest = Sym.getVA(A);
|
2017-06-13 02:05:01 +08:00
|
|
|
|
} else {
|
2017-11-04 08:31:04 +08:00
|
|
|
|
Dest = Sym.getVA(A);
|
2017-03-26 12:10:43 +08:00
|
|
|
|
}
|
2017-06-13 02:05:01 +08:00
|
|
|
|
return Dest - P;
|
|
|
|
|
}
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_PLT:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getPltVA() + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_PLT_PC:
|
2018-05-04 23:09:49 +08:00
|
|
|
|
case R_PPC_CALL_PLT:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getPltVA() + A - P;
|
2018-05-04 23:09:49 +08:00
|
|
|
|
case R_PPC_CALL: {
|
2017-11-04 08:31:04 +08:00
|
|
|
|
uint64_t SymVA = Sym.getVA(A);
|
2016-04-13 09:40:19 +08:00
|
|
|
|
// If we have an undefined weak symbol, we might get here with a symbol
|
|
|
|
|
// address of zero. That could overflow, but the code must be unreachable,
|
|
|
|
|
// so don't bother doing anything at all.
|
|
|
|
|
if (!SymVA)
|
|
|
|
|
return 0;
|
2018-04-27 23:41:19 +08:00
|
|
|
|
|
|
|
|
|
// PPC64 V2 ABI describes two entry points to a function. The global entry
|
2018-09-20 08:26:47 +08:00
|
|
|
|
// point is used for calls where the caller and callee (may) have different
|
|
|
|
|
// TOC base pointers and r2 needs to be modified to hold the TOC base for
|
|
|
|
|
// the callee. For local calls the caller and callee share the same
|
|
|
|
|
// TOC base and so the TOC pointer initialization code should be skipped by
|
|
|
|
|
// branching to the local entry point.
|
|
|
|
|
return SymVA - P + getPPC64GlobalEntryToLocalEntryOffset(Sym.StOther);
|
2016-04-13 09:40:19 +08:00
|
|
|
|
}
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_PPC_TOC:
|
|
|
|
|
return getPPC64TocBase() + A;
|
2016-05-25 22:31:37 +08:00
|
|
|
|
case R_RELAX_GOT_PC:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) - P;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_LE:
|
|
|
|
|
case R_RELAX_TLS_IE_TO_LE:
|
|
|
|
|
case R_RELAX_TLS_LD_TO_LE:
|
|
|
|
|
case R_TLS:
|
|
|
|
|
// A weak undefined TLS symbol resolves to the base of the TLS
|
|
|
|
|
// block, i.e. gets a value of zero. If we pass --gc-sections to
|
|
|
|
|
// lld and .tbss is not referenced, it gets reclaimed and we don't
|
|
|
|
|
// create a TLS program header. Therefore, we resolve this
|
|
|
|
|
// statically to zero.
|
2017-11-04 08:31:04 +08:00
|
|
|
|
if (Sym.isTls() && Sym.isUndefWeak())
|
2017-03-26 12:10:43 +08:00
|
|
|
|
return 0;
|
2018-06-09 01:04:09 +08:00
|
|
|
|
|
|
|
|
|
// For TLS variant 1 the TCB is a fixed size, whereas for TLS variant 2 the
|
|
|
|
|
// TCB is on unspecified size and content. Targets that implement variant 1
|
|
|
|
|
// should set TcbSize.
|
|
|
|
|
if (Target->TcbSize) {
|
|
|
|
|
// PPC64 V2 ABI has the thread pointer offset into the middle of the TLS
|
|
|
|
|
// storage area by TlsTpOffset for efficient addressing TCB and up to
|
|
|
|
|
// 4KB – 8 B of other thread library information (placed before the TCB).
|
|
|
|
|
// Subtracting this offset will get the address of the first TLS block.
|
|
|
|
|
if (Target->TlsTpOffset)
|
|
|
|
|
return Sym.getVA(A) - Target->TlsTpOffset;
|
|
|
|
|
|
|
|
|
|
// If thread pointer is not offset into the middle, the first thing in the
|
|
|
|
|
// TLS storage area is the TCB. Add the TcbSize to get the address of the
|
|
|
|
|
// first TLS block.
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align);
|
2018-06-09 01:04:09 +08:00
|
|
|
|
}
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Sym.getVA(A) - Out::TlsPhdr->p_memsz;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_LE_NEG:
|
|
|
|
|
case R_NEG_TLS:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return Out::TlsPhdr->p_memsz - Sym.getVA(A);
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_SIZE:
|
2018-01-06 05:41:17 +08:00
|
|
|
|
return Sym.getSize() + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_TLSDESC:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return InX::Got->getGlobalDynAddr(Sym) + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_TLSDESC_PAGE:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return getAArch64Page(InX::Got->getGlobalDynAddr(Sym) + A) -
|
2017-03-26 12:10:43 +08:00
|
|
|
|
getAArch64Page(P);
|
2018-05-29 22:34:38 +08:00
|
|
|
|
case R_TLSGD_GOT:
|
|
|
|
|
return InX::Got->getGlobalDynOffset(Sym) + A;
|
2018-06-01 02:07:06 +08:00
|
|
|
|
case R_TLSGD_GOT_FROM_END:
|
|
|
|
|
return InX::Got->getGlobalDynOffset(Sym) + A - InX::Got->getSize();
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_TLSGD_PC:
|
2017-11-04 08:31:04 +08:00
|
|
|
|
return InX::Got->getGlobalDynAddr(Sym) + A - P;
|
2018-06-01 02:07:06 +08:00
|
|
|
|
case R_TLSLD_GOT_FROM_END:
|
2017-05-12 07:26:03 +08:00
|
|
|
|
return InX::Got->getTlsIndexOff() + A - InX::Got->getSize();
|
2018-06-01 02:44:12 +08:00
|
|
|
|
case R_TLSLD_GOT:
|
2018-08-01 16:11:54 +08:00
|
|
|
|
return InX::Got->getTlsIndexOff() + A;
|
2017-03-26 12:10:43 +08:00
|
|
|
|
case R_TLSLD_PC:
|
2017-05-12 07:26:03 +08:00
|
|
|
|
return InX::Got->getTlsIndexVA() + A - P;
|
2018-08-02 22:34:39 +08:00
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("invalid expression");
|
2016-04-13 09:40:19 +08:00
|
|
|
|
}
|
2016-03-13 23:37:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
2016-04-29 02:42:04 +08:00
|
|
|
|
// This function applies relocations to sections without SHF_ALLOC bit.
|
|
|
|
|
// Such sections are never mapped to memory at runtime. Debug sections are
|
|
|
|
|
// an example. Relocations in non-alloc sections are much easier to
|
|
|
|
|
// handle than in allocated sections because it will never need complex
|
|
|
|
|
// treatement such as GOT or PLT (because at runtime no one refers them).
|
|
|
|
|
// So, we handle relocations for non-alloc sections directly in this
|
|
|
|
|
// function as a performance optimization.
|
2017-02-24 00:49:07 +08:00
|
|
|
|
template <class ELFT, class RelTy>
|
|
|
|
|
void InputSection::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
2017-10-10 12:45:48 +08:00
|
|
|
|
const unsigned Bits = sizeof(typename ELFT::uint) * 8;
|
|
|
|
|
|
2016-04-29 02:42:04 +08:00
|
|
|
|
for (const RelTy &Rel : Rels) {
|
2017-10-12 06:49:24 +08:00
|
|
|
|
RelType Type = Rel.getType(Config->IsMips64EL);
|
2018-02-16 09:10:51 +08:00
|
|
|
|
|
|
|
|
|
// GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations
|
|
|
|
|
// against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed
|
|
|
|
|
// in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we
|
|
|
|
|
// need to keep this bug-compatible code for a while.
|
|
|
|
|
if (Config->EMachine == EM_386 && Type == R_386_GOTPC)
|
|
|
|
|
continue;
|
|
|
|
|
|
2017-03-09 00:03:41 +08:00
|
|
|
|
uint64_t Offset = getOffset(Rel.r_offset);
|
2016-05-22 03:48:54 +08:00
|
|
|
|
uint8_t *BufLoc = Buf + Offset;
|
2017-02-16 08:12:34 +08:00
|
|
|
|
int64_t Addend = getAddend<ELFT>(Rel);
|
2016-04-29 11:21:08 +08:00
|
|
|
|
if (!RelTy::IsRela)
|
2016-05-22 03:48:54 +08:00
|
|
|
|
Addend += Target->getImplicitAddend(BufLoc, Type);
|
2016-04-29 02:42:04 +08:00
|
|
|
|
|
2017-12-20 00:29:02 +08:00
|
|
|
|
Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
|
2017-10-12 11:14:06 +08:00
|
|
|
|
RelExpr Expr = Target->getRelExpr(Type, Sym, BufLoc);
|
2017-02-23 14:22:28 +08:00
|
|
|
|
if (Expr == R_NONE)
|
|
|
|
|
continue;
|
2018-02-16 09:10:51 +08:00
|
|
|
|
|
2017-02-23 14:22:28 +08:00
|
|
|
|
if (Expr != R_ABS) {
|
2018-02-16 09:10:51 +08:00
|
|
|
|
std::string Msg = getLocation<ELFT>(Offset) +
|
|
|
|
|
": has non-ABS relocation " + toString(Type) +
|
|
|
|
|
" against symbol '" + toString(Sym) + "'";
|
|
|
|
|
if (Expr != R_PC) {
|
|
|
|
|
error(Msg);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2017-10-27 19:49:24 +08:00
|
|
|
|
|
2018-02-16 09:10:51 +08:00
|
|
|
|
// If the control reaches here, we found a PC-relative relocation in a
|
|
|
|
|
// non-ALLOC section. Since non-ALLOC section is not loaded into memory
|
|
|
|
|
// at runtime, the notion of PC-relative doesn't make sense here. So,
|
|
|
|
|
// this is a usage error. However, GNU linkers historically accept such
|
|
|
|
|
// relocations without any errors and relocate them as if they were at
|
|
|
|
|
// address 0. For bug-compatibilty, we accept them with warnings. We
|
|
|
|
|
// know Steel Bank Common Lisp as of 2018 have this bug.
|
|
|
|
|
warn(Msg);
|
|
|
|
|
Target->relocateOne(BufLoc, Type,
|
|
|
|
|
SignExtend64<Bits>(Sym.getVA(Addend - Offset)));
|
|
|
|
|
continue;
|
2016-04-29 02:42:04 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-10-10 12:45:48 +08:00
|
|
|
|
if (Sym.isTls() && !Out::TlsPhdr)
|
|
|
|
|
Target->relocateOne(BufLoc, Type, 0);
|
|
|
|
|
else
|
|
|
|
|
Target->relocateOne(BufLoc, Type, SignExtend64<Bits>(Sym.getVA(Addend)));
|
2016-04-29 02:42:04 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-11 20:52:04 +08:00
|
|
|
|
// This is used when '-r' is given.
|
|
|
|
|
// For REL targets, InputSection::copyRelocations() may store artificial
|
|
|
|
|
// relocations aimed to update addends. They are handled in relocateAlloc()
|
|
|
|
|
// for allocatable sections, and this function does the same for
|
|
|
|
|
// non-allocatable sections, such as sections with debug information.
|
|
|
|
|
static void relocateNonAllocForRelocatable(InputSection *Sec, uint8_t *Buf) {
|
|
|
|
|
const unsigned Bits = Config->Is64 ? 64 : 32;
|
|
|
|
|
|
|
|
|
|
for (const Relocation &Rel : Sec->Relocations) {
|
|
|
|
|
// InputSection::copyRelocations() adds only R_ABS relocations.
|
|
|
|
|
assert(Rel.Expr == R_ABS);
|
|
|
|
|
uint8_t *BufLoc = Buf + Rel.Offset + Sec->OutSecOff;
|
|
|
|
|
uint64_t TargetVA = SignExtend64(Rel.Sym->getVA(Rel.Addend), Bits);
|
|
|
|
|
Target->relocateOne(BufLoc, Rel.Type, TargetVA);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-19 02:11:26 +08:00
|
|
|
|
template <class ELFT>
|
2017-02-23 10:28:28 +08:00
|
|
|
|
void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) {
|
2018-07-18 07:16:02 +08:00
|
|
|
|
if (Flags & SHF_EXECINSTR)
|
|
|
|
|
adjustSplitStackFunctionPrologues<ELFT>(Buf, BufEnd);
|
|
|
|
|
|
2017-10-07 04:08:51 +08:00
|
|
|
|
if (Flags & SHF_ALLOC) {
|
2017-05-19 00:45:36 +08:00
|
|
|
|
relocateAlloc(Buf, BufEnd);
|
2017-10-07 04:08:51 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
2017-05-19 00:45:36 +08:00
|
|
|
|
|
2017-10-07 04:08:51 +08:00
|
|
|
|
auto *Sec = cast<InputSection>(this);
|
2018-07-11 20:52:04 +08:00
|
|
|
|
if (Config->Relocatable)
|
|
|
|
|
relocateNonAllocForRelocatable(Sec, Buf);
|
|
|
|
|
else if (Sec->AreRelocsRela)
|
2017-10-07 04:08:51 +08:00
|
|
|
|
Sec->relocateNonAlloc<ELFT>(Buf, Sec->template relas<ELFT>());
|
2017-05-19 00:45:36 +08:00
|
|
|
|
else
|
2017-10-07 04:08:51 +08:00
|
|
|
|
Sec->relocateNonAlloc<ELFT>(Buf, Sec->template rels<ELFT>());
|
2017-05-19 00:45:36 +08:00
|
|
|
|
}
|
2016-04-29 02:42:04 +08:00
|
|
|
|
|
2017-05-19 00:45:36 +08:00
|
|
|
|
void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
|
|
|
|
|
assert(Flags & SHF_ALLOC);
|
|
|
|
|
const unsigned Bits = Config->Wordsize * 8;
|
2017-10-07 04:08:51 +08:00
|
|
|
|
|
2016-09-08 04:37:34 +08:00
|
|
|
|
for (const Relocation &Rel : Relocations) {
|
2018-04-19 11:51:26 +08:00
|
|
|
|
uint64_t Offset = Rel.Offset;
|
|
|
|
|
if (auto *Sec = dyn_cast<InputSection>(this))
|
|
|
|
|
Offset += Sec->OutSecOff;
|
2018-04-19 10:24:28 +08:00
|
|
|
|
uint8_t *BufLoc = Buf + Offset;
|
2017-10-12 06:49:24 +08:00
|
|
|
|
RelType Type = Rel.Type;
|
2016-04-13 09:40:19 +08:00
|
|
|
|
|
2018-04-19 10:24:28 +08:00
|
|
|
|
uint64_t AddrLoc = getOutputSection()->Addr + Offset;
|
2016-04-13 09:40:19 +08:00
|
|
|
|
RelExpr Expr = Rel.Expr;
|
2017-05-19 00:45:36 +08:00
|
|
|
|
uint64_t TargetVA = SignExtend64(
|
2018-06-11 16:37:19 +08:00
|
|
|
|
getRelocTargetVA(File, Type, Rel.Addend, AddrLoc, *Rel.Sym, Expr),
|
2018-06-11 15:24:31 +08:00
|
|
|
|
Bits);
|
2016-03-19 02:11:26 +08:00
|
|
|
|
|
2016-05-21 05:14:06 +08:00
|
|
|
|
switch (Expr) {
|
2016-05-25 22:31:37 +08:00
|
|
|
|
case R_RELAX_GOT_PC:
|
[ELF] - Implemented support for test/binop relaxations from latest ABI.
Patch implements next relaxation from latest ABI:
"Convert memory operand of test and binop into immediate operand, where binop is one of adc, add, and, cmp, or,
sbb, sub, xor instructions, when position-independent code is disabled."
It is described in System V Application Binary Interface AMD64 Architecture Processor
Supplement Draft Version 0.99.8 (https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-r249.pdf,
B.2 "B.2 Optimize GOTPCRELX Relocations").
Differential revision: http://reviews.llvm.org/D20793
llvm-svn: 271405
2016-06-02 00:45:30 +08:00
|
|
|
|
case R_RELAX_GOT_PC_NOPIC:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relaxGot(BufLoc, TargetVA);
|
2016-05-25 22:31:37 +08:00
|
|
|
|
break;
|
2016-05-21 05:14:06 +08:00
|
|
|
|
case R_RELAX_TLS_IE_TO_LE:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relaxTlsIeToLe(BufLoc, Type, TargetVA);
|
2016-05-21 05:14:06 +08:00
|
|
|
|
break;
|
|
|
|
|
case R_RELAX_TLS_LD_TO_LE:
|
2018-07-10 00:35:51 +08:00
|
|
|
|
case R_RELAX_TLS_LD_TO_LE_ABS:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relaxTlsLdToLe(BufLoc, Type, TargetVA);
|
2016-05-21 05:14:06 +08:00
|
|
|
|
break;
|
|
|
|
|
case R_RELAX_TLS_GD_TO_LE:
|
2016-06-05 07:22:34 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_LE_NEG:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relaxTlsGdToLe(BufLoc, Type, TargetVA);
|
2016-05-21 05:14:06 +08:00
|
|
|
|
break;
|
|
|
|
|
case R_RELAX_TLS_GD_TO_IE:
|
2016-06-05 07:33:31 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_ABS:
|
[PPC64] Thread-local storage general-dynamic to initial-exec relaxation.
Patch adds support for relaxing the general-dynamic tls sequence to
initial-exec.
the relaxation performs the following transformation:
addis r3, r2, x@got@tlsgd@ha --> addis r3, r2, x@got@tprel@ha
addi r3, r3, x@got@tlsgd@l --> ld r3, x@got@tprel@l(r3)
bl __tls_get_addr(x@tlsgd) --> nop
nop --> add r3, r3, r13
and instead of emitting a DTPMOD64/DTPREL64 pair for x, we emit a single
R_PPC64_TPREL64.
Differential Revision: https://reviews.llvm.org/D48090
llvm-svn: 335651
2018-06-27 03:38:18 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
|
2016-06-05 07:33:31 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
2016-06-05 07:22:34 +08:00
|
|
|
|
case R_RELAX_TLS_GD_TO_IE_END:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
|
2016-05-21 05:14:06 +08:00
|
|
|
|
break;
|
2018-05-07 03:13:29 +08:00
|
|
|
|
case R_PPC_CALL:
|
[PPC64] Thread-local storage general-dynamic to initial-exec relaxation.
Patch adds support for relaxing the general-dynamic tls sequence to
initial-exec.
the relaxation performs the following transformation:
addis r3, r2, x@got@tlsgd@ha --> addis r3, r2, x@got@tprel@ha
addi r3, r3, x@got@tlsgd@l --> ld r3, x@got@tprel@l(r3)
bl __tls_get_addr(x@tlsgd) --> nop
nop --> add r3, r3, r13
and instead of emitting a DTPMOD64/DTPREL64 pair for x, we emit a single
R_PPC64_TPREL64.
Differential Revision: https://reviews.llvm.org/D48090
llvm-svn: 335651
2018-06-27 03:38:18 +08:00
|
|
|
|
// If this is a call to __tls_get_addr, it may be part of a TLS
|
|
|
|
|
// sequence that has been relaxed and turned into a nop. In this
|
|
|
|
|
// case, we don't want to handle it as a call.
|
|
|
|
|
if (read32(BufLoc) == 0x60000000) // nop
|
|
|
|
|
break;
|
|
|
|
|
|
2016-05-24 20:17:11 +08:00
|
|
|
|
// Patch a nop (0x60000000) to a ld.
|
2018-05-07 03:13:29 +08:00
|
|
|
|
if (Rel.Sym->NeedsTocRestore) {
|
|
|
|
|
if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
|
|
|
|
|
error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
|
2018-04-23 23:01:24 +08:00
|
|
|
|
}
|
2018-04-24 04:34:35 +08:00
|
|
|
|
Target->relocateOne(BufLoc, Type, TargetVA);
|
|
|
|
|
break;
|
2016-05-21 05:14:06 +08:00
|
|
|
|
default:
|
2016-12-15 06:45:52 +08:00
|
|
|
|
Target->relocateOne(BufLoc, Type, TargetVA);
|
2016-05-21 05:14:06 +08:00
|
|
|
|
break;
|
|
|
|
|
}
|
2015-09-22 06:01:00 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-18 07:16:02 +08:00
|
|
|
|
// For each function-defining prologue, find any calls to __morestack,
|
|
|
|
|
// and replace them with calls to __morestack_non_split.
|
|
|
|
|
static void switchMorestackCallsToMorestackNonSplit(
|
2018-08-01 16:10:50 +08:00
|
|
|
|
DenseSet<Defined *> &Prologues, std::vector<Relocation *> &MorestackCalls) {
|
2018-07-18 07:16:02 +08:00
|
|
|
|
|
|
|
|
|
// If the target adjusted a function's prologue, all calls to
|
|
|
|
|
// __morestack inside that function should be switched to
|
|
|
|
|
// __morestack_non_split.
|
|
|
|
|
Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split");
|
2018-08-03 02:13:40 +08:00
|
|
|
|
if (!MoreStackNonSplit) {
|
|
|
|
|
error("Mixing split-stack objects requires a definition of "
|
|
|
|
|
"__morestack_non_split");
|
|
|
|
|
return;
|
|
|
|
|
}
|
2018-07-18 07:16:02 +08:00
|
|
|
|
|
|
|
|
|
// Sort both collections to compare addresses efficiently.
|
|
|
|
|
llvm::sort(MorestackCalls.begin(), MorestackCalls.end(),
|
|
|
|
|
[](const Relocation *L, const Relocation *R) {
|
|
|
|
|
return L->Offset < R->Offset;
|
|
|
|
|
});
|
|
|
|
|
std::vector<Defined *> Functions(Prologues.begin(), Prologues.end());
|
|
|
|
|
llvm::sort(
|
|
|
|
|
Functions.begin(), Functions.end(),
|
|
|
|
|
[](const Defined *L, const Defined *R) { return L->Value < R->Value; });
|
|
|
|
|
|
|
|
|
|
auto It = MorestackCalls.begin();
|
|
|
|
|
for (Defined *F : Functions) {
|
|
|
|
|
// Find the first call to __morestack within the function.
|
|
|
|
|
while (It != MorestackCalls.end() && (*It)->Offset < F->Value)
|
|
|
|
|
++It;
|
|
|
|
|
// Adjust all calls inside the function.
|
|
|
|
|
while (It != MorestackCalls.end() && (*It)->Offset < F->Value + F->Size) {
|
|
|
|
|
(*It)->Sym = MoreStackNonSplit;
|
|
|
|
|
++It;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-03 02:13:40 +08:00
|
|
|
|
static bool enclosingPrologueAttempted(uint64_t Offset,
|
|
|
|
|
const DenseSet<Defined *> &Prologues) {
|
2018-07-18 07:16:02 +08:00
|
|
|
|
for (Defined *F : Prologues)
|
|
|
|
|
if (F->Value <= Offset && Offset < F->Value + F->Size)
|
|
|
|
|
return true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If a function compiled for split stack calls a function not
|
|
|
|
|
// compiled for split stack, then the caller needs its prologue
|
|
|
|
|
// adjusted to ensure that the called function will have enough stack
|
|
|
|
|
// available. Find those functions, and adjust their prologues.
|
|
|
|
|
template <class ELFT>
|
|
|
|
|
void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
|
|
|
|
|
uint8_t *End) {
|
|
|
|
|
if (!getFile<ELFT>()->SplitStack)
|
|
|
|
|
return;
|
2018-08-03 02:13:40 +08:00
|
|
|
|
DenseSet<Defined *> Prologues;
|
2018-07-18 07:16:02 +08:00
|
|
|
|
std::vector<Relocation *> MorestackCalls;
|
|
|
|
|
|
|
|
|
|
for (Relocation &Rel : Relocations) {
|
|
|
|
|
// Local symbols can't possibly be cross-calls, and should have been
|
|
|
|
|
// resolved long before this line.
|
|
|
|
|
if (Rel.Sym->isLocal())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Ignore calls into the split-stack api.
|
2018-08-14 06:29:15 +08:00
|
|
|
|
if (Rel.Sym->getName().startswith("__morestack")) {
|
|
|
|
|
if (Rel.Sym->getName().equals("__morestack"))
|
2018-07-18 07:16:02 +08:00
|
|
|
|
MorestackCalls.push_back(&Rel);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A relocation to non-function isn't relevant. Sometimes
|
|
|
|
|
// __morestack is not marked as a function, so this check comes
|
|
|
|
|
// after the name check.
|
2018-08-14 06:29:15 +08:00
|
|
|
|
if (Rel.Sym->Type != STT_FUNC)
|
2018-07-18 07:16:02 +08:00
|
|
|
|
continue;
|
|
|
|
|
|
2018-08-14 06:29:15 +08:00
|
|
|
|
// If the callee's-file was compiled with split stack, nothing to do. In
|
|
|
|
|
// this context, a "Defined" symbol is one "defined by the binary currently
|
|
|
|
|
// being produced". So an "undefined" symbol might be provided by a shared
|
|
|
|
|
// library. It is not possible to tell how such symbols were compiled, so be
|
|
|
|
|
// conservative.
|
|
|
|
|
if (Defined *D = dyn_cast<Defined>(Rel.Sym))
|
|
|
|
|
if (InputSection *IS = cast_or_null<InputSection>(D->Section))
|
2018-09-05 05:06:59 +08:00
|
|
|
|
if (!IS || !IS->getFile<ELFT>() || IS->getFile<ELFT>()->SplitStack)
|
2018-08-14 06:29:15 +08:00
|
|
|
|
continue;
|
2018-08-03 02:13:40 +08:00
|
|
|
|
|
|
|
|
|
if (enclosingPrologueAttempted(Rel.Offset, Prologues))
|
2018-07-18 07:16:02 +08:00
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
|
2018-08-03 02:13:40 +08:00
|
|
|
|
Prologues.insert(F);
|
|
|
|
|
if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
|
|
|
|
|
End))
|
2018-07-18 07:16:02 +08:00
|
|
|
|
continue;
|
2018-08-03 02:13:40 +08:00
|
|
|
|
if (!getFile<ELFT>()->SomeNoSplitStack)
|
|
|
|
|
error(lld::toString(this) + ": " + F->getName() +
|
2018-08-14 06:29:15 +08:00
|
|
|
|
" (with -fsplit-stack) calls " + Rel.Sym->getName() +
|
2018-08-03 02:13:40 +08:00
|
|
|
|
" (without -fsplit-stack), but couldn't adjust its prologue");
|
2018-07-18 07:16:02 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2018-08-03 02:13:40 +08:00
|
|
|
|
switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
|
2018-07-18 07:16:02 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 00:49:07 +08:00
|
|
|
|
template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (Type == SHT_NOBITS)
|
2015-07-25 05:03:07 +08:00
|
|
|
|
return;
|
2016-02-25 16:23:37 +08:00
|
|
|
|
|
2017-02-27 10:56:02 +08:00
|
|
|
|
if (auto *S = dyn_cast<SyntheticSection>(this)) {
|
2016-11-23 17:47:38 +08:00
|
|
|
|
S->writeTo(Buf + OutSecOff);
|
2016-11-16 18:02:27 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-09 00:18:10 +08:00
|
|
|
|
// If -r or --emit-relocs is given, then an InputSection
|
|
|
|
|
// may be a relocation section.
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (Type == SHT_RELA) {
|
|
|
|
|
copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rela>());
|
2016-02-25 16:23:37 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (Type == SHT_REL) {
|
|
|
|
|
copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rel>());
|
2016-02-25 16:23:37 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-09 11:19:08 +08:00
|
|
|
|
// If -r is given, we may have a SHT_GROUP section.
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (Type == SHT_GROUP) {
|
2017-06-09 11:19:08 +08:00
|
|
|
|
copyShtGroup<ELFT>(Buf + OutSecOff);
|
2017-05-29 16:37:50 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-14 09:35:04 +08:00
|
|
|
|
// Copy section contents from source object file to output file
|
|
|
|
|
// and then apply relocations.
|
2015-10-15 09:58:40 +08:00
|
|
|
|
memcpy(Buf + OutSecOff, Data.data(), Data.size());
|
2016-03-30 08:43:49 +08:00
|
|
|
|
uint8_t *BufEnd = Buf + OutSecOff + Data.size();
|
2017-12-20 00:29:02 +08:00
|
|
|
|
relocate<ELFT>(Buf, BufEnd);
|
2015-07-25 05:03:07 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 00:49:07 +08:00
|
|
|
|
// Makes this section stand in for Other: Other is marked dead and its Repl
// is set to this section's Repl. This section's alignment is raised to
// Other's if that one is larger.
void InputSection::replace(InputSection *Other) {
  if (Alignment < Other->Alignment)
    Alignment = Other->Alignment;

  Other->Live = false;
  Other->Repl = Repl;
}
|
|
|
|
|
|
2015-11-12 03:54:14 +08:00
|
|
|
|
template <class ELFT>
|
2017-12-21 10:03:39 +08:00
|
|
|
|
EhInputSection::EhInputSection(ObjFile<ELFT> &F,
|
|
|
|
|
const typename ELFT::Shdr &Header,
|
2017-03-07 05:17:18 +08:00
|
|
|
|
StringRef Name)
|
2017-11-30 22:01:06 +08:00
|
|
|
|
: InputSectionBase(F, Header, Name, InputSectionBase::EHFrame) {}
|
2015-11-12 03:54:14 +08:00
|
|
|
|
|
2017-06-01 04:17:44 +08:00
|
|
|
|
// Returns Parent cast to a SyntheticSection, or nullptr if Parent is not set.
SyntheticSection *EhInputSection::getParent() const {
  auto *Syn = cast_or_null<SyntheticSection>(Parent);
  return Syn;
}
|
|
|
|
|
|
2016-07-22 04:18:30 +08:00
|
|
|
|
// Returns the index of the first relocation that points to a region between
|
|
|
|
|
// Begin and Begin+Size.
|
|
|
|
|
template <class IntTy, class RelTy>
|
|
|
|
|
static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels,
|
|
|
|
|
unsigned &RelocI) {
|
|
|
|
|
// Start search from RelocI for fast access. That works because the
|
|
|
|
|
// relocations are sorted in .eh_frame.
|
|
|
|
|
for (unsigned N = Rels.size(); RelocI < N; ++RelocI) {
|
|
|
|
|
const RelTy &Rel = Rels[RelocI];
|
|
|
|
|
if (Rel.r_offset < Begin)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (Rel.r_offset < Begin + Size)
|
|
|
|
|
return RelocI;
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-23 07:53:00 +08:00
|
|
|
|
// .eh_frame is a sequence of CIE or FDE records.
|
|
|
|
|
// This function splits an input section into records and returns them.
|
2017-03-07 05:17:18 +08:00
|
|
|
|
template <class ELFT> void EhInputSection::split() {
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (AreRelocsRela)
|
|
|
|
|
split<ELFT>(relas<ELFT>());
|
2017-10-26 02:09:54 +08:00
|
|
|
|
else
|
2017-12-20 00:29:02 +08:00
|
|
|
|
split<ELFT>(rels<ELFT>());
|
2016-07-22 04:18:30 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-03-07 05:17:18 +08:00
|
|
|
|
template <class ELFT, class RelTy>
|
|
|
|
|
void EhInputSection::split(ArrayRef<RelTy> Rels) {
|
2016-07-22 04:18:30 +08:00
|
|
|
|
unsigned RelI = 0;
|
2016-05-23 07:53:00 +08:00
|
|
|
|
for (size_t Off = 0, End = Data.size(); Off != End;) {
|
2017-10-27 11:14:09 +08:00
|
|
|
|
size_t Size = readEhRecordSize(this, Off);
|
2017-12-20 00:29:02 +08:00
|
|
|
|
Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI));
|
2016-05-23 07:53:00 +08:00
|
|
|
|
// The empty record is the end marker.
|
|
|
|
|
if (Size == 4)
|
|
|
|
|
break;
|
|
|
|
|
Off += Size;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-22 09:58:30 +08:00
|
|
|
|
// Returns the offset of the first all-zero, EntSize-byte entry in S, or
// StringRef::npos if S contains no such entry. Only offsets that are multiples
// of EntSize are examined.
static size_t findNull(StringRef S, size_t EntSize) {
  // Optimize the common case.
  if (EntSize == 1)
    return S.find(0);

  // Offsets are tracked as size_t so that contents larger than 4 GiB are not
  // truncated. The scan stops as soon as fewer than EntSize bytes remain, so a
  // trailing partial entry is never read past the end of S; the caller then
  // sees npos, exactly as for any other unterminated string.
  const size_t N = S.size();
  for (size_t I = 0; I < N && EntSize <= N - I; I += EntSize) {
    const char *B = S.begin() + I;
    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
      return I;
  }
  return StringRef::npos;
}
|
|
|
|
|
|
2017-06-01 04:17:44 +08:00
|
|
|
|
// Returns Parent viewed as a SyntheticSection. cast_or_null lets a null Parent
// pass through as a null result.
SyntheticSection *MergeInputSection::getParent() const {
  SyntheticSection *Syn = cast_or_null<SyntheticSection>(Parent);
  return Syn;
}
|
|
|
|
|
|
2016-05-23 08:40:24 +08:00
|
|
|
|
// Split SHF_STRINGS section. Such section is a sequence of
|
|
|
|
|
// null-terminated strings.
|
2017-03-07 04:23:56 +08:00
|
|
|
|
void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) {
|
2016-05-23 08:40:24 +08:00
|
|
|
|
size_t Off = 0;
|
2017-12-20 00:29:02 +08:00
|
|
|
|
bool IsAlloc = Flags & SHF_ALLOC;
|
2017-10-22 09:58:30 +08:00
|
|
|
|
StringRef S = toStringRef(Data);
|
2017-10-22 07:20:13 +08:00
|
|
|
|
|
2017-10-22 09:58:30 +08:00
|
|
|
|
while (!S.empty()) {
|
|
|
|
|
size_t End = findNull(S, EntSize);
|
2016-05-23 08:40:24 +08:00
|
|
|
|
if (End == StringRef::npos)
|
2016-11-24 02:07:33 +08:00
|
|
|
|
fatal(toString(this) + ": string is not null terminated");
|
2016-05-23 08:40:24 +08:00
|
|
|
|
size_t Size = End + EntSize;
|
2017-10-22 07:20:13 +08:00
|
|
|
|
|
2017-10-22 09:58:30 +08:00
|
|
|
|
Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc);
|
|
|
|
|
S = S.substr(Size);
|
2016-05-23 08:40:24 +08:00
|
|
|
|
Off += Size;
|
2016-04-23 06:09:35 +08:00
|
|
|
|
}
|
2016-05-23 08:40:24 +08:00
|
|
|
|
}
|
2016-04-23 06:09:35 +08:00
|
|
|
|
|
2016-05-23 08:40:24 +08:00
|
|
|
|
// Split non-SHF_STRINGS section. Such section is a sequence of
|
|
|
|
|
// fixed size records.
|
2017-03-07 04:23:56 +08:00
|
|
|
|
void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> Data,
|
|
|
|
|
size_t EntSize) {
|
2016-04-23 06:09:35 +08:00
|
|
|
|
size_t Size = Data.size();
|
|
|
|
|
assert((Size % EntSize) == 0);
|
2017-12-20 00:29:02 +08:00
|
|
|
|
bool IsAlloc = Flags & SHF_ALLOC;
|
2017-10-22 07:20:13 +08:00
|
|
|
|
|
|
|
|
|
for (size_t I = 0; I != Size; I += EntSize)
|
2018-08-01 02:13:36 +08:00
|
|
|
|
Pieces.emplace_back(I, xxHash64(Data.slice(I, EntSize)), !IsAlloc);
|
2016-05-23 08:40:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <class ELFT>
|
2017-12-21 10:03:39 +08:00
|
|
|
|
MergeInputSection::MergeInputSection(ObjFile<ELFT> &F,
|
|
|
|
|
const typename ELFT::Shdr &Header,
|
2017-03-07 04:23:56 +08:00
|
|
|
|
StringRef Name)
|
2017-12-21 09:21:59 +08:00
|
|
|
|
: InputSectionBase(F, Header, Name, InputSectionBase::Merge) {}
|
|
|
|
|
|
|
|
|
|
// Constructs a MergeInputSection that is not backed by an input file (the file
// argument passed to the base class is nullptr). Link and Info are zero, and
// the entry size doubles as the section alignment.
MergeInputSection::MergeInputSection(uint64_t Flags, uint32_t Type,
                                     uint64_t Entsize, ArrayRef<uint8_t> Data,
                                     StringRef Name)
    : InputSectionBase(/*File*/ nullptr, Flags, Type, Entsize,
                       /*Link*/ 0,
                       /*Info*/ 0,
                       /*Alignment*/ Entsize, Data, Name, SectionBase::Merge) {
}
|
2016-05-24 00:55:43 +08:00
|
|
|
|
|
Parallelize uncompress() and splitIntoPieces().
Uncompressing section contents and splitting mergeable section contents
into smaller chunks are heavy tasks. They scan entire section contents
and do CPU-intensive tasks such as uncompressing zlib-compressed data
or computing a hash value for each section piece.
Luckily, these tasks are independent to each other, so we can do that
in parallel_for_each. The number of input sections is large (as opposed
to the number of output sections), so there's a large parallelism here.
Actually the current design to call uncompress() and splitIntoPieces()
in batch was chosen with doing this in mind. Basically what we need to
do here is to replace `for` with `parallel_for_each`.
It seems this patch improves latency significantly if linked programs
contain debug info (which in turn contain lots of mergeable strings.)
For example, the latency to link Clang (debug build) improved by 20% on
my machine as shown below. Note that ld.gold took 19.2 seconds to do
the same thing.
Before:
30801.782712 task-clock (msec) # 3.652 CPUs utilized ( +- 2.59% )
104,084 context-switches # 0.003 M/sec ( +- 1.02% )
5,063 cpu-migrations # 0.164 K/sec ( +- 13.66% )
2,528,130 page-faults # 0.082 M/sec ( +- 0.47% )
85,317,809,130 cycles # 2.770 GHz ( +- 2.62% )
67,352,463,373 stalled-cycles-frontend # 78.94% frontend cycles idle ( +- 3.06% )
<not supported> stalled-cycles-backend
44,295,945,493 instructions # 0.52 insns per cycle
# 1.52 stalled cycles per insn ( +- 0.44% )
8,572,384,877 branches # 278.308 M/sec ( +- 0.66% )
141,806,726 branch-misses # 1.65% of all branches ( +- 0.13% )
8.433424003 seconds time elapsed ( +- 1.20% )
After:
35523.764575 task-clock (msec) # 5.265 CPUs utilized ( +- 2.67% )
159,107 context-switches # 0.004 M/sec ( +- 0.48% )
8,123 cpu-migrations # 0.229 K/sec ( +- 23.34% )
2,372,483 page-faults # 0.067 M/sec ( +- 0.36% )
98,395,342,152 cycles # 2.770 GHz ( +- 2.62% )
79,294,670,125 stalled-cycles-frontend # 80.59% frontend cycles idle ( +- 3.03% )
<not supported> stalled-cycles-backend
46,274,151,813 instructions # 0.47 insns per cycle
# 1.71 stalled cycles per insn ( +- 0.47% )
8,987,621,670 branches # 253.003 M/sec ( +- 0.60% )
148,900,624 branch-misses # 1.66% of all branches ( +- 0.27% )
6.747548004 seconds time elapsed ( +- 0.40% )
llvm-svn: 287946
2016-11-26 04:05:08 +08:00
|
|
|
|
// This function is called after we obtain a complete list of input sections
|
|
|
|
|
// that need to be linked. It is responsible for splitting section contents
|
|
|
|
|
// into small chunks for further processing.
|
|
|
|
|
//
|
2017-08-24 03:03:20 +08:00
|
|
|
|
// Note that this function is called from parallelForEach. This must be
|
Parallelize uncompress() and splitIntoPieces().
Uncompressing section contents and splitting mergeable section contents
into smaller chunks are heavy tasks. They scan entire section contents
and do CPU-intensive tasks such as uncompressing zlib-compressed data
or computing a hash value for each section piece.
Luckily, these tasks are independent to each other, so we can do that
in parallel_for_each. The number of input sections is large (as opposed
to the number of output sections), so there's a large parallelism here.
Actually the current design to call uncompress() and splitIntoPieces()
in batch was chosen with doing this in mind. Basically what we need to
do here is to replace `for` with `parallel_for_each`.
It seems this patch improves latency significantly if linked programs
contain debug info (which in turn contain lots of mergeable strings.)
For example, the latency to link Clang (debug build) improved by 20% on
my machine as shown below. Note that ld.gold took 19.2 seconds to do
the same thing.
Before:
30801.782712 task-clock (msec) # 3.652 CPUs utilized ( +- 2.59% )
104,084 context-switches # 0.003 M/sec ( +- 1.02% )
5,063 cpu-migrations # 0.164 K/sec ( +- 13.66% )
2,528,130 page-faults # 0.082 M/sec ( +- 0.47% )
85,317,809,130 cycles # 2.770 GHz ( +- 2.62% )
67,352,463,373 stalled-cycles-frontend # 78.94% frontend cycles idle ( +- 3.06% )
<not supported> stalled-cycles-backend
44,295,945,493 instructions # 0.52 insns per cycle
# 1.52 stalled cycles per insn ( +- 0.44% )
8,572,384,877 branches # 278.308 M/sec ( +- 0.66% )
141,806,726 branch-misses # 1.65% of all branches ( +- 0.13% )
8.433424003 seconds time elapsed ( +- 1.20% )
After:
35523.764575 task-clock (msec) # 5.265 CPUs utilized ( +- 2.67% )
159,107 context-switches # 0.004 M/sec ( +- 0.48% )
8,123 cpu-migrations # 0.229 K/sec ( +- 23.34% )
2,372,483 page-faults # 0.067 M/sec ( +- 0.36% )
98,395,342,152 cycles # 2.770 GHz ( +- 2.62% )
79,294,670,125 stalled-cycles-frontend # 80.59% frontend cycles idle ( +- 3.03% )
<not supported> stalled-cycles-backend
46,274,151,813 instructions # 0.47 insns per cycle
# 1.71 stalled cycles per insn ( +- 0.47% )
8,987,621,670 branches # 253.003 M/sec ( +- 0.60% )
148,900,624 branch-misses # 1.66% of all branches ( +- 0.27% )
6.747548004 seconds time elapsed ( +- 0.40% )
llvm-svn: 287946
2016-11-26 04:05:08 +08:00
|
|
|
|
// thread-safe (i.e. no memory allocation from the pools).
|
2017-03-07 04:23:56 +08:00
|
|
|
|
void MergeInputSection::splitIntoPieces() {
|
2017-09-01 20:04:52 +08:00
|
|
|
|
assert(Pieces.empty());
|
2017-10-22 09:58:30 +08:00
|
|
|
|
|
2017-12-20 00:29:02 +08:00
|
|
|
|
if (Flags & SHF_STRINGS)
|
2017-10-22 09:58:30 +08:00
|
|
|
|
splitStrings(Data, Entsize);
|
2016-05-23 08:40:24 +08:00
|
|
|
|
else
|
2017-10-22 09:58:30 +08:00
|
|
|
|
splitNonStrings(Data, Entsize);
|
2016-05-24 00:55:43 +08:00
|
|
|
|
|
2018-04-05 08:01:57 +08:00
|
|
|
|
OffsetMap.reserve(Pieces.size());
|
|
|
|
|
for (size_t I = 0, E = Pieces.size(); I != E; ++I)
|
|
|
|
|
OffsetMap[Pieces[I].InputOff] = I;
|
2016-04-23 06:09:35 +08:00
|
|
|
|
}
|
2015-10-20 05:00:02 +08:00
|
|
|
|
|
2016-10-19 22:17:36 +08:00
|
|
|
|
// Binary search over the non-empty range [First, Last). Returns an iterator to
// the first element E for which Comp(Value, E) is true, or Last if there is no
// such element. The range must be partitioned with respect to Comp, as for
// std::upper_bound. An empty range is a precondition violation (asserted).
template <class It, class T, class Compare>
static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) {
  size_t Remaining = std::distance(First, Last);
  assert(Remaining != 0);

  // Halve the window until a single candidate is left. First only advances
  // when the probed element does not compare greater than Value.
  for (; Remaining != 1;) {
    const size_t Half = Remaining / 2;
    const It Mid = First + Half;
    Remaining -= Half;
    if (!Comp(Value, *Mid))
      First = Mid;
  }

  if (Comp(Value, *First))
    return First;
  return First + 1;
}
|
|
|
|
|
|
2018-04-03 12:06:14 +08:00
|
|
|
|
// Returns the section piece that covers the input offset Offset. Reports a
// fatal error if Offset is at or beyond the end of the section data.
SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) {
  if (Offset >= this->Data.size())
    fatal(toString(this) + ": offset is outside the section");

  // Fast path: a piece starts exactly at Offset, so the map knows its index.
  auto Found = OffsetMap.find(Offset);
  if (Found != OffsetMap.end())
    return &Pieces[Found->second];

  // Offsets in the middle of a piece are not in the map. Binary-search the
  // piece vector for the first piece that starts after Offset; the piece just
  // before it is the one we want.
  // NOTE(review): this relies on Pieces being ordered by InputOff.
  auto StartsAfter = [](const uint64_t &A, const SectionPiece &B) {
    return A < B.InputOff;
  };
  auto Next = fastUpperBound(Pieces.begin(), Pieces.end(), Offset, StartsAfter);
  --Next;
  return &*Next;
}
|
|
|
|
|
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do fairly complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contiguous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
|
// Returns the offset in an output section for a given input offset.
|
|
|
|
|
// Because contents of a mergeable section is not contiguous in output,
|
|
|
|
|
// it is not just an addition to a base output offset.
|
2018-04-20 00:05:07 +08:00
|
|
|
|
uint64_t MergeInputSection::getParentOffset(uint64_t Offset) const {
|
2017-11-01 03:14:06 +08:00
|
|
|
|
// If Offset is not at beginning of a section piece, it is not in the map.
|
|
|
|
|
// In that case we need to search from the original section piece vector.
|
2018-04-03 12:06:14 +08:00
|
|
|
|
const SectionPiece &Piece =
|
2018-08-31 19:51:51 +08:00
|
|
|
|
*(const_cast<MergeInputSection *>(this)->getSectionPiece (Offset));
|
2017-03-07 04:23:56 +08:00
|
|
|
|
uint64_t Addend = Offset - Piece.InputOff;
|
2016-05-29 02:40:38 +08:00
|
|
|
|
return Piece.OutputOff + Addend;
|
Avoid doing binary search.
MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
It is usually the case when creating executable with debug info,
so it is pretty common.
The reason why it is slow is because it has to do faily complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contigous. We've got to do some lookups.
We used to do binary search on the list of section pieces.
It is slow because I think it's hostile to branch prediction.
This patch replaces it with hash table lookup. Seems it's working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up about 4%.
Before:
6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% )
238 context-switches # 0.036 K/sec ( +- 6.59% )
0 cpu-migrations # 0.000 K/sec ( +- 50.92% )
1,067,675 page-faults # 0.162 M/sec ( +- 0.15% )
18,369,931,470 cycles # 2.790 GHz ( +- 0.09% )
9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% )
<not supported> stalled-cycles-backend
21,206,747,787 instructions # 1.15 insns per cycle
# 0.45 stalled cycles per insn ( +- 0.04% )
3,817,398,032 branches # 579.786 M/sec ( +- 0.04% )
132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% )
6.579106511 seconds time elapsed ( +- 0.09% )
After:
6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% )
221 context-switches # 0.035 K/sec ( +- 4.11% )
1 cpu-migrations # 0.000 K/sec ( +- 45.21% )
1,280,775 page-faults # 0.203 M/sec ( +- 0.37% )
17,611,539,150 cycles # 2.790 GHz ( +- 0.19% )
10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% )
<not supported> stalled-cycles-backend
18,794,779,900 instructions # 1.07 insns per cycle
# 0.55 stalled cycles per insn ( +- 0.03% )
3,287,450,865 branches # 520.799 M/sec ( +- 0.03% )
72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% )
6.307411828 seconds time elapsed ( +- 0.19% )
Differential Revision: http://reviews.llvm.org/D20645
llvm-svn: 270999
2016-05-27 22:39:13 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-12-21 10:03:39 +08:00
|
|
|
|
template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &,
|
2017-07-27 06:13:32 +08:00
|
|
|
|
StringRef);
|
2017-12-21 10:03:39 +08:00
|
|
|
|
template InputSection::InputSection(ObjFile<ELF32BE> &, const ELF32BE::Shdr &,
|
2017-07-27 06:13:32 +08:00
|
|
|
|
StringRef);
|
2017-12-21 10:03:39 +08:00
|
|
|
|
template InputSection::InputSection(ObjFile<ELF64LE> &, const ELF64LE::Shdr &,
|
2017-07-27 06:13:32 +08:00
|
|
|
|
StringRef);
|
2017-12-21 10:03:39 +08:00
|
|
|
|
template InputSection::InputSection(ObjFile<ELF64BE> &, const ELF64BE::Shdr &,
|
2017-07-27 06:13:32 +08:00
|
|
|
|
StringRef);
|
2017-03-30 04:15:29 +08:00
|
|
|
|
|
|
|
|
|
template std::string InputSectionBase::getLocation<ELF32LE>(uint64_t);
|
|
|
|
|
template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t);
|
|
|
|
|
template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t);
|
|
|
|
|
template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t);
|
|
|
|
|
|
|
|
|
|
template void InputSection::writeTo<ELF32LE>(uint8_t *);
|
|
|
|
|
template void InputSection::writeTo<ELF32BE>(uint8_t *);
|
|
|
|
|
template void InputSection::writeTo<ELF64LE>(uint8_t *);
|
|
|
|
|
template void InputSection::writeTo<ELF64BE>(uint8_t *);
|
2016-02-28 08:25:54 +08:00
|
|
|
|
|
2017-12-21 10:03:39 +08:00
|
|
|
|
template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &,
|
|
|
|
|
const ELF32LE::Shdr &, StringRef);
|
|
|
|
|
template MergeInputSection::MergeInputSection(ObjFile<ELF32BE> &,
|
|
|
|
|
const ELF32BE::Shdr &, StringRef);
|
|
|
|
|
template MergeInputSection::MergeInputSection(ObjFile<ELF64LE> &,
|
|
|
|
|
const ELF64LE::Shdr &, StringRef);
|
|
|
|
|
template MergeInputSection::MergeInputSection(ObjFile<ELF64BE> &,
|
|
|
|
|
const ELF64BE::Shdr &, StringRef);
|
|
|
|
|
|
|
|
|
|
template EhInputSection::EhInputSection(ObjFile<ELF32LE> &,
|
|
|
|
|
const ELF32LE::Shdr &, StringRef);
|
|
|
|
|
template EhInputSection::EhInputSection(ObjFile<ELF32BE> &,
|
|
|
|
|
const ELF32BE::Shdr &, StringRef);
|
|
|
|
|
template EhInputSection::EhInputSection(ObjFile<ELF64LE> &,
|
|
|
|
|
const ELF64LE::Shdr &, StringRef);
|
|
|
|
|
template EhInputSection::EhInputSection(ObjFile<ELF64BE> &,
|
|
|
|
|
const ELF64BE::Shdr &, StringRef);
|
2017-03-07 05:17:18 +08:00
|
|
|
|
|
|
|
|
|
template void EhInputSection::split<ELF32LE>();
|
|
|
|
|
template void EhInputSection::split<ELF32BE>();
|
|
|
|
|
template void EhInputSection::split<ELF64LE>();
|
|
|
|
|
template void EhInputSection::split<ELF64BE>();
|