2015-06-25 04:40:03 +08:00
|
|
|
//===- ICF.cpp ------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Linker
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2016-12-02 16:03:58 +08:00
|
|
|
// ICF is short for Identical Code Folding. That is a size optimization to
|
|
|
|
// identify and merge two or more read-only sections (typically functions)
|
|
|
|
// that happened to have the same contents. It usually reduces output size
|
|
|
|
// by a few percent.
|
2015-09-11 12:29:03 +08:00
|
|
|
//
|
2016-12-02 16:03:58 +08:00
|
|
|
// On Windows, ICF is enabled by default.
|
2015-09-11 12:29:03 +08:00
|
|
|
//
|
2016-12-02 16:03:58 +08:00
|
|
|
// See ELF/ICF.cpp for the details about the algorithm.
|
2015-06-25 04:40:03 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "Chunks.h"
|
2016-12-02 16:03:58 +08:00
|
|
|
#include "Error.h"
|
2015-07-31 06:57:21 +08:00
|
|
|
#include "Symbols.h"
|
2015-09-19 05:06:34 +08:00
|
|
|
#include "lld/Core/Parallel.h"
|
2015-07-31 06:57:21 +08:00
|
|
|
#include "llvm/ADT/Hashing.h"
|
2015-09-11 12:29:03 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <algorithm>
|
2015-09-19 06:31:15 +08:00
|
|
|
#include <atomic>
|
2015-06-25 04:40:03 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2015-07-31 06:57:21 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2015-06-25 04:40:03 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
2015-09-11 12:29:03 +08:00
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
class ICF {
|
|
|
|
public:
|
2015-09-19 05:17:44 +08:00
|
|
|
void run(const std::vector<Chunk *> &V);
|
2015-09-16 22:19:10 +08:00
|
|
|
|
|
|
|
private:
|
2016-12-02 16:03:58 +08:00
|
|
|
void segregate(size_t Begin, size_t End, bool Constant);
|
2015-09-16 22:19:10 +08:00
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
|
|
|
|
bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
|
2015-09-16 22:19:10 +08:00
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
uint32_t getHash(SectionChunk *C);
|
|
|
|
bool isEligible(SectionChunk *C);
|
|
|
|
|
|
|
|
size_t findBoundary(size_t Begin, size_t End);
|
|
|
|
|
|
|
|
void forEachColorRange(size_t Begin, size_t End,
|
|
|
|
std::function<void(size_t, size_t)> Fn);
|
|
|
|
|
|
|
|
void forEachColor(std::function<void(size_t, size_t)> Fn);
|
|
|
|
|
|
|
|
std::vector<SectionChunk *> Chunks;
|
|
|
|
int Cnt = 0;
|
|
|
|
std::atomic<uint32_t> NextId = {1};
|
|
|
|
std::atomic<bool> Repeat = {false};
|
|
|
|
};
|
2015-07-31 06:57:21 +08:00
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
// Returns a hash value for S.
|
|
|
|
uint32_t ICF::getHash(SectionChunk *C) {
|
2015-09-16 22:19:10 +08:00
|
|
|
return hash_combine(C->getPermissions(),
|
|
|
|
hash_value(C->SectionName),
|
|
|
|
C->NumRelocs,
|
2015-09-26 00:50:12 +08:00
|
|
|
C->getAlign(),
|
2015-09-16 22:19:10 +08:00
|
|
|
uint32_t(C->Header->SizeOfRawData),
|
|
|
|
C->Checksum);
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
2015-09-11 12:29:03 +08:00
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
// Returns true if section S is subject of ICF.
|
|
|
|
bool ICF::isEligible(SectionChunk *C) {
|
|
|
|
bool Global = C->Sym && C->Sym->isExternal();
|
|
|
|
bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
|
|
|
|
return C->isCOMDAT() && C->isLive() && Global && !Writable;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Split a range into smaller ranges by recoloring sections
|
|
|
|
void ICF::segregate(size_t Begin, size_t End, bool Constant) {
|
|
|
|
while (Begin < End) {
|
|
|
|
// Divide [Begin, End) into two. Let Mid be the start index of the
|
|
|
|
// second group.
|
|
|
|
auto Bound = std::stable_partition(
|
|
|
|
Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) {
|
|
|
|
if (Constant)
|
|
|
|
return equalsConstant(Chunks[Begin], S);
|
|
|
|
return equalsVariable(Chunks[Begin], S);
|
|
|
|
});
|
|
|
|
size_t Mid = Bound - Chunks.begin();
|
|
|
|
|
|
|
|
// Split [Begin, End) into [Begin, Mid) and [Mid, End).
|
|
|
|
uint32_t Id = NextId++;
|
|
|
|
for (size_t I = Begin; I < Mid; ++I)
|
|
|
|
Chunks[I]->Color[(Cnt + 1) % 2] = Id;
|
|
|
|
|
|
|
|
// If we created a group, we need to iterate the main loop again.
|
|
|
|
if (Mid != End)
|
|
|
|
Repeat = true;
|
|
|
|
|
|
|
|
Begin = Mid;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compare "non-moving" part of two sections, namely everything
|
|
|
|
// except relocation targets.
|
2015-09-16 22:19:10 +08:00
|
|
|
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
|
2016-12-02 16:03:58 +08:00
|
|
|
if (A->NumRelocs != B->NumRelocs)
|
2015-07-31 06:57:21 +08:00
|
|
|
return false;
|
|
|
|
|
COFF: Optimize ICF by comparing relocations before section contents.
equalsConstants() is the heaviest function in ICF, and that consumes
more than half of total ICF execution time. Of which, section content
comparison accounts for roughly one third.
Previously, we compared section contents at the beginning of the
function after comparing their checksums. The comparison is very
likely to succeed because when the control reaches that comparison,
their checksums are always equal. And because checksums are 64-bit
CRC, they are unlikely to collide.
We compared relocations and associative sections after that.
If they are different, the time we spent on byte-by-byte comparison
of section contents were wasted.
This patch moves the comparison at the end of function. If the
comparison fails, the time we spent on relocation comparison are
wasted, but as I wrote it's very unlikely to happen.
LLD took 1198 ms to link itself to produce a 27.11 MB executable.
Of which, ICF accounted for 536 ms. This patch cuts it by 90 ms,
which is 17% speedup of ICF and 7.5% speedup overall. All numbers
are median of ten runs.
llvm-svn: 247961
2015-09-18 09:30:56 +08:00
|
|
|
// Compare relocations.
|
2015-07-31 06:57:21 +08:00
|
|
|
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
|
2015-09-16 11:26:31 +08:00
|
|
|
if (R1.Type != R2.Type ||
|
|
|
|
R1.VirtualAddress != R2.VirtualAddress) {
|
2015-07-31 06:57:21 +08:00
|
|
|
return false;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
2016-12-10 05:55:24 +08:00
|
|
|
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
|
|
|
|
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
|
2015-07-31 06:57:21 +08:00
|
|
|
if (B1 == B2)
|
|
|
|
return true;
|
2015-09-18 10:40:54 +08:00
|
|
|
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
|
|
|
|
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
|
|
|
|
return D1->getValue() == D2->getValue() &&
|
2016-12-02 16:03:58 +08:00
|
|
|
D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2];
|
2015-09-18 10:40:54 +08:00
|
|
|
return false;
|
2015-07-31 06:57:21 +08:00
|
|
|
};
|
COFF: Optimize ICF by comparing relocations before section contents.
equalsConstants() is the heaviest function in ICF, and that consumes
more than half of total ICF execution time. Of which, section content
comparison accounts for roughly one third.
Previously, we compared section contents at the beginning of the
function after comparing their checksums. The comparison is very
likely to succeed because when the control reaches that comparison,
their checksums are always equal. And because checksums are 64-bit
CRC, they are unlikely to collide.
We compared relocations and associative sections after that.
If they are different, the time we spent on byte-by-byte comparison
of section contents were wasted.
This patch moves the comparison at the end of function. If the
comparison fails, the time we spent on relocation comparison are
wasted, but as I wrote it's very unlikely to happen.
LLD took 1198 ms to link itself to produce a 27.11 MB executable.
Of which, ICF accounted for 536 ms. This patch cuts it by 90 ms,
which is 17% speedup of ICF and 7.5% speedup overall. All numbers
are median of ten runs.
llvm-svn: 247961
2015-09-18 09:30:56 +08:00
|
|
|
if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
|
|
|
|
return false;
|
|
|
|
|
2015-09-18 10:40:54 +08:00
|
|
|
// Compare section attributes and contents.
|
|
|
|
return A->getPermissions() == B->getPermissions() &&
|
|
|
|
A->SectionName == B->SectionName &&
|
2015-09-26 00:50:12 +08:00
|
|
|
A->getAlign() == B->getAlign() &&
|
2015-09-18 10:40:54 +08:00
|
|
|
A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
|
|
|
|
A->Checksum == B->Checksum &&
|
|
|
|
A->getContents() == B->getContents();
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
// Compare "moving" part of two sections, namely relocation targets.
|
2015-09-16 22:19:10 +08:00
|
|
|
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
|
2015-09-18 09:51:37 +08:00
|
|
|
// Compare relocations.
|
|
|
|
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
|
2016-12-10 05:55:24 +08:00
|
|
|
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
|
|
|
|
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
|
2015-09-21 04:19:12 +08:00
|
|
|
if (B1 == B2)
|
|
|
|
return true;
|
|
|
|
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
|
2015-09-19 05:17:44 +08:00
|
|
|
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
|
2016-12-02 16:03:58 +08:00
|
|
|
return D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2];
|
2015-09-19 05:17:44 +08:00
|
|
|
return false;
|
2015-09-18 09:51:37 +08:00
|
|
|
};
|
|
|
|
return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
size_t ICF::findBoundary(size_t Begin, size_t End) {
|
|
|
|
for (size_t I = Begin + 1; I < End; ++I)
|
|
|
|
if (Chunks[Begin]->Color[Cnt % 2] != Chunks[I]->Color[Cnt % 2])
|
|
|
|
return I;
|
|
|
|
return End;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ICF::forEachColorRange(size_t Begin, size_t End,
|
|
|
|
std::function<void(size_t, size_t)> Fn) {
|
|
|
|
if (Begin > 0)
|
|
|
|
Begin = findBoundary(Begin - 1, End);
|
|
|
|
|
|
|
|
while (Begin < End) {
|
|
|
|
size_t Mid = findBoundary(Begin, Chunks.size());
|
|
|
|
Fn(Begin, Mid);
|
|
|
|
Begin = Mid;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
// Call Fn on each color group.
|
|
|
|
void ICF::forEachColor(std::function<void(size_t, size_t)> Fn) {
|
|
|
|
// If the number of sections are too small to use threading,
|
|
|
|
// call Fn sequentially.
|
|
|
|
if (Chunks.size() < 1024) {
|
|
|
|
forEachColorRange(0, Chunks.size(), Fn);
|
|
|
|
return;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
2016-12-02 16:03:58 +08:00
|
|
|
|
|
|
|
// Split sections into 256 shards and call Fn in parallel.
|
|
|
|
size_t NumShards = 256;
|
|
|
|
size_t Step = Chunks.size() / NumShards;
|
|
|
|
parallel_for(size_t(0), NumShards, [&](size_t I) {
|
|
|
|
forEachColorRange(I * Step, (I + 1) * Step, Fn);
|
|
|
|
});
|
|
|
|
forEachColorRange(Step * NumShards, Chunks.size(), Fn);
|
2015-07-31 06:57:21 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:03 +08:00
|
|
|
// Merge identical COMDAT sections.
|
2015-09-05 05:35:54 +08:00
|
|
|
// Two sections are considered the same if their section headers,
|
2015-06-25 04:40:03 +08:00
|
|
|
// contents and relocations are all the same.
|
2015-09-19 05:17:44 +08:00
|
|
|
void ICF::run(const std::vector<Chunk *> &Vec) {
|
2015-09-16 22:19:10 +08:00
|
|
|
// Collect only mergeable sections and group by hash value.
|
2015-09-19 05:17:44 +08:00
|
|
|
for (Chunk *C : Vec) {
|
2016-12-02 16:03:58 +08:00
|
|
|
auto *SC = dyn_cast<SectionChunk>(C);
|
|
|
|
if (!SC)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (isEligible(SC)) {
|
|
|
|
// Set MSB to 1 to avoid collisions with non-hash colors.
|
|
|
|
SC->Color[0] = getHash(SC) | (1 << 31);
|
|
|
|
Chunks.push_back(SC);
|
|
|
|
} else {
|
|
|
|
SC->Color[0] = NextId++;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
}
|
2015-09-05 05:35:54 +08:00
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
if (Chunks.empty())
|
|
|
|
return;
|
|
|
|
|
2015-09-19 05:06:34 +08:00
|
|
|
// From now on, sections in Chunks are ordered so that sections in
|
2015-09-16 22:19:10 +08:00
|
|
|
// the same group are consecutive in the vector.
|
2016-12-02 16:03:58 +08:00
|
|
|
std::stable_sort(Chunks.begin(), Chunks.end(),
|
|
|
|
[](SectionChunk *A, SectionChunk *B) {
|
|
|
|
return A->Color[0] < B->Color[0];
|
|
|
|
});
|
|
|
|
|
|
|
|
// Compare static contents and assign unique IDs for each static content.
|
|
|
|
forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
|
|
|
|
++Cnt;
|
|
|
|
|
|
|
|
// Split groups by comparing relocations until convergence is obtained.
|
|
|
|
do {
|
|
|
|
Repeat = false;
|
|
|
|
forEachColor(
|
|
|
|
[&](size_t Begin, size_t End) { segregate(Begin, End, false); });
|
2015-09-17 07:55:39 +08:00
|
|
|
++Cnt;
|
2016-12-02 16:03:58 +08:00
|
|
|
} while (Repeat);
|
|
|
|
|
2015-09-17 07:55:39 +08:00
|
|
|
if (Config->Verbose)
|
2016-12-02 16:03:58 +08:00
|
|
|
outs() << "\nICF needed " << Cnt << " iterations\n";
|
|
|
|
|
|
|
|
// Merge sections in the same colors.
|
|
|
|
forEachColor([&](size_t Begin, size_t End) {
|
|
|
|
if (End - Begin == 1)
|
|
|
|
return;
|
|
|
|
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
if (Config->Verbose)
|
2016-12-02 16:03:58 +08:00
|
|
|
outs() << "Selected " << Chunks[Begin]->getDebugName() << "\n";
|
|
|
|
for (size_t I = Begin + 1; I < End; ++I) {
|
2015-09-16 11:26:31 +08:00
|
|
|
if (Config->Verbose)
|
2016-12-02 16:03:58 +08:00
|
|
|
outs() << " Removed " << Chunks[I]->getDebugName() << "\n";
|
|
|
|
Chunks[Begin]->replace(Chunks[I]);
|
2015-09-11 12:29:03 +08:00
|
|
|
}
|
2016-12-02 16:03:58 +08:00
|
|
|
});
|
2015-06-25 04:40:03 +08:00
|
|
|
}
|
|
|
|
|
2016-12-02 16:03:58 +08:00
|
|
|
// Entry point to ICF.
|
|
|
|
void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); }
|
|
|
|
|
2015-06-25 04:40:03 +08:00
|
|
|
} // namespace coff
|
|
|
|
} // namespace lld
|