2015-06-25 04:40:03 +08:00
|
|
|
//===- ICF.cpp ------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Linker
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2015-09-11 12:29:03 +08:00
|
|
|
// Identical COMDAT Folding is a feature to merge COMDAT sections not by
|
|
|
|
// name (which is regular COMDAT handling) but by contents. If two COMDAT
|
|
|
|
// sections have the same data, relocations, attributes, etc., then the two
|
|
|
|
// are considered identical and merged by the linker. This optimization
|
|
|
|
// makes outputs smaller.
|
|
|
|
//
|
|
|
|
// ICF is theoretically a problem of reducing graphs by merging as many
|
2015-09-16 11:26:31 +08:00
|
|
|
// identical subgraphs as possible, if we consider sections as vertices and
|
2015-09-11 12:29:03 +08:00
|
|
|
// relocations as edges. This may be a more complicated problem than you
|
|
|
|
// might think. The order of processing sections matters since merging two
|
2015-09-15 08:35:41 +08:00
|
|
|
// sections can make other sections, whose relocations now point to the same
|
2015-09-11 12:29:03 +08:00
|
|
|
// section, mergeable. Graphs may contain cycles, which is common in COFF.
|
|
|
|
// We need a sophisticated algorithm to do this properly and efficiently.
|
|
|
|
//
|
2015-09-16 11:26:31 +08:00
|
|
|
// What we do in this file is this. We split sections into groups. Sections
|
|
|
|
// in the same group are considered identical.
|
2015-09-11 12:29:03 +08:00
|
|
|
//
|
2015-09-16 11:26:31 +08:00
|
|
|
// First, all sections are grouped by their "constant" values. Constant
|
2015-09-16 22:19:10 +08:00
|
|
|
// values are values that are never changed by ICF, such as section contents,
|
2015-09-16 11:26:31 +08:00
|
|
|
// section name, number of relocations, type and offset of each relocation,
|
2015-09-16 22:19:10 +08:00
|
|
|
// etc. Because we do not care about some relocation targets in this step,
|
|
|
|
// two sections in the same group may not be identical, but at least two
|
|
|
|
// sections in different groups can never be identical.
|
2015-09-16 11:26:31 +08:00
|
|
|
//
|
|
|
|
// Then, we try to split each group by relocation targets. Relocations are
|
|
|
|
// considered identical if and only if the relocation targets are in the
|
|
|
|
// same group. Splitting a group may make more groups splittable,
|
|
|
|
// because two relocations that were previously considered identical might
|
|
|
|
// now point to different groups. We repeat this step until convergence
|
|
|
|
// is obtained.
|
|
|
|
//
|
|
|
|
// This algorithm is the so-called "optimistic" algorithm described in
|
|
|
|
// http://research.google.com/pubs/pub36912.html.
|
2015-06-25 04:40:03 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "Chunks.h"
|
2015-07-31 06:57:21 +08:00
|
|
|
#include "Symbols.h"
|
2015-09-19 05:06:34 +08:00
|
|
|
#include "lld/Core/Parallel.h"
|
2015-07-31 06:57:21 +08:00
|
|
|
#include "llvm/ADT/Hashing.h"
|
2015-09-11 12:29:03 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <algorithm>
|
2015-09-19 06:31:15 +08:00
|
|
|
#include <atomic>
|
2015-06-25 04:40:03 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2015-07-31 06:57:21 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2015-06-25 04:40:03 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
2015-09-11 12:29:03 +08:00
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
typedef std::vector<SectionChunk *>::iterator ChunkIterator;
|
|
|
|
typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
|
|
|
|
|
|
|
|
class ICF {
|
|
|
|
public:
|
2015-09-19 05:17:44 +08:00
|
|
|
void run(const std::vector<Chunk *> &V);
|
2015-09-16 22:19:10 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
static uint64_t getHash(SectionChunk *C);
|
|
|
|
static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
|
|
|
|
static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
|
2015-09-19 05:17:44 +08:00
|
|
|
bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq);
|
2016-02-26 02:49:11 +08:00
|
|
|
bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
|
2015-09-16 22:19:10 +08:00
|
|
|
|
2015-09-20 09:44:44 +08:00
|
|
|
std::atomic<uint64_t> NextID = { 1 };
|
2015-09-16 22:19:10 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Entry point to ICF.
|
|
|
|
void doICF(const std::vector<Chunk *> &Chunks) {
|
2015-09-19 05:17:44 +08:00
|
|
|
ICF().run(Chunks);
|
2015-07-31 06:57:21 +08:00
|
|
|
}
|
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
// Returns a hash over the section's permissions, name, relocation count,
// alignment, raw data size and checksum. Sections whose hashes differ
// differ in at least one of these properties.
uint64_t ICF::getHash(SectionChunk *C) {
  const auto NameHash = hash_value(C->SectionName);
  const uint32_t RawSize = C->Header->SizeOfRawData;
  // The argument order is part of the hash; keep it as is.
  return hash_combine(C->getPermissions(), NameHash, C->NumRelocs,
                      C->getAlign(), RawSize, C->Checksum);
}
|
2015-09-11 12:29:03 +08:00
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
|
2015-09-18 10:40:54 +08:00
|
|
|
if (A->AssocChildren.size() != B->AssocChildren.size() ||
|
|
|
|
A->NumRelocs != B->NumRelocs) {
|
2015-07-31 06:57:21 +08:00
|
|
|
return false;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
2015-07-31 06:57:21 +08:00
|
|
|
|
COFF: Optimize ICF by comparing relocations before section contents.
equalsConstants() is the heaviest function in ICF, and that consumes
more than half of total ICF execution time. Of which, section content
comparison accounts for roughly one third.
Previously, we compared section contents at the beginning of the
function after comparing their checksums. The comparison is very
likely to succeed because when the control reaches that comparison,
their checksums are always equal. And because checksums are 64-bit
CRC, they are unlikely to collide.
We compared relocations and associative sections after that.
If they are different, the time we spent on byte-by-byte comparison
of section contents were wasted.
This patch moves the comparison at the end of function. If the
comparison fails, the time we spent on relocation comparison are
wasted, but as I wrote it's very unlikely to happen.
LLD took 1198 ms to link itself to produce a 27.11 MB executable.
Of which, ICF accounted for 536 ms. This patch cuts it by 90 ms,
which is 17% speedup of ICF and 7.5% speedup overall. All numbers
are median of ten runs.
llvm-svn: 247961
2015-09-18 09:30:56 +08:00
|
|
|
// Compare associative sections.
|
2015-09-16 11:26:31 +08:00
|
|
|
for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
|
|
|
|
if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
|
2015-07-31 06:57:21 +08:00
|
|
|
return false;
|
|
|
|
|
COFF: Optimize ICF by comparing relocations before section contents.
equalsConstants() is the heaviest function in ICF, and that consumes
more than half of total ICF execution time. Of which, section content
comparison accounts for roughly one third.
Previously, we compared section contents at the beginning of the
function after comparing their checksums. The comparison is very
likely to succeed because when the control reaches that comparison,
their checksums are always equal. And because checksums are 64-bit
CRC, they are unlikely to collide.
We compared relocations and associative sections after that.
If they are different, the time we spent on byte-by-byte comparison
of section contents were wasted.
This patch moves the comparison at the end of function. If the
comparison fails, the time we spent on relocation comparison are
wasted, but as I wrote it's very unlikely to happen.
LLD took 1198 ms to link itself to produce a 27.11 MB executable.
Of which, ICF accounted for 536 ms. This patch cuts it by 90 ms,
which is 17% speedup of ICF and 7.5% speedup overall. All numbers
are median of ten runs.
llvm-svn: 247961
2015-09-18 09:30:56 +08:00
|
|
|
// Compare relocations.
|
2015-07-31 06:57:21 +08:00
|
|
|
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
|
2015-09-16 11:26:31 +08:00
|
|
|
if (R1.Type != R2.Type ||
|
|
|
|
R1.VirtualAddress != R2.VirtualAddress) {
|
2015-07-31 06:57:21 +08:00
|
|
|
return false;
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
|
|
|
|
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
|
2015-07-31 06:57:21 +08:00
|
|
|
if (B1 == B2)
|
|
|
|
return true;
|
2015-09-18 10:40:54 +08:00
|
|
|
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
|
|
|
|
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
|
|
|
|
return D1->getValue() == D2->getValue() &&
|
|
|
|
D1->getChunk()->GroupID == D2->getChunk()->GroupID;
|
|
|
|
return false;
|
2015-07-31 06:57:21 +08:00
|
|
|
};
|
COFF: Optimize ICF by comparing relocations before section contents.
equalsConstants() is the heaviest function in ICF, and that consumes
more than half of total ICF execution time. Of which, section content
comparison accounts for roughly one third.
Previously, we compared section contents at the beginning of the
function after comparing their checksums. The comparison is very
likely to succeed because when the control reaches that comparison,
their checksums are always equal. And because checksums are 64-bit
CRC, they are unlikely to collide.
We compared relocations and associative sections after that.
If they are different, the time we spent on byte-by-byte comparison
of section contents were wasted.
This patch moves the comparison at the end of function. If the
comparison fails, the time we spent on relocation comparison are
wasted, but as I wrote it's very unlikely to happen.
LLD took 1198 ms to link itself to produce a 27.11 MB executable.
Of which, ICF accounted for 536 ms. This patch cuts it by 90 ms,
which is 17% speedup of ICF and 7.5% speedup overall. All numbers
are median of ten runs.
llvm-svn: 247961
2015-09-18 09:30:56 +08:00
|
|
|
if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
|
|
|
|
return false;
|
|
|
|
|
2015-09-18 10:40:54 +08:00
|
|
|
// Compare section attributes and contents.
|
|
|
|
return A->getPermissions() == B->getPermissions() &&
|
|
|
|
A->SectionName == B->SectionName &&
|
2015-09-26 00:50:12 +08:00
|
|
|
A->getAlign() == B->getAlign() &&
|
2015-09-18 10:40:54 +08:00
|
|
|
A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
|
|
|
|
A->Checksum == B->Checksum &&
|
|
|
|
A->getContents() == B->getContents();
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
|
2015-09-16 22:19:10 +08:00
|
|
|
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
|
2015-09-18 09:51:37 +08:00
|
|
|
// Compare associative sections.
|
|
|
|
for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
|
|
|
|
if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Compare relocations.
|
|
|
|
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
|
|
|
|
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
|
2015-09-21 04:19:12 +08:00
|
|
|
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
|
|
|
|
if (B1 == B2)
|
|
|
|
return true;
|
|
|
|
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
|
2015-09-19 05:17:44 +08:00
|
|
|
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
|
|
|
|
return D1->getChunk()->GroupID == D2->getChunk()->GroupID;
|
|
|
|
return false;
|
2015-09-18 09:51:37 +08:00
|
|
|
};
|
|
|
|
return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
|
2015-09-16 11:26:31 +08:00
|
|
|
}
|
|
|
|
|
2016-02-26 02:49:11 +08:00
|
|
|
// Splits the group [Begin, End) into subgroups of sections that are
// equivalent under Eq.
//
// Repeatedly takes the first remaining section as the representative, moves
// every section equal to it directly behind it, and gives that run a fresh
// group ID. The final run keeps its existing ID, so a group that does not
// need splitting is left untouched.
//
// Returns true if the group was split into two or more subgroups.
bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
  // An empty range has nothing to split; bail out before dereferencing.
  if (Begin == End)
    return false;

  bool R = false;
  for (auto It = Begin;;) {
    SectionChunk *Head = *It;
    // std::stable_partition rather than std::partition: the relative order
    // of sections is preserved, so the representative of each subgroup does
    // not depend on the standard library implementation.
    auto Bound = std::stable_partition(
        It + 1, End, [&](SectionChunk *SC) { return Eq(Head, SC); });
    // Everything left matches Head: this last run keeps its current ID.
    if (Bound == End)
      return R;
    uint64_t ID = NextID++;
    std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; });
    It = Bound;
    R = true;
  }
}
|
|
|
|
|
2015-09-19 05:17:44 +08:00
|
|
|
// Walks Chunks, treating each maximal run of consecutive sections with the
// same GroupID as one group, and calls segregate() on every such run.
//
// Returns true if segregate() split at least one group.
bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) {
  bool Changed = false;
  auto Cur = Chunks.begin();
  const auto Last = Chunks.end();
  while (Cur != Last) {
    SectionChunk *First = *Cur;
    // The group ends at the first section carrying a different ID.
    auto GroupEnd = std::find_if(Cur + 1, Last, [&](SectionChunk *SC) {
      return SC->GroupID != First->GroupID;
    });
    Changed |= segregate(Cur, GroupEnd, Eq);
    Cur = GroupEnd;
  }
  return Changed;
}
|
|
|
|
|
2015-06-25 04:40:03 +08:00
|
|
|
// Merge identical COMDAT sections.
|
2015-09-05 05:35:54 +08:00
|
|
|
// Two sections are considered the same if their section headers,
|
2015-06-25 04:40:03 +08:00
|
|
|
// contents and relocations are all the same.
|
2015-09-19 05:17:44 +08:00
|
|
|
void ICF::run(const std::vector<Chunk *> &Vec) {
|
2015-09-16 22:19:10 +08:00
|
|
|
// Collect only mergeable sections and group by hash value.
|
2015-09-19 05:17:44 +08:00
|
|
|
parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) {
|
2015-09-16 11:26:31 +08:00
|
|
|
if (auto *SC = dyn_cast<SectionChunk>(C)) {
|
2015-12-03 10:23:33 +08:00
|
|
|
bool Global = SC->Sym && SC->Sym->isExternal();
|
2015-09-16 11:26:31 +08:00
|
|
|
bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
|
2015-12-03 10:23:33 +08:00
|
|
|
if (SC->isCOMDAT() && SC->isLive() && Global && !Writable)
|
2015-09-16 22:19:10 +08:00
|
|
|
SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
|
2015-09-19 05:06:34 +08:00
|
|
|
}
|
|
|
|
});
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
std::vector<SectionChunk *> Chunks;
|
2015-09-19 05:17:44 +08:00
|
|
|
for (Chunk *C : Vec) {
|
2015-09-19 05:06:34 +08:00
|
|
|
if (auto *SC = dyn_cast<SectionChunk>(C)) {
|
|
|
|
if (SC->GroupID) {
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
Chunks.push_back(SC);
|
2015-09-16 11:26:31 +08:00
|
|
|
} else {
|
|
|
|
SC->GroupID = NextID++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-09-05 05:35:54 +08:00
|
|
|
|
2015-09-19 05:06:34 +08:00
|
|
|
// From now on, sections in Chunks are ordered so that sections in
|
2015-09-16 22:19:10 +08:00
|
|
|
// the same group are consecutive in the vector.
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
std::sort(Chunks.begin(), Chunks.end(),
|
|
|
|
[](SectionChunk *A, SectionChunk *B) {
|
|
|
|
return A->GroupID < B->GroupID;
|
|
|
|
});
|
2015-09-16 11:26:31 +08:00
|
|
|
|
|
|
|
// Split groups until we get a convergence.
|
2015-09-17 07:55:39 +08:00
|
|
|
int Cnt = 1;
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
forEachGroup(Chunks, equalsConstant);
|
2015-09-19 05:06:34 +08:00
|
|
|
|
|
|
|
for (;;) {
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
if (!forEachGroup(Chunks, equalsVariable))
|
2015-09-19 05:06:34 +08:00
|
|
|
break;
|
2015-09-17 07:55:39 +08:00
|
|
|
++Cnt;
|
2015-09-19 05:06:34 +08:00
|
|
|
}
|
2015-09-17 07:55:39 +08:00
|
|
|
if (Config->Verbose)
|
|
|
|
llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
|
2015-09-16 11:26:31 +08:00
|
|
|
|
|
|
|
// Merge sections in the same group.
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
|
|
|
|
SectionChunk *Head = *It++;
|
|
|
|
auto Bound = std::find_if(It, End, [&](SectionChunk *SC) {
|
|
|
|
return Head->GroupID != SC->GroupID;
|
|
|
|
});
|
|
|
|
if (It == Bound)
|
|
|
|
continue;
|
|
|
|
if (Config->Verbose)
|
|
|
|
llvm::outs() << "Selected " << Head->getDebugName() << "\n";
|
|
|
|
while (It != Bound) {
|
|
|
|
SectionChunk *SC = *It++;
|
2015-09-16 11:26:31 +08:00
|
|
|
if (Config->Verbose)
|
COFF: De-parallelize ICF for now.
There was a threading issue in the ICF code for COFF. That seems like
a venign bug in the sense that it doesn't produce an incorrect output,
but it oftentimes misses reducible sections. As a result, mergeable
sections could remain in outputs, which makes the output nondeterministic.
Basically the algorithm we are using for ICF is this: We group sections
so that identical sections will eventually be in the same group. Initially,
all sections are in one group. We split the group by relocation targets
until we get a convergence (if relocation targets are in different gruops,
the sections are different). Once a group is split, they will never be
merged.
Each section has a group ID. That variable itself is atomic, so there's
no threading issue at the level that we can use thread sanitizer.
The point is, when we split a group, we re-assign new group IDs to group
of sections. That are multiple separate writes to atomic varaibles.
Thus, splitting a group is not an atomic operation, and there's a small
chance that the other thread observes inconsistent group IDs.
Over-splitting is always "safe", so it will never create incorrect output.
I suspect that the nondeterminism stems from that point. However, I
cannot prove or fix that at this moment, so I'm going to avoid using
threads here.
llvm-svn: 251300
2015-10-27 00:20:00 +08:00
|
|
|
llvm::outs() << " Removed " << SC->getDebugName() << "\n";
|
|
|
|
Head->replace(SC);
|
2015-09-11 12:29:03 +08:00
|
|
|
}
|
2015-09-05 05:35:54 +08:00
|
|
|
}
|
2015-06-25 04:40:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace coff
|
|
|
|
} // namespace lld
|