forked from OSchip/llvm-project
[lld-macho][nfc] Reduce size of icfEqClass hash
... from a `uint64_t` to a `uint32_t`. (LLD-ELF uses a `uint32_t` too.)

About a 1.7% reduction in peak RSS when linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W Mac Pro, and no statistically significant change in wall time.

               </Users/jezng/test2.sh ["before"]>   </Users/jezng/test2.sh ["after"]>   difference (95% CI)
    RSS        1003036672.000 ± 9891065.259         985539505.231 ± 10272748.749        [ -2.3% .. -1.2%]
    samples    27                                   26

               base             diff             difference (95% CI)
    sys_time   1.277 ± 0.023    1.277 ± 0.024    [ -0.9% .. +0.9%]
    user_time  6.682 ± 0.046    6.598 ± 0.043    [ -1.6% .. -0.9%]
    wall_time  5.904 ± 0.062    5.895 ± 0.063    [ -0.7% .. +0.4%]
    samples    46               28

No appreciable change (~0.01%) in the number of `equals` comparisons either:

Before:
    ld64.lld: ICF needed 8 iterations
    ld64.lld: equalsConstant() called 701643 times
    ld64.lld: equalsVariable() called 3438526 times

After:
    ld64.lld: ICF needed 8 iterations
    ld64.lld: equalsConstant() called 701729 times
    ld64.lld: equalsVariable() called 3438526 times

Reviewed By: #lld-macho, MaskRay, thakis

Differential Revision: https://reviews.llvm.org/D121052
This commit is contained in:
parent
112135e774
commit
ad1c32e9b3
|
@ -275,7 +275,7 @@ void ICF::run() {
|
||||||
// Into each origin-section hash, combine all reloc referent section hashes.
|
// Into each origin-section hash, combine all reloc referent section hashes.
|
||||||
for (icfPass = 0; icfPass < 2; ++icfPass) {
|
for (icfPass = 0; icfPass < 2; ++icfPass) {
|
||||||
parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
|
parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
|
||||||
uint64_t hash = isec->icfEqClass[icfPass % 2];
|
uint32_t hash = isec->icfEqClass[icfPass % 2];
|
||||||
for (const Reloc &r : isec->relocs) {
|
for (const Reloc &r : isec->relocs) {
|
||||||
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
|
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
|
||||||
if (auto *defined = dyn_cast<Defined>(sym)) {
|
if (auto *defined = dyn_cast<Defined>(sym)) {
|
||||||
|
@ -295,7 +295,7 @@ void ICF::run() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Set MSB to 1 to avoid collisions with non-hashed classes.
|
// Set MSB to 1 to avoid collisions with non-hashed classes.
|
||||||
isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
|
isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -391,7 +391,7 @@ void macho::foldIdenticalSections() {
|
||||||
assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
|
assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
|
||||||
// Turn-on the top bit to guarantee that valid hashes have no collisions
|
// Turn-on the top bit to guarantee that valid hashes have no collisions
|
||||||
// with the small-integer unique IDs for ICF-ineligible sections
|
// with the small-integer unique IDs for ICF-ineligible sections
|
||||||
isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 63);
|
isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31);
|
||||||
});
|
});
|
||||||
// Now that every input section is either hashed or marked as unique, run the
|
// Now that every input section is either hashed or marked as unique, run the
|
||||||
// segregation algorithm to detect foldable subsections.
|
// segregation algorithm to detect foldable subsections.
|
||||||
|
|
|
@ -30,7 +30,7 @@ using namespace lld::macho;
|
||||||
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
|
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
|
||||||
// so account for that.
|
// so account for that.
|
||||||
static_assert(sizeof(void *) != 8 ||
|
static_assert(sizeof(void *) != 8 ||
|
||||||
sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 96,
|
sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
|
||||||
"Try to minimize ConcatInputSection's size, we create many "
|
"Try to minimize ConcatInputSection's size, we create many "
|
||||||
"instances of it");
|
"instances of it");
|
||||||
|
|
||||||
|
|
|
@ -117,7 +117,7 @@ public:
|
||||||
// Points to the surviving section after this one is folded by ICF
|
// Points to the surviving section after this one is folded by ICF
|
||||||
ConcatInputSection *replacement = nullptr;
|
ConcatInputSection *replacement = nullptr;
|
||||||
// Equivalence-class ID for ICF
|
// Equivalence-class ID for ICF
|
||||||
uint64_t icfEqClass[2] = {0, 0};
|
uint32_t icfEqClass[2] = {0, 0};
|
||||||
|
|
||||||
// With subsections_via_symbols, most symbols have their own InputSection,
|
// With subsections_via_symbols, most symbols have their own InputSection,
|
||||||
// and for weak symbols (e.g. from inline functions), only the
|
// and for weak symbols (e.g. from inline functions), only the
|
||||||
|
|
Loading…
Reference in New Issue