Inline initOffsetMap.
In the lld perf builder, r328686 had a negative impact on stalled-cycles-frontend. That stat somehow does not show up on my machine, but the attached patch shows an improvement in cache-misses, which is probably a reasonable proxy. My working theory is that, given a large input, the pieces vector is out of cache by the time initOffsetMap runs. Both finalizeContents implementations have a convenient location for initializing the OffsetMap, so this seems like the best solution.

llvm-svn: 329117
commit 6cd7af51e1
parent d506bf8e3d
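For context, here is a minimal standalone sketch of the locality argument the message makes: building the offset-to-index map in a separate pass re-walks the whole pieces vector after other work has likely evicted it from cache, whereas folding the map construction into the loop that already walks the pieces touches each element while it is still hot. In lld the fold happens inside MergeTailSection::finalizeContents and MergeNoTailSection::finalizeContents, as the hunks below show. The Piece struct, the function names, and the use of std::unordered_map (standing in for llvm::DenseMap) are illustrative, not lld's actual types.

#include <cstdint>
#include <unordered_map>
#include <vector>

// Simplified stand-in for lld's SectionPiece.
struct Piece {
  uint32_t InputOff = 0;
  bool Live = false;
  uint64_t OutputOff = 0;
};

// Before: a separate pass (the old initOffsetMap pattern) that re-walks a
// vector which earlier work may already have pushed out of cache.
std::unordered_map<uint32_t, uint32_t>
buildOffsetMapSeparately(const std::vector<Piece> &Pieces) {
  std::unordered_map<uint32_t, uint32_t> Map;
  Map.reserve(Pieces.size());
  for (uint32_t I = 0; I < Pieces.size(); ++I)
    Map[Pieces[I].InputOff] = I;
  return Map;
}

// After: fill the map inside the loop that already assigns output offsets,
// so each Piece is read once while it is in cache.
void finalizePieces(std::vector<Piece> &Pieces, uint64_t EntSize,
                    std::unordered_map<uint32_t, uint32_t> &Map) {
  Map.reserve(Pieces.size());
  uint64_t Off = 0;
  for (uint32_t I = 0; I < Pieces.size(); ++I) {
    Piece &P = Pieces[I];
    Map[P.InputOff] = I;
    if (P.Live) {
      P.OutputOff = Off;
      Off += EntSize;
    }
  }
}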
@@ -985,12 +985,6 @@ uint64_t MergeInputSection::getOffset(uint64_t Offset) const {
   return Piece.OutputOff + Addend;
 }
 
-void MergeInputSection::initOffsetMap() {
-  OffsetMap.reserve(Pieces.size());
-  for (size_t I = 0; I < Pieces.size(); ++I)
-    OffsetMap[Pieces[I].InputOff] = I;
-}
-
 template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &,
                                     StringRef);
 template InputSection::InputSection(ObjFile<ELF32BE> &, const ELF32BE::Shdr &,
@@ -236,6 +236,7 @@ public:
   // Splittable sections are handled as a sequence of data
   // rather than a single large blob of data.
   std::vector<SectionPiece> Pieces;
+  llvm::DenseMap<uint32_t, uint32_t> OffsetMap;
 
   // Returns I'th piece's data. This function is very hot when
   // string merging is enabled, so we want to inline.
@@ -254,14 +255,11 @@ public:
   }
 
   SyntheticSection *getParent() const;
-  void initOffsetMap();
 
 private:
   void splitStrings(ArrayRef<uint8_t> A, size_t Size);
   void splitNonStrings(ArrayRef<uint8_t> A, size_t Size);
 
-  llvm::DenseMap<uint32_t, uint32_t> OffsetMap;
-
   llvm::DenseSet<uint32_t> LiveOffsets;
 };
 
@@ -2438,10 +2438,15 @@ void MergeTailSection::finalizeContents() {
   // finalize() fixed tail-optimized strings, so we can now get
   // offsets of strings. Get an offset for each string and save it
   // to a corresponding StringPiece for easy access.
-  for (MergeInputSection *Sec : Sections)
-    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
-      if (Sec->Pieces[I].Live)
-        Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I));
+  for (MergeInputSection *Sec : Sections) {
+    Sec->OffsetMap.reserve(Sec->Pieces.size());
+    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
+      SectionPiece &P = Sec->Pieces[I];
+      Sec->OffsetMap[P.InputOff] = I;
+      if (P.Live)
+        P.OutputOff = Builder.getOffset(Sec->getData(I));
+    }
+  }
 }
 
 void MergeNoTailSection::writeTo(uint8_t *Buf) {
@@ -2494,10 +2499,13 @@ void MergeNoTailSection::finalizeContents() {
   // So far, section pieces have offsets from beginning of shards, but
   // we want offsets from beginning of the whole section. Fix them.
   parallelForEach(Sections, [&](MergeInputSection *Sec) {
-    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
-      if (Sec->Pieces[I].Live)
-        Sec->Pieces[I].OutputOff +=
-            ShardOffsets[getShardId(Sec->Pieces[I].Hash)];
+    Sec->OffsetMap.reserve(Sec->Pieces.size());
+    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
+      SectionPiece &P = Sec->Pieces[I];
+      Sec->OffsetMap[P.InputOff] = I;
+      if (P.Live)
+        P.OutputOff += ShardOffsets[getShardId(P.Hash)];
+    }
   });
 }
 
@@ -2573,11 +2581,8 @@ void elf::mergeSections() {
     }
     (*I)->addSection(MS);
   }
-  for (auto *MS : MergeSections) {
+  for (auto *MS : MergeSections)
     MS->finalizeContents();
-    parallelForEach(MS->Sections,
-                    [](MergeInputSection *Sec) { Sec->initOffsetMap(); });
-  }
 
   std::vector<InputSectionBase *> &V = InputSections;
   V.erase(std::remove(V.begin(), V.end(), nullptr), V.end());
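To round out the picture, a hedged sketch of how such an offset map gets consumed when a reference points into a merged section: look up the piece that starts at the input offset (or binary-search for the piece containing it) and rebase the within-piece addend onto the piece's output offset, matching the "return Piece.OutputOff + Addend;" line in getOffset above. This is not lld's getOffset(); the translateOffset name, the fast/slow path split, and std::unordered_map (in place of llvm::DenseMap) are illustrative assumptions.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Piece {
  uint32_t InputOff;
  uint64_t OutputOff;
};

// Map an offset in an original input section to an offset in the merged
// output section. The map gives an exact hit when Offset is the start of a
// piece; otherwise fall back to a binary search over Pieces, which is
// sorted by InputOff.
uint64_t translateOffset(const std::vector<Piece> &Pieces,
                         const std::unordered_map<uint32_t, uint32_t> &OffsetMap,
                         uint64_t Offset) {
  auto It = OffsetMap.find(static_cast<uint32_t>(Offset));
  if (It != OffsetMap.end())
    return Pieces[It->second].OutputOff;

  auto PI = std::upper_bound(
      Pieces.begin(), Pieces.end(), Offset,
      [](uint64_t Off, const Piece &P) { return Off < P.InputOff; });
  const Piece &P = *std::prev(PI);
  return P.OutputOff + (Offset - P.InputOff); // OutputOff + Addend
}

int main() {
  // Three pieces starting at input offsets 0, 8 and 16.
  std::vector<Piece> Pieces = {{0, 100}, {8, 40}, {16, 0}};
  std::unordered_map<uint32_t, uint32_t> OffsetMap;
  for (uint32_t I = 0; I < Pieces.size(); ++I)
    OffsetMap[Pieces[I].InputOff] = I;
  // Offset 10 is 2 bytes into the piece at input offset 8 -> 40 + 2 = 42.
  std::printf("%llu\n", static_cast<unsigned long long>(
                            translateOffset(Pieces, OffsetMap, 10)));
  return 0;
}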