forked from OSchip/llvm-project
Avoid doing binary search.
MergeInputSection::getOffset is the busiest function in LLD if string merging is enabled and input files have lots of mergeable sections. It is usually the case when creating an executable with debug info, so it is pretty common. It is slow because it has to do fairly complex computations. For non-mergeable sections, section contents are contiguous in output, so in order to compute an output offset, we only have to add the output section's base address to an input offset. But for mergeable strings, section contents are split for merging, so they are not contiguous. We've got to do some lookups. We used to do a binary search on the list of section pieces. It is slow because I think it's hostile to branch prediction. This patch replaces it with a hash table lookup. It seems to be working pretty well. Below is "perf stat -r10" output when linking clang with debug info. In this case this patch speeds up linking by about 4%. Before: 6584.153205 task-clock (msec) # 1.001 CPUs utilized ( +- 0.09% ) 238 context-switches # 0.036 K/sec ( +- 6.59% ) 0 cpu-migrations # 0.000 K/sec ( +- 50.92% ) 1,067,675 page-faults # 0.162 M/sec ( +- 0.15% ) 18,369,931,470 cycles # 2.790 GHz ( +- 0.09% ) 9,640,680,143 stalled-cycles-frontend # 52.48% frontend cycles idle ( +- 0.18% ) <not supported> stalled-cycles-backend 21,206,747,787 instructions # 1.15 insns per cycle # 0.45 stalled cycles per insn ( +- 0.04% ) 3,817,398,032 branches # 579.786 M/sec ( +- 0.04% ) 132,787,249 branch-misses # 3.48% of all branches ( +- 0.02% ) 6.579106511 seconds time elapsed ( +- 0.09% ) After: 6312.317533 task-clock (msec) # 1.001 CPUs utilized ( +- 0.19% ) 221 context-switches # 0.035 K/sec ( +- 4.11% ) 1 cpu-migrations # 0.000 K/sec ( +- 45.21% ) 1,280,775 page-faults # 0.203 M/sec ( +- 0.37% ) 17,611,539,150 cycles # 2.790 GHz ( +- 0.19% ) 10,285,148,569 stalled-cycles-frontend # 58.40% frontend cycles idle ( +- 0.30% ) <not supported> stalled-cycles-backend 18,794,779,900 instructions # 1.07 insns per cycle 
# 0.55 stalled cycles per insn ( +- 0.03% ) 3,287,450,865 branches # 520.799 M/sec ( +- 0.03% ) 72,259,605 branch-misses # 2.20% of all branches ( +- 0.01% ) 6.307411828 seconds time elapsed ( +- 0.19% ) Differential Revision: http://reviews.llvm.org/D20645 llvm-svn: 270999
This commit is contained in:
parent
07c8654284
commit
406b469de4
|
@ -513,6 +513,7 @@ bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
|
|||
return S->SectionKind == InputSectionBase<ELFT>::Merge;
|
||||
}
|
||||
|
||||
// Do binary search to get a section piece at a given input offset.
|
||||
template <class ELFT>
|
||||
SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
|
||||
ArrayRef<uint8_t> D = this->getSectionData();
|
||||
|
@ -529,23 +530,40 @@ SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
|
|||
return &*I;
|
||||
}
|
||||
|
||||
// Returns the offset in an output section for a given input offset.
|
||||
// Because the contents of a mergeable section are not contiguous in output,
|
||||
// it is not just an addition to a base output offset.
|
||||
template <class ELFT>
|
||||
typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) {
|
||||
// Fast path: if Offset is a key of OffsetMap, the mapped value is the answer.
auto It = OffsetMap.find(Offset);
|
||||
if (It != OffsetMap.end())
|
||||
return It->second;
|
||||
|
||||
// If Offset is not at beginning of a section piece, it is not in the map.
|
||||
// In that case we need to search from the original section piece vector.
|
||||
// The returned pointer is dereferenced without a null check.
SectionPiece &Piece = *this->getSectionPiece(Offset);
|
||||
// Only asserted, so builds without assertions do not check liveness here.
assert(Piece.Live);
|
||||
|
||||
// Compute the Addend and if the Base is cached, return.
|
||||
// Addend is the distance of Offset from the start of the piece it falls in.
uintX_t Addend = Offset - Piece.InputOff;
|
||||
// NOTE(review): this early return and the fall-through below evaluate the
// same expression. This view appears to show removed and added diff lines
// together -- confirm against the real file before relying on either form.
if (Piece.OutputOff != size_t(-1))
|
||||
return Piece.OutputOff + Addend;
|
||||
uintX_t Ret = Piece.OutputOff + Addend;
|
||||
return Ret;
|
||||
}
|
||||
|
||||
// Map the base to the offset in the output section and cache it.
|
||||
ArrayRef<uint8_t> D = this->getSectionData();
|
||||
StringRef Data((const char *)D.data(), D.size());
|
||||
StringRef Entry = Data.substr(Piece.InputOff, Piece.size());
|
||||
auto *MOS = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
|
||||
Piece.OutputOff = MOS->getOffset(Entry);
|
||||
return Piece.OutputOff + Addend;
|
||||
// Create a map from input offsets to output offsets for all section pieces.
|
||||
// It is called after finalize().
|
||||
// Only live pieces get an entry, keyed by the input offset at which the
// piece starts.
template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() {
|
||||
// Size the map from the total piece count before inserting anything.
OffsetMap.grow(this->Pieces.size());
|
||||
for (SectionPiece &Piece : this->Pieces) {
|
||||
// Pieces that are not live are skipped and never entered into the map.
if (!Piece.Live)
|
||||
continue;
|
||||
// size_t(-1) is treated here as "output offset not assigned yet"; in that
// case the output section is asked for the offset of this piece's bytes.
if (Piece.OutputOff == size_t(-1)) {
|
||||
// Offsets of tail-merged strings are computed lazily.
|
||||
auto *OutSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
|
||||
ArrayRef<uint8_t> D = Piece.data();
|
||||
StringRef S((const char *)D.data(), D.size());
|
||||
// The result is stored back into the piece, so it is computed only once.
Piece.OutputOff = OutSec->getOffset(S);
|
||||
}
|
||||
// Record start-of-piece input offset -> output offset.
OffsetMap[Piece.InputOff] = Piece.OutputOff;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
|
|
|
@ -145,7 +145,10 @@ public:
|
|||
// in the output section.
|
||||
uintX_t getOffset(uintX_t Offset);
|
||||
|
||||
// Fills OffsetMap with an input-offset -> output-offset entry for each live
// piece of this section (defined out of line).
void finalizePieces();
|
||||
|
||||
private:
|
||||
llvm::DenseMap<uintX_t, uintX_t> OffsetMap;
|
||||
llvm::DenseSet<uintX_t> LiveOffsets;
|
||||
};
|
||||
|
||||
|
|
|
@ -1164,6 +1164,7 @@ void MergeOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) {
|
|||
Sec->OutSec = this;
|
||||
this->updateAlign(Sec->Align);
|
||||
this->Header.sh_entsize = Sec->getSectionHdr()->sh_entsize;
|
||||
Sections.push_back(Sec);
|
||||
|
||||
bool IsString = this->Header.sh_flags & SHF_STRINGS;
|
||||
|
||||
|
@ -1191,6 +1192,11 @@ template <class ELFT> void MergeOutputSection<ELFT>::finalize() {
|
|||
this->Header.sh_size = Builder.getSize();
|
||||
}
|
||||
|
||||
// Forwards finalizePieces() to every input section stored in Sections, in
// the order in which they are stored.
template <class ELFT> void MergeOutputSection<ELFT>::finalizePieces() {
|
||||
for (MergeInputSection<ELFT> *Sec : Sections)
|
||||
Sec->finalizePieces();
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic)
|
||||
: OutputSectionBase<ELFT>(Name, SHT_STRTAB,
|
||||
|
|
|
@ -93,6 +93,7 @@ public:
|
|||
bool PageAlign = false;
|
||||
|
||||
virtual void finalize() {}
|
||||
// Virtual hook with an empty default body; derived classes may override it.
virtual void finalizePieces() {}
|
||||
virtual void
|
||||
forEachInputSection(std::function<void(InputSectionBase<ELFT> *)> F) {}
|
||||
virtual void writeTo(uint8_t *Buf) {}
|
||||
|
@ -320,10 +321,12 @@ public:
|
|||
void writeTo(uint8_t *Buf) override;
|
||||
unsigned getOffset(StringRef Val);
|
||||
void finalize() override;
|
||||
// Overrides the base-class hook; the out-of-line definition calls
// finalizePieces() on each entry of Sections.
void finalizePieces() override;
|
||||
bool shouldTailMerge() const;
|
||||
|
||||
private:
|
||||
llvm::StringTableBuilder Builder;
|
||||
std::vector<MergeInputSection<ELFT> *> Sections;
|
||||
};
|
||||
|
||||
struct CieRecord {
|
||||
|
|
|
@ -873,6 +873,11 @@ template <class ELFT> void Writer<ELFT>::createSections() {
|
|||
|
||||
if (isOutputDynamic())
|
||||
Out<ELFT>::Dynamic->finalize();
|
||||
|
||||
// Now that all output offsets are fixed, finalize mergeable sections
|
||||
// to fix their maps from input offsets to output offsets.
|
||||
for (OutputSectionBase<ELFT> *Sec : OutputSections)
|
||||
Sec->finalizePieces();
|
||||
}
|
||||
|
||||
template <class ELFT> bool Writer<ELFT>::needsGot() {
|
||||
|
|
Loading…
Reference in New Issue