From d19265b31e65c84ad908abce6e7f6e6d15c22258 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 14 May 2020 19:23:01 +0200 Subject: [PATCH] [clangd] Avoid wasteful data structures in RefSlab::Builder Summary: This is worth another 10% or so on InedxBenchmark.DexBuild. Reviewers: kbobyrev Subscribers: ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D79950 --- clang-tools-extra/clangd/index/Ref.cpp | 40 +++++++++++-------- clang-tools-extra/clangd/index/Ref.h | 39 +++++++++++++++++- .../clangd/index/SymbolLocation.cpp | 16 +++----- .../clangd/index/SymbolLocation.h | 17 ++++---- 4 files changed, 77 insertions(+), 35 deletions(-) diff --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp index 115894ce6743..e622a3f78e42 100644 --- a/clang-tools-extra/clangd/index/Ref.cpp +++ b/clang-tools-extra/clangd/index/Ref.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Ref.h" +#include "llvm/ADT/STLExtras.h" namespace clang { namespace clangd { @@ -33,27 +34,34 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) { } void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) { - auto &M = Refs[ID]; - if (M.count(S)) - return; - Ref R = S; - R.Location.FileURI = - UniqueStrings.save(R.Location.FileURI).data(); - M.insert(std::move(R)); + Entry E = {ID, S}; + E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data(); + Entries.insert(std::move(E)); } RefSlab RefSlab::Builder::build() && { - // We can reuse the arena, as it only has unique strings and we need them all. - // Reallocate refs on the arena to reduce waste and indirections when reading. std::vector>> Result; - Result.reserve(Refs.size()); - size_t NumRefs = 0; - for (auto &Sym : Refs) { - std::vector SymRefs(Sym.second.begin(), Sym.second.end()); - NumRefs += SymRefs.size(); - Result.emplace_back(Sym.first, llvm::ArrayRef(SymRefs).copy(Arena)); + // We'll reuse the arena, as it only has unique strings and we need them all. + // We need to group refs by symbol and form contiguous arrays on the arena. + std::vector> Flat; + Flat.reserve(Entries.size()); + for (const Entry &E : Entries) + Flat.emplace_back(E.Symbol, &E.Reference); + // Group by SymbolID. + llvm::sort(Flat, llvm::less_first()); + std::vector Refs; + // Loop over symbols, copying refs for each onto the arena. + for (auto I = Flat.begin(), End = Flat.end(); I != End;) { + SymbolID Sym = I->first; + Refs.clear(); + do { + Refs.push_back(*I->second); + ++I; + } while (I != End && I->first == Sym); + llvm::sort(Refs); // By file, affects xrefs display order. + Result.emplace_back(Sym, llvm::ArrayRef(Refs).copy(Arena)); } - return RefSlab(std::move(Result), std::move(Arena), NumRefs); + return RefSlab(std::move(Result), std::move(Arena), Entries.size()); } } // namespace clangd diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h index 382ade712017..cba8adaee7c2 100644 --- a/clang-tools-extra/clangd/index/Ref.h +++ b/clang-tools-extra/clangd/index/Ref.h @@ -13,6 +13,8 @@ #include "SymbolLocation.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include @@ -132,9 +134,17 @@ public: RefSlab build() &&; private: + // A ref we're storing with its symbol to consume with build(). + // All strings are interned, so DenseMapInfo can use pointer comparisons. + struct Entry { + SymbolID Symbol; + Ref Reference; + }; + friend struct llvm::DenseMapInfo; + llvm::BumpPtrAllocator Arena; llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. - llvm::DenseMap> Refs; + llvm::DenseSet Entries; }; private: @@ -151,4 +161,31 @@ private: } // namespace clangd } // namespace clang +namespace llvm { +template <> struct DenseMapInfo { + using Entry = clang::clangd::RefSlab::Builder::Entry; + static inline Entry getEmptyKey() { + static Entry E{clang::clangd::SymbolID(""), {}}; + return E; + } + static inline Entry getTombstoneKey() { + static Entry E{clang::clangd::SymbolID("TOMBSTONE"), {}}; + return E; + } + static unsigned getHashValue(const Entry &Val) { + return llvm::hash_combine( + Val.Symbol, reinterpret_cast(Val.Reference.Location.FileURI), + Val.Reference.Location.Start.rep(), Val.Reference.Location.End.rep()); + } + static bool isEqual(const Entry &LHS, const Entry &RHS) { + return std::tie(LHS.Symbol, LHS.Reference.Location.FileURI, + LHS.Reference.Kind) == + std::tie(RHS.Symbol, RHS.Reference.Location.FileURI, + RHS.Reference.Kind) && + LHS.Reference.Location.Start == RHS.Reference.Location.Start && + LHS.Reference.Location.End == RHS.Reference.Location.End; + } +}; +}; // namespace llvm + #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H diff --git a/clang-tools-extra/clangd/index/SymbolLocation.cpp b/clang-tools-extra/clangd/index/SymbolLocation.cpp index aac55703c35f..61da267b93ce 100644 --- a/clang-tools-extra/clangd/index/SymbolLocation.cpp +++ b/clang-tools-extra/clangd/index/SymbolLocation.cpp @@ -15,18 +15,14 @@ constexpr uint32_t SymbolLocation::Position::MaxLine; constexpr uint32_t SymbolLocation::Position::MaxColumn; void SymbolLocation::Position::setLine(uint32_t L) { - if (L > MaxLine) { - Line = MaxLine; - return; - } - Line = L; + if (L > MaxLine) + L = MaxLine; + LineColumnPacked = (L << ColumnBits) | column(); } void SymbolLocation::Position::setColumn(uint32_t Col) { - if (Col > MaxColumn) { - Column = MaxColumn; - return; - } - Column = Col; + if (Col > MaxColumn) + Col = MaxColumn; + LineColumnPacked = (LineColumnPacked & ~MaxColumn) | Col; } llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolLocation &L) { diff --git a/clang-tools-extra/clangd/index/SymbolLocation.h b/clang-tools-extra/clangd/index/SymbolLocation.h index 9b503546a9f7..2dd33839115e 100644 --- a/clang-tools-extra/clangd/index/SymbolLocation.h +++ b/clang-tools-extra/clangd/index/SymbolLocation.h @@ -30,22 +30,23 @@ struct SymbolLocation { // Position is encoded into 32 bits to save space. // If Line/Column overflow, the value will be their maximum value. struct Position { - Position() : Line(0), Column(0) {} + Position() : LineColumnPacked(0) {} void setLine(uint32_t Line); - uint32_t line() const { return Line; } + uint32_t line() const { return LineColumnPacked >> ColumnBits; } void setColumn(uint32_t Column); - uint32_t column() const { return Column; } + uint32_t column() const { return LineColumnPacked & MaxColumn; } + uint32_t rep() const { return LineColumnPacked; } bool hasOverflow() const { - return Line >= MaxLine || Column >= MaxColumn; + return line() == MaxLine || column() == MaxColumn; } - static constexpr uint32_t MaxLine = (1 << 20) - 1; - static constexpr uint32_t MaxColumn = (1 << 12) - 1; + static constexpr unsigned ColumnBits = 12; + static constexpr uint32_t MaxLine = (1 << (32 - ColumnBits)) - 1; + static constexpr uint32_t MaxColumn = (1 << ColumnBits) - 1; private: - uint32_t Line : 20; // 0-based - uint32_t Column : 12; // 0-based + uint32_t LineColumnPacked; // Top 20 bit line, bottom 12 bits column. }; /// The symbol range, using half-open range [Start, End).