[clangd] Avoid wasteful data structures in RefSlab::Builder

Summary: This is worth another 10% or so on InedxBenchmark.DexBuild.

Reviewers: kbobyrev

Subscribers: ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79950
This commit is contained in:
Sam McCall 2020-05-14 19:23:01 +02:00
parent c9f63297e2
commit d19265b31e
4 changed files with 77 additions and 35 deletions

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Ref.h"
#include "llvm/ADT/STLExtras.h"
namespace clang {
namespace clangd {
@ -33,27 +34,34 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) {
}
void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
auto &M = Refs[ID];
if (M.count(S))
return;
Ref R = S;
R.Location.FileURI =
UniqueStrings.save(R.Location.FileURI).data();
M.insert(std::move(R));
Entry E = {ID, S};
E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data();
Entries.insert(std::move(E));
}
RefSlab RefSlab::Builder::build() && {
// We can reuse the arena, as it only has unique strings and we need them all.
// Reallocate refs on the arena to reduce waste and indirections when reading.
std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result;
Result.reserve(Refs.size());
size_t NumRefs = 0;
for (auto &Sym : Refs) {
std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
NumRefs += SymRefs.size();
Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
// We'll reuse the arena, as it only has unique strings and we need them all.
// We need to group refs by symbol and form contiguous arrays on the arena.
std::vector<std::pair<SymbolID, const Ref *>> Flat;
Flat.reserve(Entries.size());
for (const Entry &E : Entries)
Flat.emplace_back(E.Symbol, &E.Reference);
// Group by SymbolID.
llvm::sort(Flat, llvm::less_first());
std::vector<Ref> Refs;
// Loop over symbols, copying refs for each onto the arena.
for (auto I = Flat.begin(), End = Flat.end(); I != End;) {
SymbolID Sym = I->first;
Refs.clear();
do {
Refs.push_back(*I->second);
++I;
} while (I != End && I->first == Sym);
llvm::sort(Refs); // By file, affects xrefs display order.
Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena));
}
return RefSlab(std::move(Result), std::move(Arena), NumRefs);
return RefSlab(std::move(Result), std::move(Arena), Entries.size());
}
} // namespace clangd

View File

@ -13,6 +13,8 @@
#include "SymbolLocation.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@ -132,9 +134,17 @@ public:
RefSlab build() &&;
private:
// A ref we're storing with its symbol to consume with build().
// All strings are interned, so DenseMapInfo can use pointer comparisons.
struct Entry {
SymbolID Symbol;
Ref Reference;
};
friend struct llvm::DenseMapInfo<Entry>;
llvm::BumpPtrAllocator Arena;
llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
llvm::DenseSet<Entry> Entries;
};
private:
@ -151,4 +161,31 @@ private:
} // namespace clangd
} // namespace clang
namespace llvm {
template <> struct DenseMapInfo<clang::clangd::RefSlab::Builder::Entry> {
using Entry = clang::clangd::RefSlab::Builder::Entry;
static inline Entry getEmptyKey() {
static Entry E{clang::clangd::SymbolID(""), {}};
return E;
}
static inline Entry getTombstoneKey() {
static Entry E{clang::clangd::SymbolID("TOMBSTONE"), {}};
return E;
}
static unsigned getHashValue(const Entry &Val) {
return llvm::hash_combine(
Val.Symbol, reinterpret_cast<uintptr_t>(Val.Reference.Location.FileURI),
Val.Reference.Location.Start.rep(), Val.Reference.Location.End.rep());
}
static bool isEqual(const Entry &LHS, const Entry &RHS) {
return std::tie(LHS.Symbol, LHS.Reference.Location.FileURI,
LHS.Reference.Kind) ==
std::tie(RHS.Symbol, RHS.Reference.Location.FileURI,
RHS.Reference.Kind) &&
LHS.Reference.Location.Start == RHS.Reference.Location.Start &&
LHS.Reference.Location.End == RHS.Reference.Location.End;
}
};
}; // namespace llvm
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H

View File

@ -15,18 +15,14 @@ constexpr uint32_t SymbolLocation::Position::MaxLine;
constexpr uint32_t SymbolLocation::Position::MaxColumn;
void SymbolLocation::Position::setLine(uint32_t L) {
if (L > MaxLine) {
Line = MaxLine;
return;
}
Line = L;
if (L > MaxLine)
L = MaxLine;
LineColumnPacked = (L << ColumnBits) | column();
}
void SymbolLocation::Position::setColumn(uint32_t Col) {
if (Col > MaxColumn) {
Column = MaxColumn;
return;
}
Column = Col;
if (Col > MaxColumn)
Col = MaxColumn;
LineColumnPacked = (LineColumnPacked & ~MaxColumn) | Col;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolLocation &L) {

View File

@ -30,22 +30,23 @@ struct SymbolLocation {
// Position is encoded into 32 bits to save space.
// If Line/Column overflow, the value will be their maximum value.
struct Position {
Position() : Line(0), Column(0) {}
Position() : LineColumnPacked(0) {}
void setLine(uint32_t Line);
uint32_t line() const { return Line; }
uint32_t line() const { return LineColumnPacked >> ColumnBits; }
void setColumn(uint32_t Column);
uint32_t column() const { return Column; }
uint32_t column() const { return LineColumnPacked & MaxColumn; }
uint32_t rep() const { return LineColumnPacked; }
bool hasOverflow() const {
return Line >= MaxLine || Column >= MaxColumn;
return line() == MaxLine || column() == MaxColumn;
}
static constexpr uint32_t MaxLine = (1 << 20) - 1;
static constexpr uint32_t MaxColumn = (1 << 12) - 1;
static constexpr unsigned ColumnBits = 12;
static constexpr uint32_t MaxLine = (1 << (32 - ColumnBits)) - 1;
static constexpr uint32_t MaxColumn = (1 << ColumnBits) - 1;
private:
uint32_t Line : 20; // 0-based
uint32_t Column : 12; // 0-based
uint32_t LineColumnPacked; // Top 20 bit line, bottom 12 bits column.
};
/// The symbol range, using half-open range [Start, End).