[clangd] Deduplicate Refs on the fly.

Summary:
Currently, we only deduplicate refs when we flush the final results. Until
then, we may hold a huge number of duplicates (refs from headers) during
indexing (e.g. when running clangd-indexer on Chromium).

With this change, clangd-indexer can index the whole Chromium project
(48 threads, 40 GB peak memory usage).

Reviewers: kadircet

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, mgrang, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D59092

llvm-svn: 355676
This commit is contained in:
Haojian Wu 2019-03-08 09:26:30 +00:00
parent 8e16d73346
commit f334231176
3 changed files with 11 additions and 10 deletions

View File

@ -33,9 +33,12 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) {
void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
auto &M = Refs[ID];
M.push_back(S);
M.back().Location.FileURI =
UniqueStrings.save(M.back().Location.FileURI).data();
if (M.count(S))
return;
Ref R = S;
R.Location.FileURI =
UniqueStrings.save(R.Location.FileURI).data();
M.insert(std::move(R));
}
RefSlab RefSlab::Builder::build() && {
@ -45,11 +48,7 @@ RefSlab RefSlab::Builder::build() && {
Result.reserve(Refs.size());
size_t NumRefs = 0;
for (auto &Sym : Refs) {
auto &SymRefs = Sym.second;
llvm::sort(SymRefs);
// FIXME: do we really need to dedup?
SymRefs.erase(std::unique(SymRefs.begin(), SymRefs.end()), SymRefs.end());
std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
NumRefs += SymRefs.size();
Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
}

View File

@ -16,6 +16,7 @@
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <set>
#include <utility>
namespace clang {
@ -67,6 +68,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Ref &);
/// Filenames are deduplicated.
class RefSlab {
public:
// Refs are stored in order.
using value_type = std::pair<SymbolID, llvm::ArrayRef<Ref>>;
using const_iterator = std::vector<value_type>::const_iterator;
using iterator = const_iterator;
@ -99,7 +101,7 @@ public:
private:
llvm::BumpPtrAllocator Arena;
llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
llvm::DenseMap<SymbolID, std::vector<Ref>> Refs;
llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
};
private:

View File

@ -56,7 +56,7 @@ public:
[&](RefSlab S) {
std::lock_guard<std::mutex> Lock(SymbolsMu);
for (const auto &Sym : S) {
// No need to merge as currently all Refs are from main file.
// Deduplication happens during insertion.
for (const auto &Ref : Sym.second)
Refs.insert(Sym.first, Ref);
}