[clangd] clangd-indexer gathers refs and stores them in index files.

Reviewers: ioeric

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52531

llvm-svn: 343778
This commit is contained in:
Sam McCall 2018-10-04 14:09:55 +00:00
parent 2ec5a10db3
commit cc21779c3c
7 changed files with 232 additions and 21 deletions

View File

@ -13,10 +13,11 @@ public:
IndexAction(std::shared_ptr<SymbolCollector> C,
std::unique_ptr<CanonicalIncludes> Includes,
const index::IndexingOptions &Opts,
std::function<void(SymbolSlab)> &SymbolsCallback)
std::function<void(SymbolSlab)> SymbolsCallback,
std::function<void(RefSlab)> RefsCallback)
: WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)),
SymbolsCallback(SymbolsCallback), Collector(C),
Includes(std::move(Includes)),
SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback),
Collector(C), Includes(std::move(Includes)),
PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
@ -41,10 +42,13 @@ public:
return;
}
SymbolsCallback(Collector->takeSymbols());
if (RefsCallback != nullptr)
RefsCallback(Collector->takeRefs());
}
private:
std::function<void(SymbolSlab)> SymbolsCallback;
std::function<void(RefSlab)> RefsCallback;
std::shared_ptr<SymbolCollector> Collector;
std::unique_ptr<CanonicalIncludes> Includes;
std::unique_ptr<CommentHandler> PragmaHandler;
@ -54,20 +58,23 @@ private:
std::unique_ptr<FrontendAction>
createStaticIndexingAction(SymbolCollector::Options Opts,
std::function<void(SymbolSlab)> SymbolsCallback) {
std::function<void(SymbolSlab)> SymbolsCallback,
std::function<void(RefSlab)> RefsCallback) {
index::IndexingOptions IndexOpts;
IndexOpts.SystemSymbolFilter =
index::IndexingOptions::SystemSymbolFilterKind::All;
Opts.CollectIncludePath = true;
Opts.CountReferences = true;
Opts.Origin = SymbolOrigin::Static;
if (RefsCallback != nullptr)
Opts.RefFilter = RefKind::All;
auto Includes = llvm::make_unique<CanonicalIncludes>();
addSystemHeadersMapping(Includes.get());
Opts.Includes = Includes.get();
return llvm::make_unique<IndexAction>(
std::make_shared<SymbolCollector>(std::move(Opts)), std::move(Includes),
IndexOpts, SymbolsCallback);
}
IndexOpts, SymbolsCallback, RefsCallback);
};
} // namespace clangd
} // namespace clang

View File

@ -21,10 +21,13 @@ namespace clangd {
// Only a subset of SymbolCollector::Options are respected:
// - include paths are always collected, and canonicalized appropriately
// - references are always counted
// - main-file refs are collected (if RefsCallback is non-null)
// - the symbol origin is always Static
// FIXME: refs from headers should also be collected.
std::unique_ptr<FrontendAction>
createStaticIndexingAction(SymbolCollector::Options Opts,
std::function<void(SymbolSlab)> SymbolsCallback);
std::function<void(SymbolSlab)> SymbolsCallback,
std::function<void(RefSlab)> RefsCallback);
} // namespace clangd
} // namespace clang

View File

@ -298,17 +298,47 @@ Symbol readSymbol(Reader &Data, ArrayRef<StringRef> Strings) {
return Sym;
}
// REFS ENCODING
// A refs section has data grouped by Symbol. Each symbol has:
// - SymbolID: 20 bytes
// - NumRefs: varint
// - Ref[NumRefs]
// Fields of Ref are encoded in turn, see implementation.
void writeRefs(const SymbolID &ID, ArrayRef<Ref> Refs,
const StringTableOut &Strings, raw_ostream &OS) {
OS << ID.raw();
writeVar(Refs.size(), OS);
for (const auto &Ref : Refs) {
OS.write(static_cast<unsigned char>(Ref.Kind));
writeLocation(Ref.Location, Strings, OS);
}
}
// Decodes the references to one symbol from Data: a SymbolID, a varint count,
// then that many refs (one kind byte plus a location each).
// Strings is the string table used when decoding locations.
//
// The count comes from the (possibly corrupt or truncated) input, so refs are
// appended one at a time rather than allocating the whole vector up front.
// Decoding stops as soon as the reader reports an error, and only fully
// decoded refs are kept. Callers must still check Data.err() afterwards.
std::pair<SymbolID, std::vector<Ref>> readRefs(Reader &Data,
                                               ArrayRef<StringRef> Strings) {
  std::pair<SymbolID, std::vector<Ref>> Result;
  Result.first = Data.consumeID();
  const size_t NumRefs = Data.consumeVar();
  for (size_t I = 0; I < NumRefs && !Data.err(); ++I) {
    Ref R{};
    R.Kind = static_cast<RefKind>(Data.consume8());
    R.Location = readLocation(Data, Strings);
    if (Data.err())
      break; // Don't keep a partially decoded ref.
    Result.second.push_back(R);
  }
  return Result;
}
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
// - meta: version number
// - stri: string table
// - symb: symbols
// - refs: references to symbols
// The current versioning scheme is simple - non-current versions are rejected.
// If you make a breaking change, bump this version number to invalidate stored
// data. Later we may want to support some backward compatibility.
constexpr static uint32_t Version = 4;
constexpr static uint32_t Version = 5;
Expected<IndexFileIn> readRIFF(StringRef Data) {
auto RIFF = riff::readFile(Data);
@ -342,6 +372,18 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
return makeError("malformed or truncated symbol");
Result.Symbols = std::move(Symbols).build();
}
if (Chunks.count("refs")) {
Reader RefsReader(Chunks.lookup("refs"));
RefSlab::Builder Refs;
while (!RefsReader.eof()) {
auto RefsBundle = readRefs(RefsReader, Strings->Strings);
for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
Refs.insert(RefsBundle.first, Ref);
}
if (RefsReader.err())
return makeError("malformed or truncated refs");
Result.Refs = std::move(Refs).build();
}
return std::move(Result);
}
@ -363,6 +405,14 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
Symbols.emplace_back(Sym);
visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
}
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
Refs.emplace_back(Sym);
for (auto &Ref : Refs.back().second)
Strings.intern(Ref.Location.FileURI);
}
}
std::string StringSection;
{
@ -379,6 +429,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
}
RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
std::string RefsSection;
if (Data.Refs) {
{
raw_string_ostream RefsOS(RefsSection);
for (const auto &Sym : Refs)
writeRefs(Sym.first, Sym.second, Strings, RefsOS);
}
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
OS << RIFF;
}
@ -428,6 +488,8 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
if (I->Symbols)
Symbols = std::move(*I->Symbols);
if (I->Refs)
Refs = std::move(*I->Refs);
} else {
llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
return nullptr;

View File

@ -38,26 +38,29 @@ enum class IndexFileFormat {
// Holds the contents of an index file that was read.
struct IndexFileIn {
llvm::Optional<SymbolSlab> Symbols;
llvm::Optional<RefSlab> Refs;
};
// Parse an index file. The input must be a RIFF container chunk.
// Parse an index file. The input must be a RIFF or YAML file.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
// Specifies the contents of an index file to be written.
struct IndexFileOut {
const SymbolSlab *Symbols;
// TODO: Support serializing symbol occurrences.
const SymbolSlab *Symbols = nullptr;
const RefSlab *Refs = nullptr;
// TODO: Support serializing Dex posting lists.
IndexFileFormat Format = IndexFileFormat::RIFF;
IndexFileOut() = default;
IndexFileOut(const IndexFileIn &I)
: Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {}
: Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
};
// Serializes an index file.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
// Convert a single symbol to YAML, a nice debug representation.
std::string toYAML(const Symbol &);
std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
// Build an in-memory static index from an index file.
// The size should be relatively small, so data can be managed in memory.

View File

@ -6,6 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// A YAML index file is a sequence of tagged entries.
// Each entry either encodes a Symbol or the list of references to a symbol
// (a "ref bundle").
//
//===----------------------------------------------------------------------===//
#include "Index.h"
#include "Serialization.h"
@ -20,10 +26,22 @@
#include <cstdint>
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref)
namespace {
// A "ref bundle": a symbol's ID paired with the references to that symbol.
using RefBundle =
std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
// This is a pale imitation of std::variant<Symbol, RefBundle>
// One YAML entry: holds a Symbol or a RefBundle in separate optionals.
// NOTE(review): nothing here enforces that exactly one is set; that appears to
// be a convention of the code that fills it in - confirm.
struct VariantEntry {
llvm::Optional<clang::clangd::Symbol> Symbol;
llvm::Optional<RefBundle> Refs;
};
} // namespace
namespace llvm {
namespace yaml {
using clang::clangd::Ref;
using clang::clangd::RefKind;
using clang::clangd::Symbol;
using clang::clangd::SymbolID;
using clang::clangd::SymbolLocation;
@ -179,6 +197,46 @@ template <> struct ScalarEnumerationTraits<SymbolKind> {
}
};
template <> struct MappingTraits<RefBundle> {
static void mapping(IO &IO, RefBundle &Refs) {
MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO,
Refs.first);
IO.mapRequired("ID", NSymbolID->HexString);
IO.mapRequired("References", Refs.second);
}
};
// Normalization helper for MappingNormalization: represents a RefKind as its
// 8-bit integer value so it can be mapped as a plain YAML scalar.
struct NormalizedRefKind {
  // Used when reading: starts from the default Kind of 0.
  NormalizedRefKind(IO &) {}
  // Used when writing: captures the integer value of the given kind.
  NormalizedRefKind(IO &, RefKind O) : Kind(static_cast<uint8_t>(O)) {}
  // Converts the stored integer back to a RefKind.
  RefKind denormalize(IO &) { return static_cast<RefKind>(Kind); }

  uint8_t Kind = 0;
};
// YAML mapping for a single reference: its kind under the required key "Kind"
// and its location under the required key "Location".
template <> struct MappingTraits<Ref> {
static void mapping(IO &IO, Ref &R) {
// The kind is mapped via NormalizedRefKind, i.e. as its uint8_t value.
MappingNormalization<NormalizedRefKind, RefKind> NKind(IO, R.Kind);
IO.mapRequired("Kind", NKind->Kind);
IO.mapRequired("Location", R.Location);
}
};
// YAML mapping for one tagged entry. The tag selects the payload:
//   "!Symbol" -> Variant.Symbol, mapped with MappingTraits<Symbol>
//   "!Refs"   -> Variant.Refs, mapped with MappingTraits<RefBundle>
// If neither mapTag call returns true, nothing is mapped and both optionals
// are left as they were.
template <> struct MappingTraits<VariantEntry> {
static void mapping(IO &IO, VariantEntry &Variant) {
// The second mapTag argument is whether this alternative is currently set.
// NOTE(review): presumably this makes the tag get written on output and
// matched against the document's tag on input - confirm against YAML I/O docs.
if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) {
// When reading, create the Symbol before mapping fields into it.
if (!IO.outputting())
Variant.Symbol.emplace();
MappingTraits<Symbol>::mapping(IO, *Variant.Symbol);
} else if (IO.mapTag("!Refs", Variant.Refs.hasValue())) {
// When reading, create the RefBundle before mapping fields into it.
if (!IO.outputting())
Variant.Refs.emplace();
MappingTraits<RefBundle>::mapping(IO, *Variant.Refs);
}
}
};
} // namespace yaml
} // namespace llvm
@ -187,23 +245,38 @@ namespace clangd {
// Writes the contents of O to OS as a sequence of tagged YAML entries: one
// entry per symbol, followed by one entry per ref bundle.
// Both O.Symbols and O.Refs may be null (they default to nullptr in
// IndexFileOut); a null slab simply contributes no entries.
void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
  llvm::yaml::Output Yout(OS);
  if (O.Symbols)
    for (const auto &Sym : *O.Symbols) {
      VariantEntry Entry;
      Entry.Symbol = Sym;
      Yout << Entry;
    }
  if (O.Refs)
    for (auto &Sym : *O.Refs) {
      VariantEntry Entry;
      Entry.Refs = Sym;
      Yout << Entry;
    }
}
Expected<IndexFileIn> readYAML(StringRef Data) {
SymbolSlab::Builder Symbols;
RefSlab::Builder Refs;
llvm::yaml::Input Yin(Data);
do {
Symbol S;
Yin >> S;
VariantEntry Variant;
Yin >> Variant;
if (Yin.error())
return llvm::errorCodeToError(Yin.error());
Symbols.insert(S);
if (Variant.Symbol)
Symbols.insert(*Variant.Symbol);
if (Variant.Refs)
for (const auto &Ref : Variant.Refs->second)
Refs.insert(Variant.Refs->first, Ref);
} while (Yin.nextDocument());
IndexFileIn Result;
Result.Symbols.emplace(std::move(Symbols).build());
Result.Refs.emplace(std::move(Refs).build());
return std::move(Result);
}
@ -218,5 +291,16 @@ std::string toYAML(const Symbol &S) {
return Buf;
}
// Converts the references to a single symbol to YAML text, using the same
// RefBundle mapping as the index file writer (without the entry tag).
std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &Data) {
  std::string Text;
  RefBundle Bundle = {Data.first, Data.second};
  {
    // Scoped so the stream is destroyed (and flushed into Text) before return.
    llvm::raw_string_ostream Stream(Text);
    llvm::yaml::Output Writer(Stream);
    Writer << Bundle;
  }
  return Text;
}
} // namespace clangd
} // namespace clang

View File

@ -67,18 +67,30 @@ public:
else
Symbols.insert(Sym);
}
},
[&](RefSlab S) {
std::lock_guard<std::mutex> Lock(SymbolsMu);
for (const auto &Sym : S) {
// No need to merge as currently all Refs are from main file.
for (const auto &Ref : Sym.second)
Refs.insert(Sym.first, Ref);
}
})
.release();
}
// Awkward: we write the result in the destructor, because the executor
// takes ownership so it's the easiest way to get our data back out.
~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); }
~IndexActionFactory() {
Result.Symbols = std::move(Symbols).build();
Result.Refs = std::move(Refs).build();
}
private:
IndexFileIn &Result;
std::mutex SymbolsMu;
SymbolSlab::Builder Symbols;
RefSlab::Builder Refs;
};
} // namespace

View File

@ -13,6 +13,9 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using testing::_;
using testing::AllOf;
using testing::Pair;
using testing::UnorderedElementsAre;
using testing::UnorderedElementsAreArray;
namespace clang {
@ -21,6 +24,7 @@ namespace {
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
Name: 'Foo1'
Scope: 'clang::'
@ -46,6 +50,7 @@ IncludeHeaders:
References: 3
...
---
!Symbol
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
Name: 'Foo2'
Scope: 'clang::'
@ -64,6 +69,18 @@ Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
References:
- Kind: 4
Location:
FileURI: file:///path/foo.cc
Start:
Line: 5
Column: 3
End:
Line: 5
Column: 8
)";
MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
@ -107,6 +124,16 @@ TEST(SerializationTest, YAMLConversions) {
EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
ASSERT_TRUE(bool(ParsedYAML->Refs));
EXPECT_THAT(*ParsedYAML->Refs,
UnorderedElementsAre(
Pair(cantFail(SymbolID::fromStr(
"057557CEBF6E6B2DD437FBF60CC58F352D1DF856")),
testing::SizeIs(1))));
auto Ref1 = ParsedYAML->Refs->begin()->second.front();
EXPECT_EQ(Ref1.Kind, RefKind::Reference);
EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");
}
std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
@ -115,24 +142,37 @@ std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
Result.push_back(toYAML(Sym));
return Result;
}
std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
std::vector<std::string> Result;
for (const auto &Sym : Slab)
Result.push_back(toYAML(Sym));
return Result;
}
// Round-trips the YAML fixture through the binary (RIFF) format and checks
// that both symbols and refs survive unchanged.
TEST(SerializationTest, BinaryConversions) {
  auto In = readIndexFile(YAML);
  // Fatal: In is dereferenced below.
  ASSERT_TRUE(bool(In)) << In.takeError();

  // Write to binary format, and parse again.
  IndexFileOut Out(*In);
  Out.Format = IndexFileFormat::RIFF;
  std::string Serialized = llvm::to_string(Out);

  auto In2 = readIndexFile(Serialized);
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(In->Symbols);
  ASSERT_TRUE(In->Refs);
  ASSERT_TRUE(In2->Symbols);
  ASSERT_TRUE(In2->Refs);

  // Assert the YAML serializations match, for nice comparisons and diffs.
  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
}
} // namespace