forked from OSchip/llvm-project
[clangd] clangd-indexer gathers refs and stores them in index files.
Reviewers: ioeric Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D52531 llvm-svn: 343778
This commit is contained in:
parent
2ec5a10db3
commit
cc21779c3c
|
@ -13,10 +13,11 @@ public:
|
|||
IndexAction(std::shared_ptr<SymbolCollector> C,
|
||||
std::unique_ptr<CanonicalIncludes> Includes,
|
||||
const index::IndexingOptions &Opts,
|
||||
std::function<void(SymbolSlab)> &SymbolsCallback)
|
||||
std::function<void(SymbolSlab)> SymbolsCallback,
|
||||
std::function<void(RefSlab)> RefsCallback)
|
||||
: WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)),
|
||||
SymbolsCallback(SymbolsCallback), Collector(C),
|
||||
Includes(std::move(Includes)),
|
||||
SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback),
|
||||
Collector(C), Includes(std::move(Includes)),
|
||||
PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
|
||||
|
||||
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
|
||||
|
@ -41,10 +42,13 @@ public:
|
|||
return;
|
||||
}
|
||||
SymbolsCallback(Collector->takeSymbols());
|
||||
if (RefsCallback != nullptr)
|
||||
RefsCallback(Collector->takeRefs());
|
||||
}
|
||||
|
||||
private:
|
||||
std::function<void(SymbolSlab)> SymbolsCallback;
|
||||
std::function<void(RefSlab)> RefsCallback;
|
||||
std::shared_ptr<SymbolCollector> Collector;
|
||||
std::unique_ptr<CanonicalIncludes> Includes;
|
||||
std::unique_ptr<CommentHandler> PragmaHandler;
|
||||
|
@ -54,20 +58,23 @@ private:
|
|||
|
||||
std::unique_ptr<FrontendAction>
|
||||
createStaticIndexingAction(SymbolCollector::Options Opts,
|
||||
std::function<void(SymbolSlab)> SymbolsCallback) {
|
||||
std::function<void(SymbolSlab)> SymbolsCallback,
|
||||
std::function<void(RefSlab)> RefsCallback) {
|
||||
index::IndexingOptions IndexOpts;
|
||||
IndexOpts.SystemSymbolFilter =
|
||||
index::IndexingOptions::SystemSymbolFilterKind::All;
|
||||
Opts.CollectIncludePath = true;
|
||||
Opts.CountReferences = true;
|
||||
Opts.Origin = SymbolOrigin::Static;
|
||||
if (RefsCallback != nullptr)
|
||||
Opts.RefFilter = RefKind::All;
|
||||
auto Includes = llvm::make_unique<CanonicalIncludes>();
|
||||
addSystemHeadersMapping(Includes.get());
|
||||
Opts.Includes = Includes.get();
|
||||
return llvm::make_unique<IndexAction>(
|
||||
std::make_shared<SymbolCollector>(std::move(Opts)), std::move(Includes),
|
||||
IndexOpts, SymbolsCallback);
|
||||
}
|
||||
IndexOpts, SymbolsCallback, RefsCallback);
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
|
@ -21,10 +21,13 @@ namespace clangd {
|
|||
// Only a subset of SymbolCollector::Options are respected:
|
||||
// - include paths are always collected, and canonicalized appropriately
|
||||
// - references are always counted
|
||||
// - main-file refs are collected (if RefsCallback is non-null)
|
||||
// - the symbol origin is always Static
|
||||
// FIXME: refs from headers should also be collected.
|
||||
std::unique_ptr<FrontendAction>
|
||||
createStaticIndexingAction(SymbolCollector::Options Opts,
|
||||
std::function<void(SymbolSlab)> SymbolsCallback);
|
||||
std::function<void(SymbolSlab)> SymbolsCallback,
|
||||
std::function<void(RefSlab)> RefsCallback);
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
|
@ -298,17 +298,47 @@ Symbol readSymbol(Reader &Data, ArrayRef<StringRef> Strings) {
|
|||
return Sym;
|
||||
}
|
||||
|
||||
// REFS ENCODING
|
||||
// A refs section has data grouped by Symbol. Each symbol has:
|
||||
// - SymbolID: 20 bytes
|
||||
// - NumRefs: varint
|
||||
// - Ref[NumRefs]
|
||||
// Fields of Ref are encoded in turn, see implementation.
|
||||
|
||||
void writeRefs(const SymbolID &ID, ArrayRef<Ref> Refs,
|
||||
const StringTableOut &Strings, raw_ostream &OS) {
|
||||
OS << ID.raw();
|
||||
writeVar(Refs.size(), OS);
|
||||
for (const auto &Ref : Refs) {
|
||||
OS.write(static_cast<unsigned char>(Ref.Kind));
|
||||
writeLocation(Ref.Location, Strings, OS);
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the refs bundle for one symbol: a SymbolID, a varint count, then that
// many refs, each a kind byte followed by a location.
//
// The count comes straight from the file, so the vector is grown one ref at a
// time and reading stops as soon as the reader reports an error. Resizing up
// front would let a corrupt or truncated file request an arbitrarily large
// allocation.
// NOTE(review): this assumes Reader::err() becomes true once a read runs past
// the end of the data, which is how the caller in readRIFF treats it.
// When an error occurs the returned bundle may be partial; callers are
// expected to check Data.err() before using the result.
std::pair<SymbolID, std::vector<Ref>> readRefs(Reader &Data,
                                               ArrayRef<StringRef> Strings) {
  std::pair<SymbolID, std::vector<Ref>> Result;
  Result.first = Data.consumeID();
  auto NumRefs = Data.consumeVar();
  for (decltype(NumRefs) I = 0; I < NumRefs && !Data.err(); ++I) {
    Result.second.emplace_back();
    auto &R = Result.second.back();
    R.Kind = static_cast<RefKind>(Data.consume8());
    R.Location = readLocation(Data, Strings);
  }
  return Result;
}
|
||||
|
||||
// FILE ENCODING
|
||||
// A file is a RIFF chunk with type 'CdIx'.
|
||||
// It contains the sections:
|
||||
// - meta: version number
|
||||
// - stri: string table
|
||||
// - symb: symbols
|
||||
// - refs: references to symbols
|
||||
|
||||
// The current versioning scheme is simple - non-current versions are rejected.
|
||||
// If you make a breaking change, bump this version number to invalidate stored
|
||||
// data. Later we may want to support some backward compatibility.
|
||||
constexpr static uint32_t Version = 4;
|
||||
constexpr static uint32_t Version = 5;
|
||||
|
||||
Expected<IndexFileIn> readRIFF(StringRef Data) {
|
||||
auto RIFF = riff::readFile(Data);
|
||||
|
@ -342,6 +372,18 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
|
|||
return makeError("malformed or truncated symbol");
|
||||
Result.Symbols = std::move(Symbols).build();
|
||||
}
|
||||
if (Chunks.count("refs")) {
|
||||
Reader RefsReader(Chunks.lookup("refs"));
|
||||
RefSlab::Builder Refs;
|
||||
while (!RefsReader.eof()) {
|
||||
auto RefsBundle = readRefs(RefsReader, Strings->Strings);
|
||||
for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
|
||||
Refs.insert(RefsBundle.first, Ref);
|
||||
}
|
||||
if (RefsReader.err())
|
||||
return makeError("malformed or truncated refs");
|
||||
Result.Refs = std::move(Refs).build();
|
||||
}
|
||||
return std::move(Result);
|
||||
}
|
||||
|
||||
|
@ -363,6 +405,14 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
|
|||
Symbols.emplace_back(Sym);
|
||||
visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
|
||||
}
|
||||
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
|
||||
if (Data.Refs) {
|
||||
for (const auto &Sym : *Data.Refs) {
|
||||
Refs.emplace_back(Sym);
|
||||
for (auto &Ref : Refs.back().second)
|
||||
Strings.intern(Ref.Location.FileURI);
|
||||
}
|
||||
}
|
||||
|
||||
std::string StringSection;
|
||||
{
|
||||
|
@ -379,6 +429,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
|
|||
}
|
||||
RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
|
||||
|
||||
std::string RefsSection;
|
||||
if (Data.Refs) {
|
||||
{
|
||||
raw_string_ostream RefsOS(RefsSection);
|
||||
for (const auto &Sym : Refs)
|
||||
writeRefs(Sym.first, Sym.second, Strings, RefsOS);
|
||||
}
|
||||
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
|
||||
}
|
||||
|
||||
OS << RIFF;
|
||||
}
|
||||
|
||||
|
@ -428,6 +488,8 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
|
|||
if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
|
||||
if (I->Symbols)
|
||||
Symbols = std::move(*I->Symbols);
|
||||
if (I->Refs)
|
||||
Refs = std::move(*I->Refs);
|
||||
} else {
|
||||
llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
|
||||
return nullptr;
|
||||
|
|
|
@ -38,26 +38,29 @@ enum class IndexFileFormat {
|
|||
// Holds the contents of an index file that was read.
|
||||
struct IndexFileIn {
|
||||
llvm::Optional<SymbolSlab> Symbols;
|
||||
llvm::Optional<RefSlab> Refs;
|
||||
};
|
||||
// Parse an index file. The input must be a RIFF container chunk.
|
||||
// Parse an index file. The input must be a RIFF or YAML file.
|
||||
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
|
||||
|
||||
// Specifies the contents of an index file to be written.
|
||||
struct IndexFileOut {
|
||||
const SymbolSlab *Symbols;
|
||||
// TODO: Support serializing symbol occurrences.
|
||||
const SymbolSlab *Symbols = nullptr;
|
||||
const RefSlab *Refs = nullptr;
|
||||
// TODO: Support serializing Dex posting lists.
|
||||
IndexFileFormat Format = IndexFileFormat::RIFF;
|
||||
|
||||
IndexFileOut() = default;
|
||||
IndexFileOut(const IndexFileIn &I)
|
||||
: Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {}
|
||||
: Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
|
||||
Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
|
||||
};
|
||||
// Serializes an index file.
|
||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
|
||||
|
||||
// Convert a single symbol to YAML, a nice debug representation.
|
||||
std::string toYAML(const Symbol &);
|
||||
std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
|
||||
|
||||
// Build an in-memory static index from an index file.
|
||||
// The size should be relatively small, so data can be managed in memory.
|
||||
|
|
|
@ -6,6 +6,12 @@
|
|||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// A YAML index file is a sequence of tagged entries.
|
||||
// Each entry either encodes a Symbol or the list of references to a symbol
|
||||
// (a "ref bundle").
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Index.h"
|
||||
#include "Serialization.h"
|
||||
|
@ -20,10 +26,22 @@
|
|||
#include <cstdint>
|
||||
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref)
|
||||
|
||||
namespace {
|
||||
using RefBundle =
|
||||
std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
|
||||
// This is a pale imitation of std::variant<Symbol, RefBundle>
|
||||
struct VariantEntry {
|
||||
llvm::Optional<clang::clangd::Symbol> Symbol;
|
||||
llvm::Optional<RefBundle> Refs;
|
||||
};
|
||||
} // namespace
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
using clang::clangd::Ref;
|
||||
using clang::clangd::RefKind;
|
||||
using clang::clangd::Symbol;
|
||||
using clang::clangd::SymbolID;
|
||||
using clang::clangd::SymbolLocation;
|
||||
|
@ -179,6 +197,46 @@ template <> struct ScalarEnumerationTraits<SymbolKind> {
|
|||
}
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<RefBundle> {
|
||||
static void mapping(IO &IO, RefBundle &Refs) {
|
||||
MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO,
|
||||
Refs.first);
|
||||
IO.mapRequired("ID", NSymbolID->HexString);
|
||||
IO.mapRequired("References", Refs.second);
|
||||
}
|
||||
};
|
||||
|
||||
struct NormalizedRefKind {
|
||||
NormalizedRefKind(IO &) {}
|
||||
NormalizedRefKind(IO &, RefKind O) { Kind = static_cast<uint8_t>(O); }
|
||||
|
||||
RefKind denormalize(IO &) { return static_cast<RefKind>(Kind); }
|
||||
|
||||
uint8_t Kind = 0;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<Ref> {
|
||||
static void mapping(IO &IO, Ref &R) {
|
||||
MappingNormalization<NormalizedRefKind, RefKind> NKind(IO, R.Kind);
|
||||
IO.mapRequired("Kind", NKind->Kind);
|
||||
IO.mapRequired("Location", R.Location);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<VariantEntry> {
|
||||
static void mapping(IO &IO, VariantEntry &Variant) {
|
||||
if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) {
|
||||
if (!IO.outputting())
|
||||
Variant.Symbol.emplace();
|
||||
MappingTraits<Symbol>::mapping(IO, *Variant.Symbol);
|
||||
} else if (IO.mapTag("!Refs", Variant.Refs.hasValue())) {
|
||||
if (!IO.outputting())
|
||||
Variant.Refs.emplace();
|
||||
MappingTraits<RefBundle>::mapping(IO, *Variant.Refs);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace yaml
|
||||
} // namespace llvm
|
||||
|
||||
|
@ -187,23 +245,38 @@ namespace clangd {
|
|||
|
||||
void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
|
||||
llvm::yaml::Output Yout(OS);
|
||||
for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability.
|
||||
Yout << Sym;
|
||||
for (const auto &Sym : *O.Symbols) {
|
||||
VariantEntry Entry;
|
||||
Entry.Symbol = Sym;
|
||||
Yout << Entry;
|
||||
}
|
||||
if (O.Refs)
|
||||
for (auto &Sym : *O.Refs) {
|
||||
VariantEntry Entry;
|
||||
Entry.Refs = Sym;
|
||||
Yout << Entry;
|
||||
}
|
||||
}
|
||||
|
||||
// Parses a YAML index file: a sequence of documents, each holding either a
// symbol or a ref bundle.
// Returns the parser's error if any document fails to parse. On success both
// Symbols and Refs are set in the result (possibly to empty slabs).
Expected<IndexFileIn> readYAML(StringRef Data) {
  SymbolSlab::Builder Symbols;
  RefSlab::Builder Refs;
  llvm::yaml::Input Yin(Data);
  do {
    // Every document is read exactly once, as a variant entry.
    VariantEntry Variant;
    Yin >> Variant;
    if (Yin.error())
      return llvm::errorCodeToError(Yin.error());
    if (Variant.Symbol)
      Symbols.insert(*Variant.Symbol);
    if (Variant.Refs)
      for (const auto &R : Variant.Refs->second)
        Refs.insert(Variant.Refs->first, R);
  } while (Yin.nextDocument());

  IndexFileIn Result;
  Result.Symbols.emplace(std::move(Symbols).build());
  Result.Refs.emplace(std::move(Refs).build());
  return std::move(Result);
}
|
||||
|
||||
|
@ -218,5 +291,16 @@ std::string toYAML(const Symbol &S) {
|
|||
return Buf;
|
||||
}
|
||||
|
||||
// Renders the references to one symbol as a YAML string.
std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &Data) {
  // Convert to RefBundle, the type the YAML mapping traits are defined for.
  RefBundle Bundle = {Data.first, Data.second};
  std::string Text;
  {
    // Scoped so the streams are destroyed before Text is returned.
    llvm::raw_string_ostream Stream(Text);
    llvm::yaml::Output Writer(Stream);
    Writer << Bundle;
  }
  return Text;
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
|
@ -67,18 +67,30 @@ public:
|
|||
else
|
||||
Symbols.insert(Sym);
|
||||
}
|
||||
},
|
||||
[&](RefSlab S) {
|
||||
std::lock_guard<std::mutex> Lock(SymbolsMu);
|
||||
for (const auto &Sym : S) {
|
||||
// No need to merge as currently all Refs are from main file.
|
||||
for (const auto &Ref : Sym.second)
|
||||
Refs.insert(Sym.first, Ref);
|
||||
}
|
||||
})
|
||||
.release();
|
||||
}
|
||||
|
||||
// Awkward: we write the result in the destructor, because the executor
|
||||
// takes ownership so it's the easiest way to get our data back out.
|
||||
~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); }
|
||||
~IndexActionFactory() {
|
||||
Result.Symbols = std::move(Symbols).build();
|
||||
Result.Refs = std::move(Refs).build();
|
||||
}
|
||||
|
||||
private:
|
||||
IndexFileIn &Result;
|
||||
std::mutex SymbolsMu;
|
||||
SymbolSlab::Builder Symbols;
|
||||
RefSlab::Builder Refs;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using testing::_;
|
||||
using testing::AllOf;
|
||||
using testing::Pair;
|
||||
using testing::UnorderedElementsAre;
|
||||
using testing::UnorderedElementsAreArray;
|
||||
namespace clang {
|
||||
|
@ -21,6 +24,7 @@ namespace {
|
|||
|
||||
const char *YAML = R"(
|
||||
---
|
||||
!Symbol
|
||||
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
|
||||
Name: 'Foo1'
|
||||
Scope: 'clang::'
|
||||
|
@ -46,6 +50,7 @@ IncludeHeaders:
|
|||
References: 3
|
||||
...
|
||||
---
|
||||
!Symbol
|
||||
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
|
||||
Name: 'Foo2'
|
||||
Scope: 'clang::'
|
||||
|
@ -64,6 +69,18 @@ Flags: 2
|
|||
Signature: '-sig'
|
||||
CompletionSnippetSuffix: '-snippet'
|
||||
...
|
||||
!Refs
|
||||
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
|
||||
References:
|
||||
- Kind: 4
|
||||
Location:
|
||||
FileURI: file:///path/foo.cc
|
||||
Start:
|
||||
Line: 5
|
||||
Column: 3
|
||||
End:
|
||||
Line: 5
|
||||
Column: 8
|
||||
)";
|
||||
|
||||
MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
|
||||
|
@ -107,6 +124,16 @@ TEST(SerializationTest, YAMLConversions) {
|
|||
EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
|
||||
EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
|
||||
EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
|
||||
|
||||
ASSERT_TRUE(bool(ParsedYAML->Refs));
|
||||
EXPECT_THAT(*ParsedYAML->Refs,
|
||||
UnorderedElementsAre(
|
||||
Pair(cantFail(SymbolID::fromStr(
|
||||
"057557CEBF6E6B2DD437FBF60CC58F352D1DF856")),
|
||||
testing::SizeIs(1))));
|
||||
auto Ref1 = ParsedYAML->Refs->begin()->second.front();
|
||||
EXPECT_EQ(Ref1.Kind, RefKind::Reference);
|
||||
EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");
|
||||
}
|
||||
|
||||
std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
|
||||
|
@ -115,24 +142,37 @@ std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
|
|||
Result.push_back(toYAML(Sym));
|
||||
return Result;
|
||||
}
|
||||
std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
|
||||
std::vector<std::string> Result;
|
||||
for (const auto &Sym : Slab)
|
||||
Result.push_back(toYAML(Sym));
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Round-trips the YAML fixture through the binary (RIFF) format and checks
// that symbols and refs survive unchanged.
TEST(SerializationTest, BinaryConversions) {
  auto In = readIndexFile(YAML);
  // Fatal: In is dereferenced below.
  ASSERT_TRUE(bool(In)) << In.takeError();

  // Write to binary format, and parse again.
  IndexFileOut Out(*In);
  Out.Format = IndexFileFormat::RIFF;
  std::string Serialized = llvm::to_string(Out);

  auto In2 = readIndexFile(Serialized);
  // Report the error of the value that actually failed.
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(In->Symbols);
  ASSERT_TRUE(In->Refs);
  ASSERT_TRUE(In2->Symbols);
  ASSERT_TRUE(In2->Refs);

  // Assert the YAML serializations match, for nice comparisons and diffs.
  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
}
|
||||
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in New Issue