[clangd] Merge binary + YAML serialization behind a (mostly) common interface.

Summary:
Interface is in one file, implementation in two as they have little in common.
A couple of ad-hoc YAML functions left exposed:
 - symbol -> YAML I expect to keep for tools like dexp
 - YAML -> symbol is used for the MR-style indexer, I think we can eliminate
   this (merge-on-the-fly, else use a different serialization)

Reviewers: kbobyrev

Subscribers: mgorny, ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52453

llvm-svn: 342999
This commit is contained in:
Sam McCall 2018-09-25 18:06:43 +00:00
parent 891d7504bb
commit 02d600d267
11 changed files with 177 additions and 178 deletions

View File

@ -44,7 +44,7 @@ add_clang_library(clangDaemon
index/Merge.cpp
index/Serialization.cpp
index/SymbolCollector.cpp
index/SymbolYAML.cpp
index/YAMLSerialization.cpp
index/dex/Dex.cpp
index/dex/Iterator.cpp

View File

@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
#include "../index/SymbolYAML.h"
#include "../index/Serialization.h"
#include "../index/dex/Dex.h"
#include "benchmark/benchmark.h"
#include "llvm/ADT/SmallVector.h"

View File

@ -9,6 +9,8 @@
#include "Serialization.h"
#include "Index.h"
#include "RIFF.h"
#include "Trace.h"
#include "dex/Dex.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@ -294,8 +296,6 @@ Symbol readSymbol(Reader &Data, ArrayRef<StringRef> Strings) {
return Sym;
}
} // namespace
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
@ -308,7 +308,7 @@ Symbol readSymbol(Reader &Data, ArrayRef<StringRef> Strings) {
// data. Later we may want to support some backward compatibility.
constexpr static uint32_t Version = 4;
Expected<IndexFileIn> readIndexFile(StringRef Data) {
Expected<IndexFileIn> readRIFF(StringRef Data) {
auto RIFF = riff::readFile(Data);
if (!RIFF)
return RIFF.takeError();
@ -343,7 +343,7 @@ Expected<IndexFileIn> readIndexFile(StringRef Data) {
return std::move(Result);
}
raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) {
void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
RIFF.Type = riff::fourCC("CdIx");
@ -377,7 +377,64 @@ raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) {
}
RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
return OS << RIFF;
OS << RIFF;
}
} // namespace
// Defined in YAMLSerialization.cpp.
void writeYAML(const IndexFileOut &, raw_ostream &);
Expected<IndexFileIn> readYAML(StringRef);
// Serializes an index file to OS using the encoding named by O.Format.
// Each enumerator dispatches to the writer of the same name:
//  - IndexFileFormat::RIFF -> writeRIFF
//  - IndexFileFormat::YAML -> writeYAML
// Returns OS so that insertions can be chained.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
  switch (O.Format) {
  case IndexFileFormat::RIFF:
    writeRIFF(O, OS);
    break;
  case IndexFileFormat::YAML:
    writeYAML(O, OS);
    break;
  }
  return OS;
}
// Parses an index file, detecting the format from its content.
// Data beginning with "RIFF" is handed to readRIFF and its result is returned
// as-is. Anything else is tried as YAML; if that fails too, the YAML error is
// folded into a combined error message.
Expected<IndexFileIn> readIndexFile(StringRef Data) {
  if (Data.startswith("RIFF"))
    return readRIFF(Data);

  auto YAMLContents = readYAML(Data);
  if (YAMLContents)
    return std::move(*YAMLContents);

  return makeError("Not a RIFF file and failed to parse as YAML: " +
                   llvm::toString(YAMLContents.takeError()));
}
// Reads the index file at SymbolFilename and builds an in-memory index from
// the symbols it contains.
// Returns nullptr (after printing a diagnostic to stderr) if the file cannot
// be opened or cannot be parsed. UseDex selects dex::Dex over MemIndex.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
                                       llvm::ArrayRef<std::string> URISchemes,
                                       bool UseDex) {
  trace::Span OverallTracer("LoadIndex");
  auto Buffer = MemoryBuffer::getFile(SymbolFilename);
  if (!Buffer) {
    llvm::errs() << "Can't open " << SymbolFilename << "\n";
    return nullptr;
  }

  SymbolSlab Symbols;
  RefSlab Refs; // Stays empty: no refs are taken from the parsed file here.
  {
    // Scoped so the span covers parsing only.
    trace::Span Tracer("ParseIndex");
    auto Parsed = readIndexFile(Buffer->get()->getBuffer());
    if (!Parsed) {
      llvm::errs() << "Bad Index: " << llvm::toString(Parsed.takeError())
                   << "\n";
      return nullptr;
    }
    if (Parsed->Symbols)
      Symbols = std::move(*Parsed->Symbols);
  }

  trace::Span Tracer("BuildIndex");
  if (UseDex)
    return dex::Dex::build(std::move(Symbols), URISchemes);
  return MemIndex::build(std::move(Symbols), std::move(Refs));
}
} // namespace clangd

View File

@ -7,14 +7,18 @@
//
//===----------------------------------------------------------------------===//
//
// This file provides a compact binary serialization of indexed symbols.
// This file provides serialization of indexed symbols and other data.
//
// It writes two sections:
// It writes sections:
// - metadata such as version info
// - a string table (which is compressed)
// - lists of encoded symbols
//
// The format has a simple versioning scheme: the version is embedded in the
// data and non-current versions are rejected when reading.
// The format has a simple versioning scheme: the format version number is
// written in the file and non-current versions are rejected when reading.
//
// Human-readable YAML serialization is also supported, and recommended for
// debugging and experiments only.
//
//===----------------------------------------------------------------------===//
@ -23,25 +27,48 @@
#include "Index.h"
#include "llvm/Support/Error.h"
namespace llvm {
namespace yaml {
class Input;
}
} // namespace llvm
namespace clang {
namespace clangd {
// Encodings an index file can be written in; IndexFileOut::Format selects one.
enum class IndexFileFormat {
RIFF, // Versioned binary format, suitable for production use.
YAML, // Human-readable format, suitable for experiments and debugging.
};
// Specifies the contents of an index file to be written.
struct IndexFileOut {
  // Symbols to serialize. Must be set before writing: the RIFF writer asserts
  // that it is non-null. Initialized to null so that a forgotten assignment
  // trips that assert instead of reading an indeterminate pointer.
  const SymbolSlab *Symbols = nullptr;
  // TODO: Support serializing symbol occurrences.
  // TODO: Support serializing Dex posting lists.
  // Encoding used when this object is streamed to a raw_ostream.
  IndexFileFormat Format = IndexFileFormat::RIFF;
};
// Serializes an index file in the format selected by IndexFileOut::Format.
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
// Holds the contents of an index file that was read.
struct IndexFileIn {
  // Symbols parsed from the file; empty if the file carried none.
  llvm::Optional<SymbolSlab> Symbols;
  // Encoding the file was read from. Given an initializer so the field is
  // never indeterminate; readers should overwrite it with the actual format.
  IndexFileFormat Format = IndexFileFormat::RIFF;
};
// Parses an index file. The input may be a RIFF container chunk or YAML; the
// format is detected from the content.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
std::string toYAML(const Symbol &);
// Returned symbol is backed by the YAML input.
// FIXME: this is only needed for IndexerMain, find a better solution.
llvm::Expected<Symbol> symbolFromYAML(llvm::yaml::Input &);
// Build an in-memory static index from an index file.
// The size should be relatively small, so data can be managed in memory.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
llvm::ArrayRef<std::string> URISchemes,
bool UseDex = true);
} // namespace clangd
} // namespace clang

View File

@ -1,54 +0,0 @@
//===--- SymbolYAML.h --------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SymbolYAML provides facilities to convert Symbol to YAML, and vice versa.
// The YAML format of Symbol is designed for simplicity and experiment, but
// isn't a suitable/efficient store.
//
// This is for **experimental** only. Don't use it in the production code.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
#include "Index.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace clangd {
// Read symbols from a YAML-format string.
SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent);
// Read one symbol from a YAML-stream.
// The returned symbol is backed by Input.
Symbol SymbolFromYAML(llvm::yaml::Input &Input);
// Convert a single symbol to YAML-format string.
// The YAML result is safe to concatenate.
std::string SymbolToYAML(Symbol Sym);
// Convert symbols to a YAML-format string.
// The YAML result is safe to concatenate if you have multiple symbol slabs.
void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS);
// Build an in-memory static index for global symbols from a symbol file.
// The size of global symbols should be relatively small, so that all symbols
// can be managed in memory.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
llvm::ArrayRef<std::string> URISchemes,
bool UseDex = true);
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H

View File

@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
#include "SymbolYAML.h"
#include "Index.h"
#include "Serialization.h"
#include "Trace.h"
@ -16,10 +15,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol)
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
namespace llvm {
@ -27,8 +26,8 @@ namespace yaml {
using clang::clangd::Symbol;
using clang::clangd::SymbolID;
using clang::clangd::SymbolOrigin;
using clang::clangd::SymbolLocation;
using clang::clangd::SymbolOrigin;
using clang::index::SymbolInfo;
using clang::index::SymbolKind;
using clang::index::SymbolLanguage;
@ -186,66 +185,46 @@ template <> struct ScalarEnumerationTraits<SymbolKind> {
namespace clang {
namespace clangd {
SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) {
llvm::yaml::Input Yin(YAMLContent);
std::vector<Symbol> S;
Yin >> S;
SymbolSlab::Builder Syms;
for (auto &Sym : S)
Syms.insert(Sym);
return std::move(Syms).build();
void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
llvm::yaml::Output Yout(OS);
for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability.
Yout << Sym;
}
Symbol SymbolFromYAML(llvm::yaml::Input &Input) {
// Parses Data as a sequence of YAML documents, each describing one Symbol,
// and collects them into a SymbolSlab.
// Returns the parser's error as soon as any document fails to parse.
Expected<IndexFileIn> readYAML(StringRef Data) {
  SymbolSlab::Builder Symbols;
  llvm::yaml::Input Yin(Data);
  do {
    Symbol S;
    Yin >> S;
    if (Yin.error())
      return llvm::errorCodeToError(Yin.error());
    Symbols.insert(S);
  } while (Yin.nextDocument());

  IndexFileIn Result;
  // Record the format that was read so Result.Format is never left unset.
  Result.Format = IndexFileFormat::YAML;
  Result.Symbols.emplace(std::move(Symbols).build());
  return std::move(Result);
}
std::string toYAML(const Symbol &S) {
std::string Buf;
{
llvm::raw_string_ostream OS(Buf);
llvm::yaml::Output Yout(OS);
Symbol Sym = S; // copy: Yout<< requires mutability.
OS << Sym;
}
return Buf;
}
// Reads one symbol from the YAML stream Yin.
// Returns the stream's error if parsing failed; otherwise the parsed symbol.
Expected<Symbol> symbolFromYAML(llvm::yaml::Input &Yin) {
  Symbol S;
  // Read exactly once, from the parameter as it is now named.
  Yin >> S;
  if (Yin.error())
    return llvm::errorCodeToError(Yin.error());
  return S;
}
void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS) {
llvm::yaml::Output Yout(OS);
for (Symbol S : Symbols) // copy: Yout<< requires mutability.
Yout << S;
}
// Converts a single symbol to a YAML-format string.
// Sym is taken by value because the YAML writer needs a mutable object.
std::string SymbolToYAML(Symbol Sym) {
  std::string Text;
  llvm::raw_string_ostream Stream(Text);
  llvm::yaml::Output Writer(Stream);
  Writer << Sym;
  // Push any buffered output into Text before handing it back.
  Stream.flush();
  return Text;
}
// Reads the symbol file at SymbolFilename and builds an in-memory index.
// Content starting with "RIFF" is parsed as a binary index file; anything
// else is parsed as YAML. Returns nullptr if the file cannot be opened or the
// binary parse fails. UseDex selects dex::Dex over MemIndex.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
                                       llvm::ArrayRef<std::string> URISchemes,
                                       bool UseDex) {
  trace::Span OverallTracer("LoadIndex");
  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
  if (!Buffer) {
    llvm::errs() << "Can't open " << SymbolFilename << "\n";
    return nullptr;
  }

  StringRef Data = Buffer->get()->getBuffer();
  llvm::Optional<SymbolSlab> Slab;
  const bool IsBinary = Data.startswith("RIFF"); // Magic for binary index file.
  if (!IsBinary) {
    trace::Span Tracer("ParseYAML");
    Slab = symbolsFromYAML(Data);
  } else {
    trace::Span Tracer("ParseRIFF");
    auto RIFF = readIndexFile(Data);
    if (RIFF)
      Slab = std::move(RIFF->Symbols);
    else
      llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n";
  }

  // Slab is still empty only when the binary parse failed above.
  if (!Slab)
    return nullptr;

  trace::Span Tracer("BuildIndex");
  if (UseDex)
    return dex::Dex::build(std::move(*Slab), URISchemes);
  return MemIndex::build(std::move(*Slab), RefSlab());
}
} // namespace clangd
} // namespace clang

View File

@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "../../../index/SymbolYAML.h"
#include "../../Serialization.h"
#include "../Dex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@ -155,7 +155,7 @@ class Lookup : public Command {
bool FoundSymbol = false;
Index->lookup(Request, [&](const Symbol &Sym) {
FoundSymbol = true;
llvm::outs() << SymbolToYAML(Sym);
llvm::outs() << toYAML(Sym);
});
if (!FoundSymbol)
llvm::outs() << "not found\n";

View File

@ -18,7 +18,6 @@
#include "index/Merge.h"
#include "index/Serialization.h"
#include "index/SymbolCollector.h"
#include "index/SymbolYAML.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Index/IndexDataConsumer.h"
@ -60,12 +59,13 @@ static llvm::cl::opt<bool> MergeOnTheFly(
"MapReduce."),
llvm::cl::init(true), llvm::cl::Hidden);
enum IndexFormat { YAML, Binary };
static llvm::cl::opt<IndexFormat> Format(
"format", llvm::cl::desc("Format of the index to be written"),
llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"),
clEnumValN(Binary, "binary", "binary RIFF format")),
llvm::cl::init(YAML));
// Command-line option "-format": selects the encoding of the written index.
// Accepts "yaml" or "binary" (RIFF); the initial value is YAML.
static llvm::cl::opt<IndexFileFormat>
Format("format", llvm::cl::desc("Format of the index to be written"),
llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
"human-readable YAML format"),
clEnumValN(IndexFileFormat::RIFF, "binary",
"binary RIFF format")),
llvm::cl::init(IndexFileFormat::YAML));
/// Responsible for aggregating symbols from each processed file and producing
/// the final results. All methods in this class must be thread-safe,
@ -162,8 +162,7 @@ public:
// Reports each symbol to the executor's result store exactly once, keyed by
// the symbol ID string, with the symbol's YAML text as the value.
void consumeSymbols(SymbolSlab Symbols) override {
  for (const auto &Sym : Symbols) {
    Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym));
  }
}
SymbolSlab mergeResults() override {
@ -171,7 +170,7 @@ public:
Executor.getToolResults()->forEachResult(
[&](llvm::StringRef Key, llvm::StringRef Value) {
llvm::yaml::Input Yin(Value);
auto Sym = clang::clangd::SymbolFromYAML(Yin);
auto Sym = cantFail(clang::clangd::symbolFromYAML(Yin));
auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key));
if (const auto *Existing = UniqueSymbols.find(ID))
UniqueSymbols.insert(mergeSymbol(*Existing, Sym));
@ -270,15 +269,9 @@ int main(int argc, const char **argv) {
// Reduce phase: combine symbols with the same IDs.
auto UniqueSymbols = Consumer->mergeResults();
// Output phase: emit result symbols.
switch (clang::clangd::Format) {
case clang::clangd::IndexFormat::YAML:
SymbolsToYAML(UniqueSymbols, llvm::outs());
break;
case clang::clangd::IndexFormat::Binary: {
clang::clangd::IndexFileOut Out;
Out.Symbols = &UniqueSymbols;
llvm::outs() << Out;
}
}
clang::clangd::IndexFileOut Out;
Out.Symbols = &UniqueSymbols;
Out.Format = clang::clangd::Format;
llvm::outs() << Out;
return 0;
}

View File

@ -12,7 +12,7 @@
#include "Path.h"
#include "RIFF.h"
#include "Trace.h"
#include "index/SymbolYAML.h"
#include "index/Serialization.h"
#include "clang/Basic/Version.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"

View File

@ -9,18 +9,18 @@
#include "index/Index.h"
#include "index/Serialization.h"
#include "index/SymbolYAML.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using testing::AllOf;
using testing::UnorderedElementsAre;
using testing::UnorderedElementsAreArray;
namespace clang {
namespace clangd {
namespace {
const char *YAML1 = R"(
const char *YAML = R"(
---
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
Name: 'Foo1'
@ -46,9 +46,6 @@ IncludeHeaders:
- Header: 'include2'
References: 3
...
)";
const char *YAML2 = R"(
---
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
Name: 'Foo2'
@ -70,15 +67,29 @@ CompletionSnippetSuffix: '-snippet'
...
)";
// Matches an object whose ID equals the SymbolID parsed from the string I.
MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
// Matches an object whose Scope followed by Name equals the given string.
MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
// Matches an object whose IncludeHeader and References both equal the given
// values.
MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
}
TEST(SerializationTest, YAMLConversions) {
auto Symbols1 = symbolsFromYAML(YAML1);
ASSERT_EQ(Symbols1.size(), 1u);
const auto &Sym1 = *Symbols1.begin();
auto In = readIndexFile(YAML);
EXPECT_TRUE(bool(In)) << In.takeError();
auto ParsedYAML = readIndexFile(YAML);
ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
ASSERT_TRUE(bool(ParsedYAML->Symbols));
EXPECT_THAT(
*ParsedYAML->Symbols,
UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"),
ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
auto Sym1 = *ParsedYAML->Symbols->find(
cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856")));
auto Sym2 = *ParsedYAML->Symbols->find(
cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
EXPECT_THAT(Sym1, QName("clang::Foo1"));
EXPECT_EQ(Sym1.Signature, "");
EXPECT_EQ(Sym1.Documentation, "Foo doc");
@ -91,51 +102,38 @@ TEST(SerializationTest, YAMLConversions) {
UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
IncludeHeaderWithRef("include2", 3u)));
auto Symbols2 = symbolsFromYAML(YAML2);
ASSERT_EQ(Symbols2.size(), 1u);
const auto &Sym2 = *Symbols2.begin();
EXPECT_THAT(Sym2, QName("clang::Foo2"));
EXPECT_EQ(Sym2.Signature, "-sig");
EXPECT_EQ(Sym2.ReturnType, "");
EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
std::string ConcatenatedYAML;
{
llvm::raw_string_ostream OS(ConcatenatedYAML);
SymbolsToYAML(Symbols1, OS);
SymbolsToYAML(Symbols2, OS);
}
auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML);
EXPECT_THAT(ConcatenatedSymbols,
UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2")));
}
std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
std::vector<std::string> Result;
for (const auto &Sym : Slab)
Result.push_back(SymbolToYAML(Sym));
Result.push_back(toYAML(Sym));
return Result;
}
// Round-trips the YAML fixture through the binary (RIFF) format and checks
// that the symbols read back serialize to the same YAML as the originals.
TEST(SerializationTest, BinaryConversions) {
  // We reuse the test symbols from YAML.
  auto In = readIndexFile(YAML);
  // ASSERT, not EXPECT: In and In->Symbols are dereferenced below.
  ASSERT_TRUE(bool(In)) << In.takeError();
  ASSERT_TRUE(bool(In->Symbols));

  // Write to binary format, and parse again.
  IndexFileOut Out;
  Out.Symbols = In->Symbols.getPointer();
  Out.Format = IndexFileFormat::RIFF;
  std::string Serialized = llvm::to_string(Out);

  auto In2 = readIndexFile(Serialized);
  // Report (and consume) the error of the object being checked.
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(bool(In2->Symbols));

  // Assert the YAML serializations match, for nice comparisons and diffs.
  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
}
} // namespace

View File

@ -11,7 +11,6 @@
#include "TestFS.h"
#include "TestTU.h"
#include "index/SymbolCollector.h"
#include "index/SymbolYAML.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/VirtualFileSystem.h"