llvm-project/clang-tools-extra/clangd/index/Serialization.cpp

745 lines
24 KiB
C++

//===-- Serialization.cpp - Binary serialization of index data ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Serialization.h"
#include "Headers.h"
#include "RIFF.h"
#include "SymbolLocation.h"
#include "SymbolOrigin.h"
#include "dex/Dex.h"
#include "support/Logger.h"
#include "support/Trace.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <vector>
namespace clang {
namespace clangd {
namespace {
// IO PRIMITIVES
// We use little-endian 32 bit ints, sometimes with variable-length encoding.
//
// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
// to encode the number, and the top bit to indicate whether more bytes follow.
// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
// This represents 0x1a | 0x2f<<7 = 6042.
// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
// Reads binary data from a StringRef, and keeps track of position.
//
// Every read is bounds-checked against the end of the data. A read that cannot
// be satisfied sets the error bit (which is never cleared afterwards) and
// yields a zero or empty value, so callers can parse a whole record and check
// err() once at the end.
class Reader {
  const char *Begin, *End;
  bool Err = false;

  // Number of bytes that have not been consumed yet.
  size_t remaining() const { return static_cast<size_t>(End - Begin); }

public:
  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
  // The "error" bit is set by reading past EOF or reading invalid data.
  // When in an error state, reads may return zero values: callers should check.
  bool err() const { return Err; }
  // Did we read all the data, or encounter an error?
  bool eof() const { return Begin == End || Err; }
  // All the data we didn't read yet.
  llvm::StringRef rest() const { return llvm::StringRef(Begin, remaining()); }

  // Reads a single byte. Returns 0 and sets the error bit at end of data.
  uint8_t consume8() {
    if (LLVM_UNLIKELY(Begin == End)) {
      Err = true;
      return 0;
    }
    return *Begin++;
  }

  // Reads a fixed-width little-endian 32 bit integer.
  // Returns 0 and sets the error bit if fewer than four bytes remain.
  uint32_t consume32() {
    // Compare byte counts instead of forming `Begin + 4`: a pointer beyond
    // one-past-the-end is undefined behaviour even if never dereferenced.
    if (LLVM_UNLIKELY(remaining() < 4)) {
      Err = true;
      return 0;
    }
    auto Ret = llvm::support::endian::read32le(Begin);
    Begin += 4;
    return Ret;
  }

  // Reads exactly N bytes and returns a view of them.
  // Returns an empty StringRef and sets the error bit if N is negative or
  // more than the number of bytes remaining.
  llvm::StringRef consume(int N) {
    // A negative N must be rejected explicitly, otherwise it would move the
    // cursor backwards and produce a huge length.
    if (LLVM_UNLIKELY(N < 0 || static_cast<size_t>(N) > remaining())) {
      Err = true;
      return llvm::StringRef();
    }
    llvm::StringRef Ret(Begin, N);
    Begin += N;
    return Ret;
  }

  // Reads a variable-length 32 bit integer (see the varint description above).
  // Truncated input sets the error bit via consume8().
  uint32_t consumeVar() {
    constexpr static uint8_t More = 1 << 7;

    // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
    // is wider than 32 bits).
    uint32_t B = consume8();
    if (LLVM_LIKELY(!(B & More)))
      return B;
    uint32_t Val = B & ~More;
    // Each further byte contributes seven more bits; stop after the byte that
    // would cover bit 31 even if it still has the continuation bit set.
    for (int Shift = 7; (B & More) && Shift < 32; Shift += 7) {
      B = consume8();
      // 5th byte of a varint can only have lowest 4 bits set.
      assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
      Val |= (B & ~More) << Shift;
    }
    return Val;
  }

  // Reads a varint and uses it as an index into Strings.
  // Returns an empty StringRef and sets the error bit if it is out of range.
  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
    auto StringIndex = consumeVar();
    if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
      Err = true;
      return llvm::StringRef();
    }
    return Strings[StringIndex];
  }

  // Reads a raw SymbolID. Returns a default SymbolID if the reader is in an
  // error state after the read.
  SymbolID consumeID() {
    llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
    return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
  }

  // Read a varint (as consumeVar) and resize the container accordingly.
  // If the size is invalid, return false and mark an error.
  // (The caller should abort in this case).
  template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) {
    auto Size = consumeVar();
    // Conservatively assume each element is at least one byte.
    if (Size > remaining()) {
      Err = true;
      return false;
    }
    Container.resize(Size);
    return true;
  }
};
// Writes I to OS as a fixed-width, little-endian 32 bit integer.
void write32(uint32_t I, llvm::raw_ostream &OS) {
  char Bytes[4];
  llvm::support::endian::write32le(Bytes, I);
  OS.write(Bytes, sizeof(Bytes));
}
void writeVar(uint32_t I, llvm::raw_ostream &OS) {
constexpr static uint8_t More = 1 << 7;
if (LLVM_LIKELY(I < 1 << 7)) {
OS.write(I);
return;
}
for (;;) {
OS.write(I | More);
I >>= 7;
if (I < 1 << 7) {
OS.write(I);
return;
}
}
}
// STRING TABLE ENCODING
// Index data has many string fields, and many strings are identical.
// We store each string once, and refer to them by index.
//
// The string table's format is:
// - UncompressedSize : uint32 (or 0 for no compression)
// - CompressedData : byte[CompressedSize]
//
// CompressedData is a zlib-compressed byte[UncompressedSize].
// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
// These are sorted to improve compression.
// Maps each string to a canonical representation.
// Strings remain owned externally (e.g. by SymbolSlab).
class StringTableOut {
llvm::DenseSet<llvm::StringRef> Unique;
std::vector<llvm::StringRef> Sorted;
// Since strings are interned, look up can be by pointer.
llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
public:
StringTableOut() {
// Ensure there's at least one string in the table.
// Table size zero is reserved to indicate no compression.
Unique.insert("");
}
// Add a string to the table. Overwrites S if an identical string exists.
void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
// Finalize the table and write it to OS. No more strings may be added.
void finalize(llvm::raw_ostream &OS) {
Sorted = {Unique.begin(), Unique.end()};
llvm::sort(Sorted);
for (unsigned I = 0; I < Sorted.size(); ++I)
Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
std::string RawTable;
for (llvm::StringRef S : Sorted) {
RawTable.append(std::string(S));
RawTable.push_back(0);
}
if (llvm::zlib::isAvailable()) {
llvm::SmallString<1> Compressed;
llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
write32(RawTable.size(), OS);
OS << Compressed;
} else {
write32(0, OS); // No compression.
OS << RawTable;
}
}
// Get the ID of an string, which must be interned. Table must be finalized.
unsigned index(llvm::StringRef S) const {
assert(!Sorted.empty() && "table not finalized");
assert(Index.count({S.data(), S.size()}) && "string not interned");
return Index.find({S.data(), S.size()})->second;
}
};
// The decoded form of a string table section, as produced by readStringTable.
struct StringTableIn {
// Backing storage: readStringTable saves a copy of every entry into this
// allocator, and the StringRefs in Strings refer to those copies.
llvm::BumpPtrAllocator Arena;
// Table entries in the order they appear in the section. Reader::consumeString
// uses a decoded varint as an index into this sequence.
std::vector<llvm::StringRef> Strings;
};
// Decodes a string table section (format described under STRING TABLE
// ENCODING above).
//
// Data is the raw section payload: a uint32 uncompressed size followed by the
// (possibly zlib-compressed) NUL-separated strings. A size of zero means the
// payload is stored uncompressed.
//
// Returns the decoded table, or an error if the section is truncated, claims
// an implausible uncompressed size, is compressed while zlib is unavailable,
// fails to decompress, or contains a string with no NUL terminator.
llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
  Reader R(Data);
  size_t UncompressedSize = R.consume32();
  if (R.err())
    return error("Truncated string table");

  llvm::StringRef Uncompressed;
  llvm::SmallString<1> UncompressedStorage;
  if (UncompressedSize == 0) // No compression
    Uncompressed = R.rest();
  else if (llvm::zlib::isAvailable()) {
    // Don't allocate a massive buffer if UncompressedSize was corrupted
    // This is effective for sharded index, but not big monolithic ones, as
    // once compressed size reaches 4MB nothing can be ruled out.
    // Theoretical max ratio from https://zlib.net/zlib_tech.html
    constexpr int MaxCompressionRatio = 1032;
    if (UncompressedSize / MaxCompressionRatio > R.rest().size())
      return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
                   R.rest().size(), UncompressedSize);

    if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
                                               UncompressedSize))
      return std::move(E);
    Uncompressed = UncompressedStorage;
  } else
    return error("Compressed string table, but zlib is unavailable");

  StringTableIn Table;
  llvm::StringSaver Saver(Table.Arena);
  // Re-point the reader at the decoded bytes. The same reader drives the loop
  // and the final error check, so a failed read inside the loop is reported.
  R = Reader(Uncompressed);
  while (!R.eof()) {
    auto Len = R.rest().find(0);
    if (Len == llvm::StringRef::npos)
      return error("Bad string table: not null terminated");
    Table.Strings.push_back(Saver.save(R.consume(Len)));
    R.consume8(); // Skip the NUL terminator.
  }
  if (R.err())
    return error("Truncated string table");
  return std::move(Table);
}
// SYMBOL ENCODING
// Each field of clangd::Symbol is encoded in turn (see implementation).
// - StringRef fields encode as varint (index into the string table)
// - enums encode as the underlying type
// - most numbers encode as varint
// Writes a SymbolLocation as five varints: the string-table index of the file
// URI, then line and column of the start, then line and column of the end.
// Loc.FileURI must already be interned in Strings.
void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
                   llvm::raw_ostream &OS) {
  writeVar(Strings.index(Loc.FileURI), OS);
  writeVar(Loc.Start.line(), OS);
  writeVar(Loc.Start.column(), OS);
  writeVar(Loc.End.line(), OS);
  writeVar(Loc.End.column(), OS);
}
// Reads a SymbolLocation in the layout produced by writeLocation.
// The returned FileURI points into the data of the selected Strings entry.
// On a read failure the Reader's error bit is set; callers should check it.
SymbolLocation readLocation(Reader &Data,
                            llvm::ArrayRef<llvm::StringRef> Strings) {
  SymbolLocation Result;
  Result.FileURI = Data.consumeString(Strings).data();
  // Field order is part of the format: start line/column, then end.
  Result.Start.setLine(Data.consumeVar());
  Result.Start.setColumn(Data.consumeVar());
  Result.End.setLine(Data.consumeVar());
  Result.End.setColumn(Data.consumeVar());
  return Result;
}
// Reads one include graph node: flags byte, URI (string-table index), raw
// digest bytes, then a varint count followed by that many include URIs.
// On malformed input the Reader's error bit is set and the node returned may
// be only partially filled in.
IncludeGraphNode readIncludeGraphNode(Reader &Data,
                                      llvm::ArrayRef<llvm::StringRef> Strings) {
  IncludeGraphNode Node;
  Node.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
  Node.URI = Data.consumeString(Strings);
  // consume() yields an empty range when truncated, so nothing is copied.
  const llvm::StringRef RawDigest = Data.consume(Node.Digest.size());
  std::copy(RawDigest.bytes_begin(), RawDigest.bytes_end(),
            Node.Digest.begin());
  if (Data.consumeSize(Node.DirectIncludes)) {
    for (llvm::StringRef &IncludeURI : Node.DirectIncludes)
      IncludeURI = Data.consumeString(Strings);
  }
  return Node;
}
void writeIncludeGraphNode(const IncludeGraphNode &IGN,
const StringTableOut &Strings,
llvm::raw_ostream &OS) {
OS.write(static_cast<uint8_t>(IGN.Flags));
writeVar(Strings.index(IGN.URI), OS);
llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
IGN.Digest.size());
OS << Hash;
writeVar(IGN.DirectIncludes.size(), OS);
for (llvm::StringRef Include : IGN.DirectIncludes)
writeVar(Strings.index(Include), OS);
}
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
llvm::raw_ostream &OS) {
OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
// symbol IDs should probably be in a string table.
OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
writeVar(Strings.index(Sym.Name), OS);
writeVar(Strings.index(Sym.Scope), OS);
writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
writeLocation(Sym.Definition, Strings, OS);
writeLocation(Sym.CanonicalDeclaration, Strings, OS);
writeVar(Sym.References, OS);
OS.write(static_cast<uint8_t>(Sym.Flags));
OS.write(static_cast<uint8_t>(Sym.Origin));
writeVar(Strings.index(Sym.Signature), OS);
writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
writeVar(Strings.index(Sym.Documentation), OS);
writeVar(Strings.index(Sym.ReturnType), OS);
writeVar(Strings.index(Sym.Type), OS);
auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
writeVar(Strings.index(Include.IncludeHeader), OS);
writeVar(Include.References, OS);
};
writeVar(Sym.IncludeHeaders.size(), OS);
for (const auto &Include : Sym.IncludeHeaders)
WriteInclude(Include);
}
// Reads one Symbol in the layout produced by writeSymbol.
// String fields refer to entries of Strings. On malformed input the Reader's
// error bit is set and the returned symbol may be partially filled in; the
// caller is expected to check Data.err().
Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
  const auto ReadString = [&] { return Data.consumeString(Strings); };

  Symbol Result;
  Result.ID = Data.consumeID();
  Result.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
  Result.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
  Result.Name = ReadString();
  Result.Scope = ReadString();
  Result.TemplateSpecializationArgs = ReadString();
  Result.Definition = readLocation(Data, Strings);
  Result.CanonicalDeclaration = readLocation(Data, Strings);
  Result.References = Data.consumeVar();
  Result.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
  Result.Origin = static_cast<SymbolOrigin>(Data.consume8());
  Result.Signature = ReadString();
  Result.CompletionSnippetSuffix = ReadString();
  Result.Documentation = ReadString();
  Result.ReturnType = ReadString();
  Result.Type = ReadString();

  // An implausible header count leaves IncludeHeaders untouched.
  if (Data.consumeSize(Result.IncludeHeaders)) {
    for (auto &Header : Result.IncludeHeaders) {
      Header.IncludeHeader = ReadString();
      Header.References = Data.consumeVar();
    }
  }
  return Result;
}
// REFS ENCODING
// A refs section has data grouped by Symbol. Each symbol has:
// - SymbolID: 8 bytes
// - NumRefs: varint
// - Ref[NumRefs]
// Fields of Ref are encoded in turn, see implementation.
// Writes all references to one symbol: the raw symbol ID, a varint count, and
// then for each reference its kind byte, location, and raw container ID.
// The file URI of every location must be interned in Strings.
void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
               const StringTableOut &Strings, llvm::raw_ostream &OS) {
  OS << ID.raw();
  writeVar(Refs.size(), OS);
  for (const auto &R : Refs) {
    const auto KindByte = static_cast<unsigned char>(R.Kind);
    OS.write(KindByte);
    writeLocation(R.Location, Strings, OS);
    OS << R.Container.raw();
  }
}
// Reads the references to one symbol, in the layout produced by writeRefs.
// Returns the symbol ID paired with its references. On malformed input the
// Reader's error bit is set and the result may be incomplete.
std::pair<SymbolID, std::vector<Ref>>
readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
  std::pair<SymbolID, std::vector<Ref>> Bundle;
  Bundle.first = Data.consumeID();
  // An implausible count leaves the reference list empty.
  if (Data.consumeSize(Bundle.second)) {
    for (auto &R : Bundle.second) {
      R.Kind = static_cast<RefKind>(Data.consume8());
      R.Location = readLocation(Data, Strings);
      R.Container = Data.consumeID();
    }
  }
  return Bundle;
}
// RELATIONS ENCODING
// A relations section is a flat list of relations. Each relation has:
// - SymbolID (subject): 8 bytes
// - relation kind (predicate): 1 byte
// - SymbolID (object): 8 bytes
// In the future, we might prefer a packed representation if the need arises.
// Writes one relation as: raw subject ID, predicate byte, raw object ID.
void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
  const auto PredicateByte = static_cast<uint8_t>(R.Predicate);
  OS << R.Subject.raw();
  OS.write(PredicateByte);
  OS << R.Object.raw();
}
// Reads one relation in the layout produced by writeRelation.
// The three fields are read strictly in file order: subject, predicate,
// object. Truncated input sets the Reader's error bit.
Relation readRelation(Reader &Data) {
  const SymbolID From = Data.consumeID();
  const auto Kind = static_cast<RelationKind>(Data.consume8());
  const SymbolID To = Data.consumeID();
  return {From, Kind, To};
}
// A compile command whose strings are non-owning StringRefs.
// When writing, the fields are interned in a StringTableOut; when reading,
// they refer to entries of the decoded string table.
struct InternedCompileCommand {
// Serialized as a single string-table index.
llvm::StringRef Directory;
// Serialized as a varint count followed by one string-table index per entry.
std::vector<llvm::StringRef> CommandLine;
};
void writeCompileCommand(const InternedCompileCommand &Cmd,
const StringTableOut &Strings,
llvm::raw_ostream &CmdOS) {
writeVar(Strings.index(Cmd.Directory), CmdOS);
writeVar(Cmd.CommandLine.size(), CmdOS);
for (llvm::StringRef C : Cmd.CommandLine)
writeVar(Strings.index(C), CmdOS);
}
// Reads a compile command in the layout produced by writeCompileCommand.
//
// CmdReader is taken by reference (like the other read* helpers) so that a
// truncated or malformed section is visible to the caller via
// CmdReader.err(). Taking it by value would set the error bit on a private
// copy and leave the caller's reader looking healthy.
//
// The returned StringRefs refer to entries of Strings. On error the command
// may be only partially filled in.
InternedCompileCommand
readCompileCommand(Reader &CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
  InternedCompileCommand Cmd;
  Cmd.Directory = CmdReader.consumeString(Strings);
  // An implausible argument count leaves CommandLine empty.
  if (!CmdReader.consumeSize(Cmd.CommandLine))
    return Cmd;
  for (llvm::StringRef &C : Cmd.CommandLine)
    C = CmdReader.consumeString(Strings);
  return Cmd;
}
// FILE ENCODING
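// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
// - meta: version number
// - srcs: information related to include graph
// - stri: string table
// - symb: symbols
// - refs: references to symbols
// - rela: relations between symbols
// - cmdl: compile command (working directory and command line)
// When reading, only meta and stri are required; other sections are optional.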
// The current versioning scheme is simple - non-current versions are rejected.
// If you make a breaking change, bump this version number to invalidate stored
// data. Later we may want to support some backward compatibility.
// The value is written as a little-endian uint32 in the "meta" chunk and is
// compared for exact equality when a file is read.
constexpr static uint32_t Version = 16;
// Parses an index file in the RIFF container format.
//
// The file must have RIFF type 'CdIx', a "meta" chunk holding exactly the
// current Version, and a "stri" chunk. The "srcs", "symb", "refs", "rela" and
// "cmdl" chunks are each decoded only if present; the matching field of the
// result is left unset otherwise.
//
// Returns an error for a wrong file type or version, a missing required
// chunk, an undecodable string table, or a malformed/truncated section.
llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
  auto File = riff::readFile(Data);
  if (!File)
    return File.takeError();
  if (File->Type != riff::fourCC("CdIx"))
    return error("wrong RIFF filetype: {0}", riff::fourCCStr(File->Type));

  // Index the chunk payloads by ID. try_emplace does not overwrite, so the
  // first chunk with a given ID is the one that is used.
  llvm::StringMap<llvm::StringRef> Chunks;
  for (const auto &Chunk : File->Chunks) {
    const llvm::StringRef ID(Chunk.ID.data(), Chunk.ID.size());
    Chunks.try_emplace(ID, Chunk.Data);
  }

  if (!Chunks.count("meta"))
    return error("missing meta chunk");
  Reader Meta(Chunks.lookup("meta"));
  const uint32_t SeenVersion = Meta.consume32();
  if (SeenVersion != Version)
    return error("wrong version: want {0}, got {1}", Version, SeenVersion);

  // meta chunk is checked above, as we prefer the "version mismatch" error.
  for (llvm::StringRef RequiredChunk : {"stri"}) {
    if (!Chunks.count(RequiredChunk))
      return error("missing required chunk {0}", RequiredChunk);
  }

  auto Strings = readStringTable(Chunks.lookup("stri"));
  if (!Strings)
    return Strings.takeError();
  // Every section below resolves its string indices against this table.
  const llvm::ArrayRef<llvm::StringRef> Table = Strings->Strings;

  IndexFileIn Result;

  if (Chunks.count("srcs")) {
    Reader SrcsReader(Chunks.lookup("srcs"));
    Result.Sources.emplace();
    auto &Graph = *Result.Sources;
    while (!SrcsReader.eof()) {
      auto Node = readIncludeGraphNode(SrcsReader, Table);
      auto Slot = Graph.try_emplace(Node.URI).first;
      auto &Stored = Slot->getValue();
      Stored = std::move(Node);
      // We change all the strings inside the structure to point at the keys in
      // the map, since it is the only copy of the string that's going to live.
      Stored.URI = Slot->getKey();
      for (auto &Include : Stored.DirectIncludes)
        Include = Graph.try_emplace(Include).first->getKey();
    }
    if (SrcsReader.err())
      return error("malformed or truncated include uri");
  }

  if (Chunks.count("symb")) {
    Reader SymbolReader(Chunks.lookup("symb"));
    SymbolSlab::Builder SymbolBuilder;
    while (!SymbolReader.eof())
      SymbolBuilder.insert(readSymbol(SymbolReader, Table));
    if (SymbolReader.err())
      return error("malformed or truncated symbol");
    Result.Symbols = std::move(SymbolBuilder).build();
  }

  if (Chunks.count("refs")) {
    Reader RefsReader(Chunks.lookup("refs"));
    RefSlab::Builder RefBuilder;
    while (!RefsReader.eof()) {
      const auto Bundle = readRefs(RefsReader, Table);
      for (const auto &R : Bundle.second) // FIXME: bulk insert?
        RefBuilder.insert(Bundle.first, R);
    }
    if (RefsReader.err())
      return error("malformed or truncated refs");
    Result.Refs = std::move(RefBuilder).build();
  }

  if (Chunks.count("rela")) {
    Reader RelationsReader(Chunks.lookup("rela"));
    RelationSlab::Builder RelationBuilder;
    while (!RelationsReader.eof())
      RelationBuilder.insert(readRelation(RelationsReader));
    if (RelationsReader.err())
      return error("malformed or truncated relations");
    Result.Relations = std::move(RelationBuilder).build();
  }

  if (Chunks.count("cmdl")) {
    Reader CmdReader(Chunks.lookup("cmdl"));
    InternedCompileCommand Interned = readCompileCommand(CmdReader, Table);
    // NOTE(review): this check only observes parse errors if
    // readCompileCommand operates on this Reader object itself (i.e. receives
    // it by reference rather than as a copy).
    if (CmdReader.err())
      return error("malformed or truncated commandline section");
    // Copy the interned strings into an owning compile command.
    Result.Cmd.emplace();
    Result.Cmd->Directory = std::string(Interned.Directory);
    Result.Cmd->CommandLine.reserve(Interned.CommandLine.size());
    for (llvm::StringRef Arg : Interned.CommandLine)
      Result.Cmd->CommandLine.emplace_back(Arg);
  }

  return std::move(Result);
}
// Invokes CB on a mutable reference to every string held by IGN: first its
// URI, then each direct include in order. The callback may overwrite the
// strings (e.g. to replace them with interned copies).
template <class Callback>
void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
  CB(IGN.URI);
  for (auto It = IGN.DirectIncludes.begin(), End = IGN.DirectIncludes.end();
       It != End; ++It)
    CB(*It);
}
void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
RIFF.Type = riff::fourCC("CdIx");
llvm::SmallString<4> Meta;
{
llvm::raw_svector_ostream MetaOS(Meta);
write32(Version, MetaOS);
}
RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
StringTableOut Strings;
std::vector<Symbol> Symbols;
for (const auto &Sym : *Data.Symbols) {
Symbols.emplace_back(Sym);
visitStrings(Symbols.back(),
[&](llvm::StringRef &S) { Strings.intern(S); });
}
std::vector<IncludeGraphNode> Sources;
if (Data.Sources)
for (const auto &Source : *Data.Sources) {
Sources.push_back(Source.getValue());
visitStrings(Sources.back(),
[&](llvm::StringRef &S) { Strings.intern(S); });
}
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
Refs.emplace_back(Sym);
for (auto &Ref : Refs.back().second) {
llvm::StringRef File = Ref.Location.FileURI;
Strings.intern(File);
Ref.Location.FileURI = File.data();
}
}
}
std::vector<Relation> Relations;
if (Data.Relations) {
for (const auto &Relation : *Data.Relations) {
Relations.emplace_back(Relation);
// No strings to be interned in relations.
}
}
InternedCompileCommand InternedCmd;
if (Data.Cmd) {
InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
InternedCmd.Directory = Data.Cmd->Directory;
Strings.intern(InternedCmd.Directory);
for (llvm::StringRef C : Data.Cmd->CommandLine) {
InternedCmd.CommandLine.emplace_back(C);
Strings.intern(InternedCmd.CommandLine.back());
}
}
std::string StringSection;
{
llvm::raw_string_ostream StringOS(StringSection);
Strings.finalize(StringOS);
}
RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
std::string SymbolSection;
{
llvm::raw_string_ostream SymbolOS(SymbolSection);
for (const auto &Sym : Symbols)
writeSymbol(Sym, Strings, SymbolOS);
}
RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
std::string RefsSection;
if (Data.Refs) {
{
llvm::raw_string_ostream RefsOS(RefsSection);
for (const auto &Sym : Refs)
writeRefs(Sym.first, Sym.second, Strings, RefsOS);
}
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
std::string RelationSection;
if (Data.Relations) {
{
llvm::raw_string_ostream RelationOS{RelationSection};
for (const auto &Relation : Relations)
writeRelation(Relation, RelationOS);
}
RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
}
std::string SrcsSection;
{
{
llvm::raw_string_ostream SrcsOS(SrcsSection);
for (const auto &SF : Sources)
writeIncludeGraphNode(SF, Strings, SrcsOS);
}
RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
}
std::string CmdlSection;
if (Data.Cmd) {
{
llvm::raw_string_ostream CmdOS(CmdlSection);
writeCompileCommand(InternedCmd, Strings, CmdOS);
}
RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
}
OS << RIFF;
}
} // namespace
// Defined in YAMLSerialization.cpp.
// YAML counterparts of writeRIFF/readRIFF: operator<< calls writeYAML when the
// output format is IndexFileFormat::YAML, and readIndexFile falls back to
// readYAML for data that does not start with "RIFF".
void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);
// Serializes O to OS in the format selected by O.Format, and returns OS.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
  switch (O.Format) {
  case IndexFileFormat::YAML:
    writeYAML(O, OS);
    return OS;
  case IndexFileFormat::RIFF:
    writeRIFF(O, OS);
    return OS;
  }
  // Reached only if Format holds a value outside the enumerators above.
  return OS;
}
// Parses serialized index data, detecting the format from its contents.
// Data beginning with "RIFF" is parsed as RIFF (and its error, if any, is
// returned as-is); anything else is parsed as YAML, with a failure wrapped in
// a message explaining that neither format matched.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
  if (Data.startswith("RIFF"))
    return readRIFF(Data);

  auto YAMLContents = readYAML(Data);
  if (!YAMLContents)
    return error("Not a RIFF file and failed to parse as YAML: {0}",
                 YAMLContents.takeError());
  return std::move(*YAMLContents);
}
// Loads an index file from disk and builds an in-memory index from it.
//
// SymbolFilename is the path of the serialized index (RIFF or YAML).
// UseDex selects dex::Dex as the index implementation; otherwise MemIndex is
// used.
//
// Returns the built index, or nullptr (after logging the reason) if the file
// cannot be opened or parsed.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
                                       bool UseDex) {
  trace::Span OverallTracer("LoadIndex");
  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
  if (!Buffer) {
    elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
    return nullptr;
  }

  SymbolSlab Symbols;
  RefSlab Refs;
  RelationSlab Relations;
  {
    trace::Span Tracer("ParseIndex");
    auto Parsed = readIndexFile(Buffer->get()->getBuffer());
    if (!Parsed) {
      elog("Bad index file: {0}", Parsed.takeError());
      return nullptr;
    }
    // Sections missing from the file leave the matching slab empty.
    if (Parsed->Symbols)
      Symbols = std::move(*Parsed->Symbols);
    if (Parsed->Refs)
      Refs = std::move(*Parsed->Refs);
    if (Parsed->Relations)
      Relations = std::move(*Parsed->Relations);
  }

  // Record the sizes now: the slabs are moved into the index below.
  const size_t NumSym = Symbols.size();
  const size_t NumRefs = Refs.numRefs();
  const size_t NumRelations = Relations.size();

  trace::Span Tracer("BuildIndex");
  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
                                        std::move(Relations))
                      : MemIndex::build(std::move(Symbols), std::move(Refs),
                                        std::move(Relations));
  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
       " - number of symbols: {3}\n"
       " - number of refs: {4}\n"
       " - number of relations: {5}",
       UseDex ? "Dex" : "MemIndex", SymbolFilename,
       Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
  return Index;
}
} // namespace clangd
} // namespace clang