forked from OSchip/llvm-project
Moved Symbol into its own header and implementation file
Reviewers: ioeric Subscribers: mgorny, jkorous, arphaman, kadircet, jdoerfert, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D58774 llvm-svn: 355088
This commit is contained in:
parent
a073a18460
commit
08b49b5338
|
@ -63,6 +63,7 @@ add_clang_library(clangDaemon
|
||||||
index/MemIndex.cpp
|
index/MemIndex.cpp
|
||||||
index/Merge.cpp
|
index/Merge.cpp
|
||||||
index/Serialization.cpp
|
index/Serialization.cpp
|
||||||
|
index/Symbol.cpp
|
||||||
index/SymbolCollector.cpp
|
index/SymbolCollector.cpp
|
||||||
index/SymbolID.cpp
|
index/SymbolID.cpp
|
||||||
index/SymbolLocation.cpp
|
index/SymbolLocation.cpp
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include "Trace.h"
|
#include "Trace.h"
|
||||||
#include "URI.h"
|
#include "URI.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "clang/AST/Decl.h"
|
#include "clang/AST/Decl.h"
|
||||||
#include "clang/AST/DeclBase.h"
|
#include "clang/AST/DeclBase.h"
|
||||||
#include "clang/Basic/LangOptions.h"
|
#include "clang/Basic/LangOptions.h"
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "Path.h"
|
#include "Path.h"
|
||||||
#include "Protocol.h"
|
#include "Protocol.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "index/SymbolOrigin.h"
|
#include "index/SymbolOrigin.h"
|
||||||
#include "clang/Frontend/PrecompiledPreamble.h"
|
#include "clang/Frontend/PrecompiledPreamble.h"
|
||||||
#include "clang/Sema/CodeCompleteConsumer.h"
|
#include "clang/Sema/CodeCompleteConsumer.h"
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
#include "Path.h"
|
#include "Path.h"
|
||||||
#include "Protocol.h"
|
#include "Protocol.h"
|
||||||
#include "SourceCode.h"
|
#include "SourceCode.h"
|
||||||
#include "index/Index.h"
|
#include "index/Symbol.h"
|
||||||
#include "clang/Format/Format.h"
|
#include "clang/Format/Format.h"
|
||||||
#include "clang/Lex/HeaderSearch.h"
|
#include "clang/Lex/HeaderSearch.h"
|
||||||
#include "clang/Lex/PPCallbacks.h"
|
#include "clang/Lex/PPCallbacks.h"
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "SourceCode.h"
|
#include "SourceCode.h"
|
||||||
#include "Trace.h"
|
#include "Trace.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "clang/AST/Decl.h"
|
#include "clang/AST/Decl.h"
|
||||||
#include "clang/AST/DeclBase.h"
|
#include "clang/AST/DeclBase.h"
|
||||||
#include "clang/AST/NestedNameSpecifier.h"
|
#include "clang/AST/NestedNameSpecifier.h"
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "Diagnostics.h"
|
#include "Diagnostics.h"
|
||||||
#include "Headers.h"
|
#include "Headers.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "clang/AST/Type.h"
|
#include "clang/AST/Type.h"
|
||||||
#include "clang/Basic/Diagnostic.h"
|
#include "clang/Basic/Diagnostic.h"
|
||||||
#include "clang/Basic/SourceLocation.h"
|
#include "clang/Basic/SourceLocation.h"
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "Compiler.h"
|
#include "Compiler.h"
|
||||||
#include "Logger.h"
|
#include "Logger.h"
|
||||||
#include "SourceCode.h"
|
#include "SourceCode.h"
|
||||||
|
#include "Symbol.h"
|
||||||
#include "Threading.h"
|
#include "Threading.h"
|
||||||
#include "Trace.h"
|
#include "Trace.h"
|
||||||
#include "URI.h"
|
#include "URI.h"
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "MemIndex.h"
|
#include "MemIndex.h"
|
||||||
#include "Merge.h"
|
#include "Merge.h"
|
||||||
#include "index/CanonicalIncludes.h"
|
#include "index/CanonicalIncludes.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "clang/Lex/Preprocessor.h"
|
#include "clang/Lex/Preprocessor.h"
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
|
|
@ -16,67 +16,6 @@
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace clangd {
|
namespace clangd {
|
||||||
|
|
||||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag F) {
|
|
||||||
if (F == Symbol::None)
|
|
||||||
return OS << "None";
|
|
||||||
std::string S;
|
|
||||||
if (F & Symbol::Deprecated)
|
|
||||||
S += "deprecated|";
|
|
||||||
if (F & Symbol::IndexedForCodeCompletion)
|
|
||||||
S += "completion|";
|
|
||||||
return OS << llvm::StringRef(S).rtrim('|');
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
|
|
||||||
return OS << S.Scope << S.Name;
|
|
||||||
}
|
|
||||||
|
|
||||||
float quality(const Symbol &S) {
|
|
||||||
// This avoids a sharp gradient for tail symbols, and also neatly avoids the
|
|
||||||
// question of whether 0 references means a bad symbol or missing data.
|
|
||||||
if (S.References < 3)
|
|
||||||
return 1;
|
|
||||||
return std::log(S.References);
|
|
||||||
}
|
|
||||||
|
|
||||||
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
|
|
||||||
auto It = std::lower_bound(
|
|
||||||
Symbols.begin(), Symbols.end(), ID,
|
|
||||||
[](const Symbol &S, const SymbolID &I) { return S.ID < I; });
|
|
||||||
if (It != Symbols.end() && It->ID == ID)
|
|
||||||
return It;
|
|
||||||
return Symbols.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy the underlying data of the symbol into the owned arena.
|
|
||||||
static void own(Symbol &S, llvm::UniqueStringSaver &Strings) {
|
|
||||||
visitStrings(S, [&](llvm::StringRef &V) { V = Strings.save(V); });
|
|
||||||
}
|
|
||||||
|
|
||||||
void SymbolSlab::Builder::insert(const Symbol &S) {
|
|
||||||
auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
|
|
||||||
if (R.second) {
|
|
||||||
Symbols.push_back(S);
|
|
||||||
own(Symbols.back(), UniqueStrings);
|
|
||||||
} else {
|
|
||||||
auto &Copy = Symbols[R.first->second] = S;
|
|
||||||
own(Copy, UniqueStrings);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SymbolSlab SymbolSlab::Builder::build() && {
|
|
||||||
Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit.
|
|
||||||
// Sort symbols so the slab can binary search over them.
|
|
||||||
llvm::sort(Symbols,
|
|
||||||
[](const Symbol &L, const Symbol &R) { return L.ID < R.ID; });
|
|
||||||
// We may have unused strings from overwritten symbols. Build a new arena.
|
|
||||||
llvm::BumpPtrAllocator NewArena;
|
|
||||||
llvm::UniqueStringSaver Strings(NewArena);
|
|
||||||
for (auto &S : Symbols)
|
|
||||||
own(S, Strings);
|
|
||||||
return SymbolSlab(std::move(NewArena), std::move(Symbols));
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, RefKind K) {
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, RefKind K) {
|
||||||
if (K == RefKind::Unknown)
|
if (K == RefKind::Unknown)
|
||||||
return OS << "Unknown";
|
return OS << "Unknown";
|
||||||
|
|
|
@ -10,9 +10,9 @@
|
||||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||||
|
|
||||||
#include "ExpectedTypes.h"
|
#include "ExpectedTypes.h"
|
||||||
|
#include "Symbol.h"
|
||||||
#include "SymbolID.h"
|
#include "SymbolID.h"
|
||||||
#include "SymbolLocation.h"
|
#include "SymbolLocation.h"
|
||||||
#include "SymbolOrigin.h"
|
|
||||||
#include "clang/Index/IndexSymbol.h"
|
#include "clang/Index/IndexSymbol.h"
|
||||||
#include "clang/Lex/Lexer.h"
|
#include "clang/Lex/Lexer.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
|
@ -22,7 +22,6 @@
|
||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/Support/JSON.h"
|
#include "llvm/Support/JSON.h"
|
||||||
#include "llvm/Support/StringSaver.h"
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
@ -32,207 +31,6 @@
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace clangd {
|
namespace clangd {
|
||||||
|
|
||||||
// The class presents a C++ symbol, e.g. class, function.
|
|
||||||
//
|
|
||||||
// WARNING: Symbols do not own much of their underlying data - typically strings
|
|
||||||
// are owned by a SymbolSlab. They should be treated as non-owning references.
|
|
||||||
// Copies are shallow.
|
|
||||||
// When adding new unowned data fields to Symbol, remember to update:
|
|
||||||
// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
|
|
||||||
// - mergeSymbol in Merge.cpp, to properly combine two Symbols.
|
|
||||||
//
|
|
||||||
// A fully documented symbol can be split as:
|
|
||||||
// size_type std::map<k, t>::count(const K& key) const
|
|
||||||
// | Return | Scope |Name| Signature |
|
|
||||||
// We split up these components to allow display flexibility later.
|
|
||||||
struct Symbol {
|
|
||||||
// The ID of the symbol.
|
|
||||||
SymbolID ID;
|
|
||||||
// The symbol information, like symbol kind.
|
|
||||||
index::SymbolInfo SymInfo;
|
|
||||||
// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
|
|
||||||
llvm::StringRef Name;
|
|
||||||
// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
|
|
||||||
llvm::StringRef Scope;
|
|
||||||
// The location of the symbol's definition, if one was found.
|
|
||||||
// This just covers the symbol name (e.g. without class/function body).
|
|
||||||
SymbolLocation Definition;
|
|
||||||
// The location of the preferred declaration of the symbol.
|
|
||||||
// This just covers the symbol name.
|
|
||||||
// This may be the same as Definition.
|
|
||||||
//
|
|
||||||
// A C++ symbol may have multiple declarations, and we pick one to prefer.
|
|
||||||
// * For classes, the canonical declaration should be the definition.
|
|
||||||
// * For non-inline functions, the canonical declaration typically appears
|
|
||||||
// in the ".h" file corresponding to the definition.
|
|
||||||
SymbolLocation CanonicalDeclaration;
|
|
||||||
// The number of translation units that reference this symbol from their main
|
|
||||||
// file. This number is only meaningful if aggregated in an index.
|
|
||||||
unsigned References = 0;
|
|
||||||
/// Where this symbol came from. Usually an index provides a constant value.
|
|
||||||
SymbolOrigin Origin = SymbolOrigin::Unknown;
|
|
||||||
/// A brief description of the symbol that can be appended in the completion
|
|
||||||
/// candidate list. For example, "(X x, Y y) const" is a function signature.
|
|
||||||
/// Only set when the symbol is indexed for completion.
|
|
||||||
llvm::StringRef Signature;
|
|
||||||
/// What to insert when completing this symbol, after the symbol name.
|
|
||||||
/// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
|
|
||||||
/// (When snippets are disabled, the symbol name alone is used).
|
|
||||||
/// Only set when the symbol is indexed for completion.
|
|
||||||
llvm::StringRef CompletionSnippetSuffix;
|
|
||||||
/// Documentation including comment for the symbol declaration.
|
|
||||||
llvm::StringRef Documentation;
|
|
||||||
/// Type when this symbol is used in an expression. (Short display form).
|
|
||||||
/// e.g. return type of a function, or type of a variable.
|
|
||||||
/// Only set when the symbol is indexed for completion.
|
|
||||||
llvm::StringRef ReturnType;
|
|
||||||
|
|
||||||
/// Raw representation of the OpaqueType of the symbol, used for scoring
|
|
||||||
/// purposes.
|
|
||||||
/// Only set when the symbol is indexed for completion.
|
|
||||||
llvm::StringRef Type;
|
|
||||||
|
|
||||||
struct IncludeHeaderWithReferences {
|
|
||||||
IncludeHeaderWithReferences() = default;
|
|
||||||
|
|
||||||
IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
|
|
||||||
unsigned References)
|
|
||||||
: IncludeHeader(IncludeHeader), References(References) {}
|
|
||||||
|
|
||||||
/// This can be either a URI of the header to be #include'd
|
|
||||||
/// for this symbol, or a literal header quoted with <> or "" that is
|
|
||||||
/// suitable to be included directly. When it is a URI, the exact #include
|
|
||||||
/// path needs to be calculated according to the URI scheme.
|
|
||||||
///
|
|
||||||
/// Note that the include header is a canonical include for the symbol and
|
|
||||||
/// can be different from FileURI in the CanonicalDeclaration.
|
|
||||||
llvm::StringRef IncludeHeader = "";
|
|
||||||
/// The number of translation units that reference this symbol and include
|
|
||||||
/// this header. This number is only meaningful if aggregated in an index.
|
|
||||||
unsigned References = 0;
|
|
||||||
};
|
|
||||||
/// One Symbol can potentially be incuded via different headers.
|
|
||||||
/// - If we haven't seen a definition, this covers all declarations.
|
|
||||||
/// - If we have seen a definition, this covers declarations visible from
|
|
||||||
/// any definition.
|
|
||||||
/// Only set when the symbol is indexed for completion.
|
|
||||||
llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
|
|
||||||
|
|
||||||
enum SymbolFlag : uint8_t {
|
|
||||||
None = 0,
|
|
||||||
/// Whether or not this symbol is meant to be used for the code completion.
|
|
||||||
/// See also isIndexedForCodeCompletion().
|
|
||||||
/// Note that we don't store completion information (signature, snippet,
|
|
||||||
/// type, inclues) if the symbol is not indexed for code completion.
|
|
||||||
IndexedForCodeCompletion = 1 << 0,
|
|
||||||
/// Indicates if the symbol is deprecated.
|
|
||||||
Deprecated = 1 << 1,
|
|
||||||
// Symbol is an implementation detail.
|
|
||||||
ImplementationDetail = 1 << 2,
|
|
||||||
// Symbol is visible to other files (not e.g. a static helper function).
|
|
||||||
VisibleOutsideFile = 1 << 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
SymbolFlag Flags = SymbolFlag::None;
|
|
||||||
/// FIXME: also add deprecation message and fixit?
|
|
||||||
};
|
|
||||||
inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, Symbol::SymbolFlag B) {
|
|
||||||
return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
|
|
||||||
static_cast<uint8_t>(B));
|
|
||||||
}
|
|
||||||
inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, Symbol::SymbolFlag B) {
|
|
||||||
return A = A | B;
|
|
||||||
}
|
|
||||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
|
|
||||||
raw_ostream &operator<<(raw_ostream &, Symbol::SymbolFlag);
|
|
||||||
|
|
||||||
// Invokes Callback with each StringRef& contained in the Symbol.
|
|
||||||
// Useful for deduplicating backing strings.
|
|
||||||
template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
|
|
||||||
CB(S.Name);
|
|
||||||
CB(S.Scope);
|
|
||||||
CB(S.Signature);
|
|
||||||
CB(S.CompletionSnippetSuffix);
|
|
||||||
CB(S.Documentation);
|
|
||||||
CB(S.ReturnType);
|
|
||||||
CB(S.Type);
|
|
||||||
auto RawCharPointerCB = [&CB](const char *&P) {
|
|
||||||
llvm::StringRef S(P);
|
|
||||||
CB(S);
|
|
||||||
assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
|
|
||||||
P = S.data();
|
|
||||||
};
|
|
||||||
RawCharPointerCB(S.CanonicalDeclaration.FileURI);
|
|
||||||
RawCharPointerCB(S.Definition.FileURI);
|
|
||||||
|
|
||||||
for (auto &Include : S.IncludeHeaders)
|
|
||||||
CB(Include.IncludeHeader);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Computes query-independent quality score for a Symbol.
|
|
||||||
// This currently falls in the range [1, ln(#indexed documents)].
|
|
||||||
// FIXME: this should probably be split into symbol -> signals
|
|
||||||
// and signals -> score, so it can be reused for Sema completions.
|
|
||||||
float quality(const Symbol &S);
|
|
||||||
|
|
||||||
// An immutable symbol container that stores a set of symbols.
|
|
||||||
// The container will maintain the lifetime of the symbols.
|
|
||||||
class SymbolSlab {
|
|
||||||
public:
|
|
||||||
using const_iterator = std::vector<Symbol>::const_iterator;
|
|
||||||
using iterator = const_iterator;
|
|
||||||
using value_type = Symbol;
|
|
||||||
|
|
||||||
SymbolSlab() = default;
|
|
||||||
|
|
||||||
const_iterator begin() const { return Symbols.begin(); }
|
|
||||||
const_iterator end() const { return Symbols.end(); }
|
|
||||||
const_iterator find(const SymbolID &SymID) const;
|
|
||||||
|
|
||||||
size_t size() const { return Symbols.size(); }
|
|
||||||
bool empty() const { return Symbols.empty(); }
|
|
||||||
// Estimates the total memory usage.
|
|
||||||
size_t bytes() const {
|
|
||||||
return sizeof(*this) + Arena.getTotalMemory() +
|
|
||||||
Symbols.capacity() * sizeof(Symbol);
|
|
||||||
}
|
|
||||||
|
|
||||||
// SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
|
|
||||||
// The frozen SymbolSlab will use less memory.
|
|
||||||
class Builder {
|
|
||||||
public:
|
|
||||||
Builder() : UniqueStrings(Arena) {}
|
|
||||||
|
|
||||||
// Adds a symbol, overwriting any existing one with the same ID.
|
|
||||||
// This is a deep copy: underlying strings will be owned by the slab.
|
|
||||||
void insert(const Symbol &S);
|
|
||||||
|
|
||||||
// Returns the symbol with an ID, if it exists. Valid until next insert().
|
|
||||||
const Symbol *find(const SymbolID &ID) {
|
|
||||||
auto I = SymbolIndex.find(ID);
|
|
||||||
return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Consumes the builder to finalize the slab.
|
|
||||||
SymbolSlab build() &&;
|
|
||||||
|
|
||||||
private:
|
|
||||||
llvm::BumpPtrAllocator Arena;
|
|
||||||
// Intern table for strings. Contents are on the arena.
|
|
||||||
llvm::UniqueStringSaver UniqueStrings;
|
|
||||||
std::vector<Symbol> Symbols;
|
|
||||||
// Values are indices into Symbols vector.
|
|
||||||
llvm::DenseMap<SymbolID, size_t> SymbolIndex;
|
|
||||||
};
|
|
||||||
|
|
||||||
private:
|
|
||||||
SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
|
|
||||||
: Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
|
|
||||||
|
|
||||||
llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
|
|
||||||
std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Describes the kind of a cross-reference.
|
// Describes the kind of a cross-reference.
|
||||||
//
|
//
|
||||||
// This is a bitfield which can be combined from different kinds.
|
// This is a bitfield which can be combined from different kinds.
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "Merge.h"
|
#include "Merge.h"
|
||||||
#include "Logger.h"
|
#include "Logger.h"
|
||||||
#include "Trace.h"
|
#include "Trace.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "index/SymbolLocation.h"
|
#include "index/SymbolLocation.h"
|
||||||
#include "index/SymbolOrigin.h"
|
#include "index/SymbolOrigin.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
|
|
||||||
#include "Headers.h"
|
#include "Headers.h"
|
||||||
#include "Index.h"
|
#include "Index.h"
|
||||||
|
#include "index/Symbol.h"
|
||||||
#include "llvm/Support/Error.h"
|
#include "llvm/Support/Error.h"
|
||||||
|
|
||||||
namespace clang {
|
namespace clang {
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
//===--- Symbol.cpp ----------------------------------------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "Symbol.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace clangd {
|
||||||
|
|
||||||
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag F) {
|
||||||
|
if (F == Symbol::None)
|
||||||
|
return OS << "None";
|
||||||
|
std::string S;
|
||||||
|
if (F & Symbol::Deprecated)
|
||||||
|
S += "deprecated|";
|
||||||
|
if (F & Symbol::IndexedForCodeCompletion)
|
||||||
|
S += "completion|";
|
||||||
|
return OS << llvm::StringRef(S).rtrim('|');
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
|
||||||
|
return OS << S.Scope << S.Name;
|
||||||
|
}
|
||||||
|
|
||||||
|
float quality(const Symbol &S) {
|
||||||
|
// This avoids a sharp gradient for tail symbols, and also neatly avoids the
|
||||||
|
// question of whether 0 references means a bad symbol or missing data.
|
||||||
|
if (S.References < 3)
|
||||||
|
return 1;
|
||||||
|
return std::log(S.References);
|
||||||
|
}
|
||||||
|
|
||||||
|
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
|
||||||
|
auto It = std::lower_bound(
|
||||||
|
Symbols.begin(), Symbols.end(), ID,
|
||||||
|
[](const Symbol &S, const SymbolID &I) { return S.ID < I; });
|
||||||
|
if (It != Symbols.end() && It->ID == ID)
|
||||||
|
return It;
|
||||||
|
return Symbols.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the underlying data of the symbol into the owned arena.
|
||||||
|
static void own(Symbol &S, llvm::UniqueStringSaver &Strings) {
|
||||||
|
visitStrings(S, [&](llvm::StringRef &V) { V = Strings.save(V); });
|
||||||
|
}
|
||||||
|
|
||||||
|
void SymbolSlab::Builder::insert(const Symbol &S) {
|
||||||
|
auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
|
||||||
|
if (R.second) {
|
||||||
|
Symbols.push_back(S);
|
||||||
|
own(Symbols.back(), UniqueStrings);
|
||||||
|
} else {
|
||||||
|
auto &Copy = Symbols[R.first->second] = S;
|
||||||
|
own(Copy, UniqueStrings);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SymbolSlab SymbolSlab::Builder::build() && {
|
||||||
|
Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit.
|
||||||
|
// Sort symbols so the slab can binary search over them.
|
||||||
|
llvm::sort(Symbols,
|
||||||
|
[](const Symbol &L, const Symbol &R) { return L.ID < R.ID; });
|
||||||
|
// We may have unused strings from overwritten symbols. Build a new arena.
|
||||||
|
llvm::BumpPtrAllocator NewArena;
|
||||||
|
llvm::UniqueStringSaver Strings(NewArena);
|
||||||
|
for (auto &S : Symbols)
|
||||||
|
own(S, Strings);
|
||||||
|
return SymbolSlab(std::move(NewArena), std::move(Symbols));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace clangd
|
||||||
|
} // namespace clang
|
|
@ -0,0 +1,231 @@
|
||||||
|
//===--- Symbol.h ------------------------------------------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
|
||||||
|
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
|
||||||
|
|
||||||
|
#include "SymbolID.h"
|
||||||
|
#include "SymbolLocation.h"
|
||||||
|
#include "SymbolOrigin.h"
|
||||||
|
#include "clang/Index/IndexSymbol.h"
|
||||||
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/Support/StringSaver.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace clangd {
|
||||||
|
|
||||||
|
/// This class represents a C++ symbol, e.g. a class or a function.
|
||||||
|
///
|
||||||
|
/// WARNING: Symbols do not own much of their underlying data - typically
|
||||||
|
/// strings are owned by a SymbolSlab. They should be treated as non-owning
|
||||||
|
/// references. Copies are shallow.
|
||||||
|
///
|
||||||
|
/// When adding new unowned data fields to Symbol, remember to update:
|
||||||
|
/// - SymbolSlab::Builder in Symbol.cpp, to copy them to the slab's storage.
|
||||||
|
/// - mergeSymbol in Merge.cpp, to properly combine two Symbols.
|
||||||
|
///
|
||||||
|
/// A fully documented symbol can be split as:
|
||||||
|
/// size_type std::map<k, t>::count(const K& key) const
|
||||||
|
/// | Return  |     Scope     |Name|    Signature     |
|
||||||
|
/// We split up these components to allow display flexibility later.
|
||||||
|
struct Symbol {
|
||||||
|
/// The ID of the symbol.
|
||||||
|
SymbolID ID;
|
||||||
|
/// The symbol information, like symbol kind.
|
||||||
|
index::SymbolInfo SymInfo;
|
||||||
|
/// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
|
||||||
|
llvm::StringRef Name;
|
||||||
|
/// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
|
||||||
|
llvm::StringRef Scope;
|
||||||
|
/// The location of the symbol's definition, if one was found.
|
||||||
|
/// This just covers the symbol name (e.g. without class/function body).
|
||||||
|
SymbolLocation Definition;
|
||||||
|
/// The location of the preferred declaration of the symbol.
|
||||||
|
/// This just covers the symbol name.
|
||||||
|
/// This may be the same as Definition.
|
||||||
|
///
|
||||||
|
/// A C++ symbol may have multiple declarations, and we pick one to prefer.
|
||||||
|
/// * For classes, the canonical declaration should be the definition.
|
||||||
|
/// * For non-inline functions, the canonical declaration typically appears
|
||||||
|
/// in the ".h" file corresponding to the definition.
|
||||||
|
SymbolLocation CanonicalDeclaration;
|
||||||
|
/// The number of translation units that reference this symbol from their main
|
||||||
|
/// file. This number is only meaningful if aggregated in an index.
|
||||||
|
unsigned References = 0;
|
||||||
|
/// Where this symbol came from. Usually an index provides a constant value.
|
||||||
|
SymbolOrigin Origin = SymbolOrigin::Unknown;
|
||||||
|
/// A brief description of the symbol that can be appended in the completion
|
||||||
|
/// candidate list. For example, "(X x, Y y) const" is a function signature.
|
||||||
|
/// Only set when the symbol is indexed for completion.
|
||||||
|
llvm::StringRef Signature;
|
||||||
|
/// What to insert when completing this symbol, after the symbol name.
|
||||||
|
/// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
|
||||||
|
/// (When snippets are disabled, the symbol name alone is used).
|
||||||
|
/// Only set when the symbol is indexed for completion.
|
||||||
|
llvm::StringRef CompletionSnippetSuffix;
|
||||||
|
/// Documentation including comment for the symbol declaration.
|
||||||
|
llvm::StringRef Documentation;
|
||||||
|
/// Type when this symbol is used in an expression. (Short display form).
|
||||||
|
/// e.g. return type of a function, or type of a variable.
|
||||||
|
/// Only set when the symbol is indexed for completion.
|
||||||
|
llvm::StringRef ReturnType;
|
||||||
|
|
||||||
|
/// Raw representation of the OpaqueType of the symbol, used for scoring
|
||||||
|
/// purposes.
|
||||||
|
/// Only set when the symbol is indexed for completion.
|
||||||
|
llvm::StringRef Type;
|
||||||
|
|
||||||
|
struct IncludeHeaderWithReferences {
|
||||||
|
IncludeHeaderWithReferences() = default;
|
||||||
|
|
||||||
|
IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
|
||||||
|
unsigned References)
|
||||||
|
: IncludeHeader(IncludeHeader), References(References) {}
|
||||||
|
|
||||||
|
/// This can be either a URI of the header to be #include'd
|
||||||
|
/// for this symbol, or a literal header quoted with <> or "" that is
|
||||||
|
/// suitable to be included directly. When it is a URI, the exact #include
|
||||||
|
/// path needs to be calculated according to the URI scheme.
|
||||||
|
///
|
||||||
|
/// Note that the include header is a canonical include for the symbol and
|
||||||
|
/// can be different from FileURI in the CanonicalDeclaration.
|
||||||
|
llvm::StringRef IncludeHeader = "";
|
||||||
|
/// The number of translation units that reference this symbol and include
|
||||||
|
/// this header. This number is only meaningful if aggregated in an index.
|
||||||
|
unsigned References = 0;
|
||||||
|
};
|
||||||
|
/// One Symbol can potentially be included via different headers.
|
||||||
|
/// - If we haven't seen a definition, this covers all declarations.
|
||||||
|
/// - If we have seen a definition, this covers declarations visible from
|
||||||
|
/// any definition.
|
||||||
|
/// Only set when the symbol is indexed for completion.
|
||||||
|
llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
|
||||||
|
|
||||||
|
enum SymbolFlag : uint8_t {
|
||||||
|
None = 0,
|
||||||
|
/// Whether or not this symbol is meant to be used for the code completion.
|
||||||
|
/// See also isIndexedForCodeCompletion().
|
||||||
|
/// Note that we don't store completion information (signature, snippet,
|
||||||
|
/// type, includes) if the symbol is not indexed for code completion.
|
||||||
|
IndexedForCodeCompletion = 1 << 0,
|
||||||
|
/// Indicates if the symbol is deprecated.
|
||||||
|
Deprecated = 1 << 1,
|
||||||
|
/// Symbol is an implementation detail.
|
||||||
|
ImplementationDetail = 1 << 2,
|
||||||
|
/// Symbol is visible to other files (not e.g. a static helper function).
|
||||||
|
VisibleOutsideFile = 1 << 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
SymbolFlag Flags = SymbolFlag::None;
|
||||||
|
/// FIXME: also add deprecation message and fixit?
|
||||||
|
};
|
||||||
|
|
||||||
|
inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A,
|
||||||
|
Symbol::SymbolFlag B) {
|
||||||
|
return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
|
||||||
|
static_cast<uint8_t>(B));
|
||||||
|
}
|
||||||
|
inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A,
|
||||||
|
Symbol::SymbolFlag B) {
|
||||||
|
return A = A | B;
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
|
||||||
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
|
||||||
|
|
||||||
|
/// Invokes Callback with each StringRef& contained in the Symbol.
|
||||||
|
/// Useful for deduplicating backing strings.
|
||||||
|
template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
|
||||||
|
CB(S.Name);
|
||||||
|
CB(S.Scope);
|
||||||
|
CB(S.Signature);
|
||||||
|
CB(S.CompletionSnippetSuffix);
|
||||||
|
CB(S.Documentation);
|
||||||
|
CB(S.ReturnType);
|
||||||
|
CB(S.Type);
|
||||||
|
auto RawCharPointerCB = [&CB](const char *&P) {
|
||||||
|
llvm::StringRef S(P);
|
||||||
|
CB(S);
|
||||||
|
assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
|
||||||
|
P = S.data();
|
||||||
|
};
|
||||||
|
RawCharPointerCB(S.CanonicalDeclaration.FileURI);
|
||||||
|
RawCharPointerCB(S.Definition.FileURI);
|
||||||
|
|
||||||
|
for (auto &Include : S.IncludeHeaders)
|
||||||
|
CB(Include.IncludeHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes query-independent quality score for a Symbol.
|
||||||
|
/// This currently falls in the range [1, ln(#indexed documents)].
|
||||||
|
/// FIXME: this should probably be split into symbol -> signals
|
||||||
|
/// and signals -> score, so it can be reused for Sema completions.
|
||||||
|
float quality(const Symbol &S);
|
||||||
|
|
||||||
|
/// An immutable symbol container that stores a set of symbols.
|
||||||
|
/// The container will maintain the lifetime of the symbols.
|
||||||
|
class SymbolSlab {
|
||||||
|
public:
|
||||||
|
using const_iterator = std::vector<Symbol>::const_iterator;
|
||||||
|
using iterator = const_iterator;
|
||||||
|
using value_type = Symbol;
|
||||||
|
|
||||||
|
SymbolSlab() = default;
|
||||||
|
|
||||||
|
const_iterator begin() const { return Symbols.begin(); }
|
||||||
|
const_iterator end() const { return Symbols.end(); }
|
||||||
|
const_iterator find(const SymbolID &SymID) const;
|
||||||
|
|
||||||
|
size_t size() const { return Symbols.size(); }
|
||||||
|
bool empty() const { return Symbols.empty(); }
|
||||||
|
// Estimates the total memory usage.
|
||||||
|
size_t bytes() const {
|
||||||
|
return sizeof(*this) + Arena.getTotalMemory() +
|
||||||
|
Symbols.capacity() * sizeof(Symbol);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// SymbolSlab::Builder is a mutable container that can 'freeze' to
|
||||||
|
/// SymbolSlab. The frozen SymbolSlab will use less memory.
|
||||||
|
class Builder {
|
||||||
|
public:
|
||||||
|
Builder() : UniqueStrings(Arena) {}
|
||||||
|
|
||||||
|
/// Adds a symbol, overwriting any existing one with the same ID.
|
||||||
|
/// This is a deep copy: underlying strings will be owned by the slab.
|
||||||
|
void insert(const Symbol &S);
|
||||||
|
|
||||||
|
/// Returns the symbol with an ID, if it exists. Valid until next insert().
|
||||||
|
const Symbol *find(const SymbolID &ID) {
|
||||||
|
auto I = SymbolIndex.find(ID);
|
||||||
|
return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes the builder to finalize the slab.
|
||||||
|
SymbolSlab build() &&;
|
||||||
|
|
||||||
|
private:
|
||||||
|
llvm::BumpPtrAllocator Arena;
|
||||||
|
/// Intern table for strings. Contents are on the arena.
|
||||||
|
llvm::UniqueStringSaver UniqueStrings;
|
||||||
|
std::vector<Symbol> Symbols;
|
||||||
|
/// Values are indices into Symbols vector.
|
||||||
|
llvm::DenseMap<SymbolID, size_t> SymbolIndex;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
|
||||||
|
: Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
|
||||||
|
|
||||||
|
llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
|
||||||
|
std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace clangd
|
||||||
|
} // namespace clang
|
||||||
|
|
||||||
|
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
|
Loading…
Reference in New Issue