llvm-project/clang-tools-extra/clangd/index/dex/Token.h

140 lines
4.2 KiB
C++

//===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Token objects represent a characteristic of a symbol, which can be used to
/// perform efficient search. Tokens are the keys of an inverted index and are
/// mapped to the corresponding posting lists.
///
/// The symbol std::cout might have the tokens:
/// * Scope "std::"
/// * Trigram "cou"
/// * Trigram "out"
/// * Type "std::ostream"
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
#include "index/Index.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
namespace clang {
namespace clangd {
namespace dex {
/// A Token represents an attribute of a symbol, such as a particular trigram
/// present in the name (used for fuzzy search).
///
/// Tokens can be used to perform more sophisticated search queries by
/// constructing complex iterator trees.
class Token {
public:
/// Kind specifies Token type which defines semantics for the internal
/// representation. Each Kind has different representation stored in Data
/// field.
// FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
// strings. For example, PathURI store URIs of each directory and its parents,
// which induces a lot of overhead because these paths tend to be long and
// each parent directory is a prefix.
enum class Kind {
/// Represents trigram used for fuzzy search of unqualified symbol names.
///
/// Data contains 3 bytes with trigram contents.
Trigram,
/// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
///
/// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
/// scope).
Scope,
/// Path Proximity URI to symbol declaration.
///
/// Data stores path URI of symbol declaration file or its parent.
///
/// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
/// and some amount of its parents.
ProximityURI,
/// Type of symbol (see `Symbol::Type`).
Type,
/// Internal Token type for invalid/special tokens, e.g. empty tokens for
/// llvm::DenseMap.
Sentinel,
};
Token(Kind TokenKind, llvm::StringRef Data)
: Data(Data), TokenKind(TokenKind) {}
bool operator==(const Token &Other) const {
return TokenKind == Other.TokenKind && Data == Other.Data;
}
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
switch (T.TokenKind) {
case Kind::Trigram:
OS << "T=";
break;
case Kind::Scope:
OS << "S=";
break;
case Kind::ProximityURI:
OS << "U=";
break;
case Kind::Type:
OS << "Ty=";
break;
case Kind::Sentinel:
OS << "?=";
break;
}
return OS << T.Data;
}
private:
/// Representation which is unique among Token with the same Kind.
std::string Data;
Kind TokenKind;
friend llvm::hash_code hash_value(const Token &Token) {
return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
}
};
} // namespace dex
} // namespace clangd
} // namespace clang
namespace llvm {
// Support Tokens as DenseMap keys.
template <> struct DenseMapInfo<clang::clangd::dex::Token> {
  /// Returns the Sentinel token whose Data is "EmptyKey".
  static inline clang::clangd::dex::Token getEmptyKey() {
    return clang::clangd::dex::Token(clang::clangd::dex::Token::Kind::Sentinel,
                                     "EmptyKey");
  }

  /// Returns the Sentinel token whose Data is "TombstoneKey".
  static inline clang::clangd::dex::Token getTombstoneKey() {
    return clang::clangd::dex::Token(clang::clangd::dex::Token::Kind::Sentinel,
                                     "TombstoneKey");
  }

  /// Hashes \p Tag through the Token's hash_value overload and converts the
  /// result to the unsigned value this interface returns.
  static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
    return static_cast<unsigned>(hash_value(Tag));
  }

  /// Key equality is delegated to Token::operator==.
  static bool isEqual(const clang::clangd::dex::Token &LHS,
                      const clang::clangd::dex::Token &RHS) {
    return LHS == RHS;
  }
};
} // namespace llvm
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H