[clangd][NFC] Move SymbolID to a separate file

Prerequisite for textDocument/SymbolInfo

Differential Revision: https://reviews.llvm.org/D54799

llvm-svn: 347674
This commit is contained in:
Jan Korous 2018-11-27 16:40:34 +00:00
parent 6b2f3e07c6
commit 6089b6192e
6 changed files with 126 additions and 77 deletions

View File

@ -46,6 +46,7 @@ add_clang_library(clangDaemon
index/IndexAction.cpp
index/MemIndex.cpp
index/Merge.cpp
index/SymbolID.cpp
index/Serialization.cpp
index/SymbolCollector.cpp
index/YAMLSerialization.cpp

View File

@ -25,6 +25,7 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROTOCOL_H
#include "URI.h"
#include "index/SymbolID.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/JSON.h"
#include <bitset>

View File

@ -12,7 +12,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@ -43,34 +42,6 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) {
<< "-" << L.End.line() << ":" << L.End.column() << ")";
}
// Builds the ID for a USR: the USR is hashed with SHA1 and only the leading
// RawSize bytes of the digest are stored.
SymbolID::SymbolID(StringRef USR) {
  const auto Digest = SHA1::hash(arrayRefFromStringRef(USR));
  static_assert(RawSize <= sizeof(Digest), "RawSize larger than SHA1");
  memcpy(HashValue.data(), Digest.data(), RawSize);
}
// Prints the ID in its hex-encoded form and returns the stream for chaining.
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
  OS << ID.str();
  return OS;
}
// Rebuilds an ID from the bytes previously obtained via raw().
// The input must be exactly RawSize bytes long (checked by assertion only).
SymbolID SymbolID::fromRaw(StringRef Raw) {
  assert(Raw.size() == RawSize);
  SymbolID Result;
  memcpy(Result.HashValue.data(), Raw.data(), RawSize);
  return Result;
}
std::string SymbolID::str() const { return toHex(raw()); }
// Parses the hex encoding produced by str().
// Fails with "Bad ID length" unless the input has two hex digits per stored
// byte, and with "Bad hex ID" if any character is not a hex digit.
Expected<SymbolID> SymbolID::fromStr(StringRef Str) {
  const size_t EncodedLength = 2 * RawSize;
  if (Str.size() != EncodedLength)
    return createStringError(inconvertibleErrorCode(), "Bad ID length");
  for (size_t I = 0; I != Str.size(); ++I) {
    if (isHexDigit(Str[I]))
      continue;
    return createStringError(inconvertibleErrorCode(), "Bad hex ID");
  }
  return fromRaw(fromHex(Str));
}
raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) {
if (O == SymbolOrigin::Unknown)
return OS << "unknown";

View File

@ -11,11 +11,11 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#include "ExpectedTypes.h"
#include "SymbolID.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@ -95,53 +95,6 @@ inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
// Identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID stores a truncated SHA1(USR) value. This
// keeps IDs small (vs storing USRs directly); because the hash is truncated to
// RawSize bytes, uniqueness is a practical assumption rather than a guarantee.
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  // Creates an ID whose stored bytes are all zero.
  SymbolID() = default;
  // Creates the ID for the symbol with the given USR.
  explicit SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }
  // Orders IDs by their stored bytes.
  bool operator<(const SymbolID &Sym) const {
    return HashValue < Sym.HashValue;
  }

  // The stored hash is truncated to RawSize bytes.
  // This trades off memory against the number of symbols we can handle.
  constexpr static size_t RawSize = 8;
  // Returns a view of the RawSize stored bytes. The view points into this
  // object, so it must not outlive it.
  llvm::StringRef raw() const {
    return llvm::StringRef(reinterpret_cast<const char *>(HashValue.data()),
                           RawSize);
  }
  // Inverse of raw(): the argument must be exactly RawSize bytes long.
  static SymbolID fromRaw(llvm::StringRef);

  // Returns a hex encoded string.
  std::string str() const;
  // Parses the output of str(); fails on a wrong length or non-hex characters.
  static llvm::Expected<SymbolID> fromStr(llvm::StringRef);

private:
  // Value-initialized so that a default-constructed ID has a well-defined
  // (all-zero) value and can safely be compared, hashed and printed.
  std::array<uint8_t, RawSize> HashValue{};
};
// Hash function for SymbolID, found by argument-dependent lookup.
// The ID already holds bytes of a SHA1 digest, so the hash is simply its
// first sizeof(size_t) bytes reinterpreted as an integer.
inline llvm::hash_code hash_value(const SymbolID &ID) {
  // Both sizes are compile-time constants, so enforce the bound at compile
  // time; this keeps the memcpy below in range in release builds as well.
  static_assert(sizeof(size_t) <= SymbolID::RawSize,
                "size_t longer than SHA1!");
  size_t Result;
  // memcpy avoids alignment and aliasing problems of a pointer cast.
  memcpy(&Result, ID.raw().data(), sizeof(size_t));
  return llvm::hash_code(Result);
}
// Writes a SymbolID into the given stream, hex-encoded exactly as ID.str().
// Returns OS so that output can be chained.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
} // namespace clangd
} // namespace clang
namespace llvm {

View File

@ -0,0 +1,58 @@
//===--- SymbolID.cpp --------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "SymbolID.h"
#include "llvm/Support/SHA1.h"
using namespace llvm;
namespace clang {
namespace clangd {
// Derives the ID from a USR: hashes the USR with SHA1 and keeps only the
// first RawSize bytes of the digest.
SymbolID::SymbolID(StringRef USR) {
  const auto Digest = llvm::SHA1::hash(arrayRefFromStringRef(USR));
  static_assert(RawSize <= sizeof(Digest), "RawSize larger than SHA1");
  memcpy(HashValue.data(), Digest.data(), RawSize);
}
// Exposes the stored bytes as a RawSize-long view into this object.
llvm::StringRef SymbolID::raw() const {
  const char *Bytes = reinterpret_cast<const char *>(HashValue.data());
  return StringRef(Bytes, RawSize);
}
// Reconstructs an ID from bytes previously returned by raw().
// The input must hold exactly RawSize bytes (checked by assertion only).
SymbolID SymbolID::fromRaw(StringRef Raw) {
  assert(Raw.size() == RawSize);
  SymbolID Restored;
  memcpy(Restored.HashValue.data(), Raw.data(), RawSize);
  return Restored;
}
// Returns the stored bytes as a hex-encoded string.
std::string SymbolID::str() const {
  std::string Encoded = toHex(raw());
  return Encoded;
}
// Decodes the hex form produced by str().
// Returns the error "Bad ID length" when the input is not exactly two hex
// digits per stored byte, and "Bad hex ID" when a non-hex character is found.
Expected<SymbolID> SymbolID::fromStr(StringRef Str) {
  const bool HasExpectedLength = Str.size() == RawSize * 2;
  if (!HasExpectedLength)
    return createStringError(inconvertibleErrorCode(), "Bad ID length");
  for (const char Ch : Str) {
    const bool IsHex = isHexDigit(Ch);
    if (!IsHex)
      return createStringError(inconvertibleErrorCode(), "Bad hex ID");
  }
  return fromRaw(fromHex(Str));
}
// Streams the hex-encoded ID and hands the stream back for chaining.
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
  OS << ID.str();
  return OS;
}
// Hash function for SymbolID, found by argument-dependent lookup.
// The ID already holds bytes of a SHA1 digest, so the hash is simply its
// first sizeof(size_t) bytes reinterpreted as an integer.
llvm::hash_code hash_value(const SymbolID &ID) {
  // Both sizes are compile-time constants, so enforce the bound at compile
  // time; this keeps the memcpy below in range in release builds as well.
  static_assert(sizeof(size_t) <= SymbolID::RawSize,
                "size_t longer than SHA1!");
  size_t Result;
  // memcpy avoids alignment and aliasing problems of a pointer cast.
  memcpy(&Result, ID.raw().data(), sizeof(size_t));
  return llvm::hash_code(Result);
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,65 @@
//===--- SymbolID.h ----------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <array>
#include <string>
namespace clang {
namespace clangd {
// Identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID stores a truncated SHA1(USR) value. This
// keeps IDs small (vs storing USRs directly); because the hash is truncated to
// RawSize bytes, uniqueness is a practical assumption rather than a guarantee.
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  // Creates an ID whose stored bytes are all zero.
  SymbolID() = default;
  // Creates the ID for the symbol with the given USR.
  explicit SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }
  // Orders IDs by their stored bytes.
  bool operator<(const SymbolID &Sym) const {
    return HashValue < Sym.HashValue;
  }

  // The stored hash is truncated to RawSize bytes.
  // This trades off memory against the number of symbols we can handle.
  constexpr static size_t RawSize = 8;
  // Returns a view of the RawSize stored bytes. The view points into this
  // object, so it must not outlive it.
  llvm::StringRef raw() const;
  // Inverse of raw(): the argument must be exactly RawSize bytes long.
  static SymbolID fromRaw(llvm::StringRef);

  // Returns a hex encoded string.
  std::string str() const;
  // Parses the output of str(); fails on a wrong length or non-hex characters.
  static llvm::Expected<SymbolID> fromStr(llvm::StringRef);

private:
  // Value-initialized so that a default-constructed ID has a well-defined
  // (all-zero) value and can safely be compared, hashed and printed.
  std::array<uint8_t, RawSize> HashValue{};
};
// Hash function for SymbolID. The ID already stores bytes of a SHA1 digest, so
// the result is built from its first sizeof(size_t) bytes.
llvm::hash_code hash_value(const SymbolID &ID);
// Writes a SymbolID into the given stream, hex-encoded exactly as ID.str().
// Returns OS so that output can be chained.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H