forked from OSchip/llvm-project
914 lines
35 KiB
C++
914 lines
35 KiB
C++
//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "SymbolCollector.h"
|
|
#include "AST.h"
|
|
#include "CanonicalIncludes.h"
|
|
#include "CodeComplete.h"
|
|
#include "CodeCompletionStrings.h"
|
|
#include "ExpectedTypes.h"
|
|
#include "SourceCode.h"
|
|
#include "SymbolLocation.h"
|
|
#include "URI.h"
|
|
#include "index/Relation.h"
|
|
#include "index/SymbolID.h"
|
|
#include "support/Logger.h"
|
|
#include "clang/AST/Decl.h"
|
|
#include "clang/AST/DeclBase.h"
|
|
#include "clang/AST/DeclCXX.h"
|
|
#include "clang/AST/DeclObjC.h"
|
|
#include "clang/AST/DeclTemplate.h"
|
|
#include "clang/Basic/SourceLocation.h"
|
|
#include "clang/Basic/SourceManager.h"
|
|
#include "clang/Basic/Specifiers.h"
|
|
#include "clang/Index/IndexSymbol.h"
|
|
#include "clang/Index/IndexingAction.h"
|
|
#include "clang/Index/USRGeneration.h"
|
|
#include "clang/Lex/Preprocessor.h"
|
|
#include "clang/Tooling/Syntax/Tokens.h"
|
|
#include "llvm/Support/Casting.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
|
|
namespace clang {
|
|
namespace clangd {
|
|
namespace {
|
|
|
|
/// If \p ND is a template specialization, returns the described template.
|
|
/// Otherwise, returns \p ND.
|
|
const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
|
|
if (auto T = ND.getDescribedTemplate())
|
|
return *T;
|
|
return ND;
|
|
}
|
|
|
|
// Checks whether the decl is a private symbol in a header generated by
|
|
// protobuf compiler.
|
|
// FIXME: make filtering extensible when there are more use cases for symbol
|
|
// filters.
|
|
bool isPrivateProtoDecl(const NamedDecl &ND) {
|
|
const auto &SM = ND.getASTContext().getSourceManager();
|
|
if (!isProtoFile(nameLocation(ND, SM), SM))
|
|
return false;
|
|
|
|
// ND without identifier can be operators.
|
|
if (ND.getIdentifier() == nullptr)
|
|
return false;
|
|
auto Name = ND.getIdentifier()->getName();
|
|
if (!Name.contains('_'))
|
|
return false;
|
|
// Nested proto entities (e.g. Message::Nested) have top-level decls
|
|
// that shouldn't be used (Message_Nested). Ignore them completely.
|
|
// The nested entities are dangling type aliases, we may want to reconsider
|
|
// including them in the future.
|
|
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
|
|
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
|
|
// will include OUTER_INNER and exclude some_enum_constant.
|
|
// FIXME: the heuristic relies on naming style (i.e. no underscore in
|
|
// user-defined names) and can be improved.
|
|
return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
|
|
}
|
|
|
|
// We only collect #include paths for symbols that are suitable for global code
|
|
// completion, except for namespaces since #include path for a namespace is hard
|
|
// to define.
|
|
bool shouldCollectIncludePath(index::SymbolKind Kind) {
|
|
using SK = index::SymbolKind;
|
|
switch (Kind) {
|
|
case SK::Macro:
|
|
case SK::Enum:
|
|
case SK::Struct:
|
|
case SK::Class:
|
|
case SK::Union:
|
|
case SK::TypeAlias:
|
|
case SK::Using:
|
|
case SK::Function:
|
|
case SK::Variable:
|
|
case SK::EnumConstant:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Return the symbol range of the token at \p TokLoc.
|
|
std::pair<SymbolLocation::Position, SymbolLocation::Position>
|
|
getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
|
|
const LangOptions &LangOpts) {
|
|
auto CreatePosition = [&SM](SourceLocation Loc) {
|
|
auto LSPLoc = sourceLocToPosition(SM, Loc);
|
|
SymbolLocation::Position Pos;
|
|
Pos.setLine(LSPLoc.line);
|
|
Pos.setColumn(LSPLoc.character);
|
|
return Pos;
|
|
};
|
|
|
|
auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
|
|
return {CreatePosition(TokLoc),
|
|
CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
|
|
}
|
|
|
|
// Checks whether \p ND is a good candidate to be the *canonical* declaration of
|
|
// its symbol (e.g. a go-to-declaration target). This overrides the default of
|
|
// using Clang's canonical declaration, which is the first in the TU.
|
|
//
|
|
// Example: preferring a class declaration over its forward declaration.
|
|
bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
|
|
const auto &SM = ND.getASTContext().getSourceManager();
|
|
if (isa<TagDecl>(ND))
|
|
return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
|
|
!isInsideMainFile(ND.getLocation(), SM);
|
|
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
|
|
return ID->isThisDeclarationADefinition();
|
|
if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
|
|
return PD->isThisDeclarationADefinition();
|
|
return false;
|
|
}
|
|
|
|
// Translates the Declaration/Definition/Reference bits of \p Roles into the
// corresponding RefKind flags, and adds RefKind::Spelled when \p Spelled is
// set. All other role bits are ignored.
RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
  const auto HasRole = [Roles](index::SymbolRole Role) {
    return (Roles & static_cast<unsigned>(Role)) != 0;
  };

  RefKind Kind = RefKind::Unknown;
  if (HasRole(index::SymbolRole::Declaration))
    Kind |= RefKind::Declaration;
  if (HasRole(index::SymbolRole::Definition))
    Kind |= RefKind::Definition;
  if (HasRole(index::SymbolRole::Reference))
    Kind |= RefKind::Reference;
  if (Spelled)
    Kind |= RefKind::Spelled;
  return Kind;
}
|
|
|
|
// Maps the roles of \p R onto the relation kind stored in the index:
// RelationBaseOf -> BaseOf, RelationOverrideOf -> OverriddenBy (BaseOf wins
// if both bits are set). Returns None for relations that are not indexed.
llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
  const auto HasRole = [&R](index::SymbolRole Role) {
    return (R.Roles & static_cast<unsigned>(Role)) != 0;
  };

  if (HasRole(index::SymbolRole::RelationBaseOf))
    return RelationKind::BaseOf;
  if (HasRole(index::SymbolRole::RelationOverrideOf))
    return RelationKind::OverriddenBy;
  return None;
}
|
|
|
|
// Given a ref contained in enclosing decl `Enclosing`, return
|
|
// the decl that should be used as that ref's Ref::Container. This is
|
|
// usually `Enclosing` itself, but in cases where `Enclosing` is not
|
|
// indexed, we walk further up because Ref::Container should always be
|
|
// an indexed symbol.
|
|
// Note: we don't use DeclContext as the container as in some cases
|
|
// it's useful to use a Decl which is not a DeclContext. For example,
|
|
// for a ref occurring in the initializer of a namespace-scope variable,
|
|
// it's useful to use that variable as the container, as otherwise the
|
|
// next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl,
|
|
// which are both not indexed and less granular than we'd like for use cases
|
|
// like call hierarchy.
|
|
const Decl *getRefContainer(const Decl *Enclosing,
|
|
const SymbolCollector::Options &Opts) {
|
|
while (Enclosing) {
|
|
const auto *ND = dyn_cast<NamedDecl>(Enclosing);
|
|
if (ND && SymbolCollector::shouldCollectSymbol(*ND, ND->getASTContext(),
|
|
Opts, true)) {
|
|
break;
|
|
}
|
|
Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
|
|
}
|
|
return Enclosing;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Encapsulates decisions about how to record header paths in the index,
|
|
// including filename normalization, URI conversion etc.
|
|
// Expensive checks are cached internally.
|
|
class SymbolCollector::HeaderFileURICache {
|
|
// Weird double-indirect access to PP, which might not be ready yet when
|
|
// HeaderFiles is created but will be by the time it's used.
|
|
// (IndexDataConsumer::setPreprocessor can happen before or after initialize)
|
|
const std::shared_ptr<Preprocessor> &PP;
|
|
const SourceManager &SM;
|
|
const CanonicalIncludes *Includes;
|
|
llvm::StringRef FallbackDir;
|
|
llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
|
|
llvm::StringMap<std::string> CachePathToURI;
|
|
llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
|
|
|
|
public:
|
|
HeaderFileURICache(const std::shared_ptr<Preprocessor> &PP,
|
|
const SourceManager &SM,
|
|
const SymbolCollector::Options &Opts)
|
|
: PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
|
|
}
|
|
|
|
// Returns a canonical URI for the file \p FE.
|
|
// We attempt to make the path absolute first.
|
|
const std::string &toURI(const FileEntry *FE) {
|
|
auto R = CacheFEToURI.try_emplace(FE);
|
|
if (R.second) {
|
|
auto CanonPath = getCanonicalPath(FE, SM);
|
|
R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
|
|
}
|
|
return *R.first->second;
|
|
}
|
|
|
|
// Returns a canonical URI for \p Path.
|
|
// If the file is in the FileManager, use that to canonicalize the path.
|
|
// We attempt to make the path absolute in any case.
|
|
const std::string &toURI(llvm::StringRef Path) {
|
|
if (auto File = SM.getFileManager().getFile(Path))
|
|
return toURI(*File);
|
|
return toURIInternal(Path);
|
|
}
|
|
|
|
// Gets a canonical include (URI of the header or <header> or "header") for
|
|
// header of \p FID (which should usually be the *expansion* file).
|
|
// This does not account for any per-symbol overrides!
|
|
// Returns "" if includes should not be inserted for this file.
|
|
llvm::StringRef getIncludeHeader(FileID FID) {
|
|
auto R = CacheFIDToInclude.try_emplace(FID);
|
|
if (R.second)
|
|
R.first->second = getIncludeHeaderUncached(FID);
|
|
return R.first->second;
|
|
}
|
|
|
|
private:
|
|
// This takes care of making paths absolute and path->URI caching, but no
|
|
// FileManager-based canonicalization.
|
|
const std::string &toURIInternal(llvm::StringRef Path) {
|
|
auto R = CachePathToURI.try_emplace(Path);
|
|
if (R.second) {
|
|
llvm::SmallString<256> AbsPath = Path;
|
|
if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
|
|
llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
|
|
assert(llvm::sys::path::is_absolute(AbsPath) &&
|
|
"If the VFS can't make paths absolute, a FallbackDir must be "
|
|
"provided");
|
|
llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
|
|
R.first->second = URI::create(AbsPath).toString();
|
|
}
|
|
return R.first->second;
|
|
}
|
|
|
|
llvm::StringRef getIncludeHeaderUncached(FileID FID) {
|
|
const FileEntry *FE = SM.getFileEntryForID(FID);
|
|
if (!FE || FE->getName().empty())
|
|
return "";
|
|
llvm::StringRef Filename = FE->getName();
|
|
// If a file is mapped by canonical headers, use that mapping, regardless
|
|
// of whether it's an otherwise-good header (header guards etc).
|
|
if (Includes) {
|
|
llvm::StringRef Canonical = Includes->mapHeader(Filename);
|
|
if (!Canonical.empty()) {
|
|
// If we had a mapping, always use it.
|
|
if (Canonical.startswith("<") || Canonical.startswith("\""))
|
|
return Canonical;
|
|
return toURI(Canonical);
|
|
}
|
|
}
|
|
if (!isSelfContainedHeader(FID, FE)) {
|
|
// A .inc or .def file is often included into a real header to define
|
|
// symbols (e.g. LLVM tablegen files).
|
|
if (Filename.endswith(".inc") || Filename.endswith(".def"))
|
|
// Don't use cache reentrantly due to iterator invalidation.
|
|
return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
|
|
// Conservatively refuse to insert #includes to files without guards.
|
|
return "";
|
|
}
|
|
// Standard case: just insert the file itself.
|
|
return toURI(FE);
|
|
}
|
|
|
|
bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
|
|
// FIXME: Should files that have been #import'd be considered
|
|
// self-contained? That's really a property of the includer,
|
|
// not of the file.
|
|
if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
|
|
!PP->getHeaderSearchInfo().hasFileBeenImported(FE))
|
|
return false;
|
|
// This pattern indicates that a header can't be used without
|
|
// particular preprocessor state, usually set up by another header.
|
|
if (isDontIncludeMeHeader(SM.getBufferData(FID)))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
// Is Line an #if or #ifdef directive?
|
|
static bool isIf(llvm::StringRef Line) {
|
|
Line = Line.ltrim();
|
|
if (!Line.consume_front("#"))
|
|
return false;
|
|
Line = Line.ltrim();
|
|
return Line.startswith("if");
|
|
}
|
|
|
|
// Is Line an #error directive mentioning includes?
|
|
static bool isErrorAboutInclude(llvm::StringRef Line) {
|
|
Line = Line.ltrim();
|
|
if (!Line.consume_front("#"))
|
|
return false;
|
|
Line = Line.ltrim();
|
|
if (!Line.startswith("error"))
|
|
return false;
|
|
return Line.contains_insensitive(
|
|
"includ"); // Matches "include" or "including".
|
|
}
|
|
|
|
// Heuristically headers that only want to be included via an umbrella.
|
|
static bool isDontIncludeMeHeader(llvm::StringRef Content) {
|
|
llvm::StringRef Line;
|
|
// Only sniff up to 100 lines or 10KB.
|
|
Content = Content.take_front(100 * 100);
|
|
for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
|
|
std::tie(Line, Content) = Content.split('\n');
|
|
if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// Return the symbol location of the token at \p TokLoc.
|
|
llvm::Optional<SymbolLocation>
|
|
SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
|
|
const auto &SM = ASTCtx->getSourceManager();
|
|
auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
|
|
if (!FE)
|
|
return None;
|
|
|
|
SymbolLocation Result;
|
|
Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
|
|
auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
|
|
Result.Start = Range.first;
|
|
Result.End = Range.second;
|
|
|
|
return Result;
|
|
}
|
|
|
|
// Stores the collection options. The per-TU state (ASTContext, URI cache,
// completion allocator) is set up later by initialize().
SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
|
|
// Defaulted here rather than in the header; presumably because
// HeaderFileURICache, which the collector owns through a unique_ptr (see
// initialize()), is only defined in this file.
SymbolCollector::~SymbolCollector() = default;
|
|
|
|
// Binds the collector to the AST context \p Ctx and (re)creates the state
// that depends on it: the header URI cache and the allocator/TU info used to
// build code completion strings.
void SymbolCollector::initialize(ASTContext &Ctx) {
  ASTCtx = &Ctx;
  HeaderFileURIs = std::make_unique<HeaderFileURICache>(
      PP, Ctx.getSourceManager(), Opts);

  auto Allocator = std::make_shared<GlobalCodeCompletionAllocator>();
  CompletionAllocator = Allocator;
  CompletionTUInfo =
      std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
}
|
|
|
|
bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
|
|
const ASTContext &ASTCtx,
|
|
const Options &Opts,
|
|
bool IsMainFileOnly) {
|
|
// Skip anonymous declarations, e.g (anonymous enum/class/struct).
|
|
if (ND.getDeclName().isEmpty())
|
|
return false;
|
|
|
|
// Skip main-file symbols if we are not collecting them.
|
|
if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
|
|
return false;
|
|
|
|
// Skip symbols in anonymous namespaces in header files.
|
|
if (!IsMainFileOnly && ND.isInAnonymousNamespace())
|
|
return false;
|
|
|
|
// For function local symbols, index only classes and its member functions.
|
|
if (index::isFunctionLocalSymbol(&ND))
|
|
return isa<RecordDecl>(ND) ||
|
|
(ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
|
|
|
|
// We want most things but not "local" symbols such as symbols inside
|
|
// FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
|
|
// FIXME: Need a matcher for ExportDecl in order to include symbols declared
|
|
// within an export.
|
|
const auto *DeclCtx = ND.getDeclContext();
|
|
switch (DeclCtx->getDeclKind()) {
|
|
case Decl::TranslationUnit:
|
|
case Decl::Namespace:
|
|
case Decl::LinkageSpec:
|
|
case Decl::Enum:
|
|
case Decl::ObjCProtocol:
|
|
case Decl::ObjCInterface:
|
|
case Decl::ObjCCategory:
|
|
case Decl::ObjCCategoryImpl:
|
|
case Decl::ObjCImplementation:
|
|
break;
|
|
default:
|
|
// Record has a few derivations (e.g. CXXRecord, Class specialization), it's
|
|
// easier to cast.
|
|
if (!isa<RecordDecl>(DeclCtx))
|
|
return false;
|
|
}
|
|
|
|
// Avoid indexing internal symbols in protobuf generated headers.
|
|
if (isPrivateProtoDecl(ND))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
// Always return true to continue indexing.
|
|
bool SymbolCollector::handleDeclOccurrence(
|
|
const Decl *D, index::SymbolRoleSet Roles,
|
|
llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
|
|
index::IndexDataConsumer::ASTNodeInfo ASTNode) {
|
|
assert(ASTCtx && PP.get() && HeaderFileURIs);
|
|
assert(CompletionAllocator && CompletionTUInfo);
|
|
assert(ASTNode.OrigD);
|
|
// Indexing API puts canonical decl into D, which might not have a valid
|
|
// source location for implicit/built-in decls. Fallback to original decl in
|
|
// such cases.
|
|
if (D->getLocation().isInvalid())
|
|
D = ASTNode.OrigD;
|
|
// If OrigD is an declaration associated with a friend declaration and it's
|
|
// not a definition, skip it. Note that OrigD is the occurrence that the
|
|
// collector is currently visiting.
|
|
if ((ASTNode.OrigD->getFriendObjectKind() !=
|
|
Decl::FriendObjectKind::FOK_None) &&
|
|
!(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
|
|
return true;
|
|
// A declaration created for a friend declaration should not be used as the
|
|
// canonical declaration in the index. Use OrigD instead, unless we've already
|
|
// picked a replacement for D
|
|
if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
|
|
D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
|
|
// Flag to mark that D should be considered canonical meaning its declaration
|
|
// will override any previous declaration for the Symbol.
|
|
bool DeclIsCanonical = false;
|
|
// Avoid treating ObjCImplementationDecl as a canonical declaration if it has
|
|
// a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
|
|
if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
|
|
DeclIsCanonical = true;
|
|
if (const auto *CID = IID->getClassInterface())
|
|
if (const auto *DD = CID->getDefinition())
|
|
if (!DD->isImplicitInterfaceDecl())
|
|
D = DD;
|
|
}
|
|
// Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
|
|
// its ObjCCategoryDecl if it has one.
|
|
if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
|
|
DeclIsCanonical = true;
|
|
if (const auto *CD = CID->getCategoryDecl())
|
|
D = CD;
|
|
}
|
|
const NamedDecl *ND = dyn_cast<NamedDecl>(D);
|
|
if (!ND)
|
|
return true;
|
|
|
|
// Mark D as referenced if this is a reference coming from the main file.
|
|
// D may not be an interesting symbol, but it's cheaper to check at the end.
|
|
auto &SM = ASTCtx->getSourceManager();
|
|
if (Opts.CountReferences &&
|
|
(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
|
|
SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
|
|
ReferencedDecls.insert(ND);
|
|
|
|
auto ID = getSymbolID(ND);
|
|
if (!ID)
|
|
return true;
|
|
|
|
// ND is the canonical (i.e. first) declaration. If it's in the main file
|
|
// (which is not a header), then no public declaration was visible, so assume
|
|
// it's main-file only.
|
|
bool IsMainFileOnly =
|
|
SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
|
|
!isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
|
|
ASTCtx->getLangOpts());
|
|
// In C, printf is a redecl of an implicit builtin! So check OrigD instead.
|
|
if (ASTNode.OrigD->isImplicit() ||
|
|
!shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
|
|
return true;
|
|
|
|
// Note: we need to process relations for all decl occurrences, including
|
|
// refs, because the indexing code only populates relations for specific
|
|
// occurrences. For example, RelationBaseOf is only populated for the
|
|
// occurrence inside the base-specifier.
|
|
processRelations(*ND, ID, Relations);
|
|
|
|
bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
|
|
bool IsOnlyRef =
|
|
!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
|
|
static_cast<unsigned>(index::SymbolRole::Definition)));
|
|
|
|
if (IsOnlyRef && !CollectRef)
|
|
return true;
|
|
|
|
// Unlike other fields, e.g. Symbols (which use spelling locations), we use
|
|
// file locations for references (as it aligns the behavior of clangd's
|
|
// AST-based xref).
|
|
// FIXME: we should try to use the file locations for other fields.
|
|
if (CollectRef &&
|
|
(!IsMainFileOnly || Opts.CollectMainFileRefs ||
|
|
ND->isExternallyVisible()) &&
|
|
!isa<NamespaceDecl>(ND) &&
|
|
(Opts.RefsInHeaders ||
|
|
SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
|
|
DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles,
|
|
getRefContainer(ASTNode.Parent, Opts)});
|
|
// Don't continue indexing if this is a mere reference.
|
|
if (IsOnlyRef)
|
|
return true;
|
|
|
|
// FIXME: ObjCPropertyDecl are not properly indexed here:
|
|
// - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
|
|
// not a NamedDecl.
|
|
auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
|
|
if (!OriginalDecl)
|
|
return true;
|
|
|
|
const Symbol *BasicSymbol = Symbols.find(ID);
|
|
if (isPreferredDeclaration(*OriginalDecl, Roles))
|
|
// If OriginalDecl is preferred, replace/create the existing canonical
|
|
// declaration (e.g. a class forward declaration). There should be at most
|
|
// one duplicate as we expect to see only one preferred declaration per
|
|
// TU, because in practice they are definitions.
|
|
BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
|
|
else if (!BasicSymbol || DeclIsCanonical)
|
|
BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
|
|
|
|
if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
|
|
addDefinition(*OriginalDecl, *BasicSymbol);
|
|
|
|
return true;
|
|
}
|
|
|
|
void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
|
|
assert(HeaderFileURIs && PP.get());
|
|
const auto &SM = PP->getSourceManager();
|
|
const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
|
|
assert(MainFileEntry);
|
|
|
|
const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
|
|
// Add macro references.
|
|
for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
|
|
for (const auto &MacroRef : IDToRefs.second) {
|
|
const auto &Range = MacroRef.Rng;
|
|
bool IsDefinition = MacroRef.IsDefinition;
|
|
Ref R;
|
|
R.Location.Start.setLine(Range.start.line);
|
|
R.Location.Start.setColumn(Range.start.character);
|
|
R.Location.End.setLine(Range.end.line);
|
|
R.Location.End.setColumn(Range.end.character);
|
|
R.Location.FileURI = MainFileURI.c_str();
|
|
R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
|
|
Refs.insert(IDToRefs.first, R);
|
|
if (IsDefinition) {
|
|
Symbol S;
|
|
S.ID = IDToRefs.first;
|
|
auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
|
|
auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
|
|
S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
|
|
S.SymInfo.Kind = index::SymbolKind::Macro;
|
|
S.SymInfo.SubKind = index::SymbolSubKind::None;
|
|
S.SymInfo.Properties = index::SymbolPropertySet();
|
|
S.SymInfo.Lang = index::SymbolLanguage::C;
|
|
S.Origin = Opts.Origin;
|
|
S.CanonicalDeclaration = R.Location;
|
|
Symbols.insert(S);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
|
|
const MacroInfo *MI,
|
|
index::SymbolRoleSet Roles,
|
|
SourceLocation Loc) {
|
|
assert(PP.get());
|
|
// Builtin macros don't have useful locations and aren't needed in completion.
|
|
if (MI->isBuiltinMacro())
|
|
return true;
|
|
|
|
const auto &SM = PP->getSourceManager();
|
|
auto DefLoc = MI->getDefinitionLoc();
|
|
// Also avoid storing predefined macros like __DBL_MIN__.
|
|
if (SM.isWrittenInBuiltinFile(DefLoc) ||
|
|
Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
|
|
return true;
|
|
|
|
auto ID = getSymbolID(Name->getName(), MI, SM);
|
|
if (!ID)
|
|
return true;
|
|
|
|
auto SpellingLoc = SM.getSpellingLoc(Loc);
|
|
bool IsMainFileOnly =
|
|
SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
|
|
!isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
|
|
ASTCtx->getLangOpts());
|
|
// Do not store references to main-file macros.
|
|
if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
|
|
(Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
|
|
// FIXME: Populate container information for macro references.
|
|
MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
|
|
|
|
// Collect symbols.
|
|
if (!Opts.CollectMacro)
|
|
return true;
|
|
|
|
// Skip main-file macros if we are not collecting them.
|
|
if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
|
|
return false;
|
|
|
|
// Mark the macro as referenced if this is a reference coming from the main
|
|
// file. The macro may not be an interesting symbol, but it's cheaper to check
|
|
// at the end.
|
|
if (Opts.CountReferences &&
|
|
(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
|
|
SM.getFileID(SpellingLoc) == SM.getMainFileID())
|
|
ReferencedMacros.insert(Name);
|
|
|
|
// Don't continue indexing if this is a mere reference.
|
|
// FIXME: remove macro with ID if it is undefined.
|
|
if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
|
|
Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
|
|
return true;
|
|
|
|
// Only collect one instance in case there are multiple.
|
|
if (Symbols.find(ID) != nullptr)
|
|
return true;
|
|
|
|
Symbol S;
|
|
S.ID = std::move(ID);
|
|
S.Name = Name->getName();
|
|
if (!IsMainFileOnly) {
|
|
S.Flags |= Symbol::IndexedForCodeCompletion;
|
|
S.Flags |= Symbol::VisibleOutsideFile;
|
|
}
|
|
S.SymInfo = index::getSymbolInfoForMacro(*MI);
|
|
S.Origin = Opts.Origin;
|
|
// FIXME: use the result to filter out symbols.
|
|
shouldIndexFile(SM.getFileID(Loc));
|
|
if (auto DeclLoc = getTokenLocation(DefLoc))
|
|
S.CanonicalDeclaration = *DeclLoc;
|
|
|
|
CodeCompletionResult SymbolCompletion(Name);
|
|
const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
|
|
*PP, *CompletionAllocator, *CompletionTUInfo);
|
|
std::string Signature;
|
|
std::string SnippetSuffix;
|
|
getSignature(*CCS, &Signature, &SnippetSuffix);
|
|
S.Signature = Signature;
|
|
S.CompletionSnippetSuffix = SnippetSuffix;
|
|
|
|
IndexedMacros.insert(Name);
|
|
setIncludeLocation(S, DefLoc);
|
|
Symbols.insert(S);
|
|
return true;
|
|
}
|
|
|
|
void SymbolCollector::processRelations(
|
|
const NamedDecl &ND, const SymbolID &ID,
|
|
ArrayRef<index::SymbolRelation> Relations) {
|
|
for (const auto &R : Relations) {
|
|
auto RKind = indexableRelation(R);
|
|
if (!RKind)
|
|
continue;
|
|
const Decl *Object = R.RelatedSymbol;
|
|
|
|
auto ObjectID = getSymbolID(Object);
|
|
if (!ObjectID)
|
|
continue;
|
|
|
|
// Record the relation.
|
|
// TODO: There may be cases where the object decl is not indexed for some
|
|
// reason. Those cases should probably be removed in due course, but for
|
|
// now there are two possible ways to handle it:
|
|
// (A) Avoid storing the relation in such cases.
|
|
// (B) Store it anyways. Clients will likely lookup() the SymbolID
|
|
// in the index and find nothing, but that's a situation they
|
|
// probably need to handle for other reasons anyways.
|
|
// We currently do (B) because it's simpler.
|
|
if (*RKind == RelationKind::BaseOf)
|
|
this->Relations.insert({ID, *RKind, ObjectID});
|
|
else if (*RKind == RelationKind::OverriddenBy)
|
|
this->Relations.insert({ObjectID, *RKind, ID});
|
|
}
|
|
}
|
|
|
|
void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
|
|
if (Opts.CollectIncludePath)
|
|
if (shouldCollectIncludePath(S.SymInfo.Kind))
|
|
// Use the expansion location to get the #include header since this is
|
|
// where the symbol is exposed.
|
|
IncludeFiles[S.ID] =
|
|
PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
|
|
}
|
|
|
|
void SymbolCollector::finish() {
|
|
// At the end of the TU, add 1 to the refcount of all referenced symbols.
|
|
auto IncRef = [this](const SymbolID &ID) {
|
|
if (const auto *S = Symbols.find(ID)) {
|
|
Symbol Inc = *S;
|
|
++Inc.References;
|
|
Symbols.insert(Inc);
|
|
}
|
|
};
|
|
for (const NamedDecl *ND : ReferencedDecls) {
|
|
if (auto ID = getSymbolID(ND)) {
|
|
IncRef(ID);
|
|
}
|
|
}
|
|
if (Opts.CollectMacro) {
|
|
assert(PP);
|
|
// First, drop header guards. We can't identify these until EOF.
|
|
for (const IdentifierInfo *II : IndexedMacros) {
|
|
if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
|
|
if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
|
|
if (MI->isUsedForHeaderGuard())
|
|
Symbols.erase(ID);
|
|
}
|
|
// Now increment refcounts.
|
|
for (const IdentifierInfo *II : ReferencedMacros) {
|
|
if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
|
|
if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
|
|
IncRef(ID);
|
|
}
|
|
}
|
|
// Fill in IncludeHeaders.
|
|
// We delay this until end of TU so header guards are all resolved.
|
|
llvm::SmallString<128> QName;
|
|
for (const auto &Entry : IncludeFiles) {
|
|
if (const Symbol *S = Symbols.find(Entry.first)) {
|
|
llvm::StringRef IncludeHeader;
|
|
// Look for an overridden include header for this symbol specifically.
|
|
if (Opts.Includes) {
|
|
QName = S->Scope;
|
|
QName.append(S->Name);
|
|
IncludeHeader = Opts.Includes->mapSymbol(QName);
|
|
if (!IncludeHeader.empty()) {
|
|
if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
|
|
IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
|
|
else if (IncludeHeader == "<utility>" && QName == "std::move" &&
|
|
S->Signature.contains(','))
|
|
IncludeHeader = "<algorithm>";
|
|
}
|
|
}
|
|
// Otherwise find the approprate include header for the defining file.
|
|
if (IncludeHeader.empty())
|
|
IncludeHeader = HeaderFileURIs->getIncludeHeader(Entry.second);
|
|
|
|
// Symbols in slabs aren't mutable, insert() has to walk all the strings
|
|
if (!IncludeHeader.empty()) {
|
|
Symbol NewSym = *S;
|
|
NewSym.IncludeHeaders.push_back({IncludeHeader, 1});
|
|
Symbols.insert(NewSym);
|
|
}
|
|
}
|
|
}
|
|
|
|
const auto &SM = ASTCtx->getSourceManager();
|
|
auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
|
|
bool Spelled = false) {
|
|
auto FileID = SM.getFileID(LocAndRole.Loc);
|
|
// FIXME: use the result to filter out references.
|
|
shouldIndexFile(FileID);
|
|
if (const auto *FE = SM.getFileEntryForID(FileID)) {
|
|
auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
|
|
Ref R;
|
|
R.Location.Start = Range.first;
|
|
R.Location.End = Range.second;
|
|
R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
|
|
R.Kind = toRefKind(LocAndRole.Roles, Spelled);
|
|
R.Container = getSymbolID(LocAndRole.Container);
|
|
Refs.insert(ID, R);
|
|
}
|
|
};
|
|
// Populate Refs slab from MacroRefs.
|
|
// FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
|
|
for (const auto &IDAndRefs : MacroRefs)
|
|
for (const auto &LocAndRole : IDAndRefs.second)
|
|
CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
|
|
// Populate Refs slab from DeclRefs.
|
|
llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
|
|
for (auto &DeclAndRef : DeclRefs) {
|
|
if (auto ID = getSymbolID(DeclAndRef.first)) {
|
|
for (auto &LocAndRole : DeclAndRef.second) {
|
|
const auto FileID = SM.getFileID(LocAndRole.Loc);
|
|
// FIXME: It's better to use TokenBuffer by passing spelled tokens from
|
|
// the caller of SymbolCollector.
|
|
if (!FilesToTokensCache.count(FileID))
|
|
FilesToTokensCache[FileID] =
|
|
syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
|
|
llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
|
|
// Check if the referenced symbol is spelled exactly the same way the
|
|
// corresponding NamedDecl is. If it is, mark this reference as spelled.
|
|
const auto *IdentifierToken =
|
|
spelledIdentifierTouching(LocAndRole.Loc, Tokens);
|
|
DeclarationName Name = DeclAndRef.first->getDeclName();
|
|
const auto NameKind = Name.getNameKind();
|
|
bool IsTargetKind = NameKind == DeclarationName::Identifier ||
|
|
NameKind == DeclarationName::CXXConstructorName;
|
|
bool Spelled = IdentifierToken && IsTargetKind &&
|
|
Name.getAsString() == IdentifierToken->text(SM);
|
|
CollectRef(ID, LocAndRole, Spelled);
|
|
}
|
|
}
|
|
}
|
|
|
|
ReferencedDecls.clear();
|
|
ReferencedMacros.clear();
|
|
DeclRefs.clear();
|
|
IncludeFiles.clear();
|
|
}
|
|
|
|
/// Builds the index Symbol for declaration \p ND under \p ID, stores it in
/// Symbols and returns the entry that Symbols.find() yields for that ID.
///
/// When \p IsMainFileOnly is set, neither IndexedForCodeCompletion nor
/// VisibleOutsideFile is added to the flags. A symbol that ends up without
/// IndexedForCodeCompletion is stored in a reduced form: documentation only if
/// Opts.StoreAllDocumentation is set, and no signature, snippet suffix, return
/// type, type or include location.
const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
                                              bool IsMainFileOnly) {
  auto &Ctx = ND.getASTContext();
  auto &SM = Ctx.getSourceManager();

  Symbol S;
  S.ID = std::move(ID);

  // NOTE(review): the std::string locals in this function are assigned into
  // Symbol fields that presumably only reference (do not own) the text, so
  // each of them is kept alive until Symbols.insert(S) -- confirm against the
  // definition of Symbol.
  std::string QualifiedName = printQualifiedName(ND);
  // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
  // for consistency with CodeCompletionString and a clean name/signature split.
  std::tie(S.Scope, S.Name) = splitQualifiedName(QualifiedName);
  std::string SpecializationArgs = printTemplateSpecializationArgs(ND);
  S.TemplateSpecializationArgs = SpecializationArgs;

  // Main-file symbols are collected, but they are neither offered for code
  // completion nor marked as visible outside their file.
  if (!IsMainFileOnly) {
    if (isIndexedForCodeCompletion(ND, Ctx))
      S.Flags |= Symbol::IndexedForCodeCompletion;
  }
  if (isImplementationDetail(&ND))
    S.Flags |= Symbol::ImplementationDetail;
  if (!IsMainFileOnly)
    S.Flags |= Symbol::VisibleOutsideFile;
  S.SymInfo = index::getSymbolInfo(&ND);

  auto NameLoc = nameLocation(ND, SM);
  assert(NameLoc.isValid() && "Invalid source location for NamedDecl");
  // FIXME: use the result to filter out symbols.
  shouldIndexFile(SM.getFileID(NameLoc));
  if (auto CanonicalLoc = getTokenLocation(NameLoc))
    S.CanonicalDeclaration = *CanonicalLoc;

  S.Origin = Opts.Origin;
  if (ND.getAvailability() == AR_Deprecated)
    S.Flags |= Symbol::Deprecated;

  // Completion info.
  // FIXME: we may want to choose a different redecl, or combine from several.
  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
  // The completion result is built from the primary template, as clang does
  // during code completion.
  CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
  const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
      *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
      *CompletionTUInfo,
      /*IncludeBriefComments*/ false);
  std::string Documentation = formatDocumentation(
      *CCS, getDocComment(Ctx, SymbolCompletion,
                          /*CommentsFromHeaders=*/true));

  if (S.Flags & Symbol::IndexedForCodeCompletion) {
    // Full record: documentation, signature, snippet suffix, return type,
    // type and include location.
    S.Documentation = Documentation;

    std::string SignatureText;
    std::string SnippetSuffixText;
    getSignature(*CCS, &SignatureText, &SnippetSuffixText);
    S.Signature = SignatureText;
    S.CompletionSnippetSuffix = SnippetSuffixText;

    std::string ReturnTypeText = getReturnType(*CCS);
    S.ReturnType = ReturnTypeText;

    // TypeStorage has to outlive the insert below because S.Type is taken
    // from TypeStorage->raw().
    llvm::Optional<OpaqueType> TypeStorage =
        OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
    if (TypeStorage)
      S.Type = TypeStorage->raw();

    Symbols.insert(S);
    setIncludeLocation(S, ND.getLocation());
  } else {
    // Reduced record: documentation is stored only on request.
    if (Opts.StoreAllDocumentation)
      S.Documentation = Documentation;
    Symbols.insert(S);
  }
  return Symbols.find(S.ID);
}
|
|
|
|
void SymbolCollector::addDefinition(const NamedDecl &ND,
|
|
const Symbol &DeclSym) {
|
|
if (DeclSym.Definition)
|
|
return;
|
|
// If we saw some forward declaration, we end up copying the symbol.
|
|
// This is not ideal, but avoids duplicating the "is this a definition" check
|
|
// in clang::index. We should only see one definition.
|
|
Symbol S = DeclSym;
|
|
const auto &SM = ND.getASTContext().getSourceManager();
|
|
auto Loc = nameLocation(ND, SM);
|
|
// FIXME: use the result to filter out symbols.
|
|
shouldIndexFile(SM.getFileID(Loc));
|
|
if (auto DefLoc = getTokenLocation(Loc))
|
|
S.Definition = *DefLoc;
|
|
Symbols.insert(S);
|
|
}
|
|
|
|
/// Returns whether the file \p FID should be indexed.
///
/// Without Opts.FileFilter every file is accepted. Otherwise the filter is
/// called the first time a given \p FID is seen and its answer is kept in
/// FilesToIndexCache for later queries.
bool SymbolCollector::shouldIndexFile(FileID FID) {
  if (Opts.FileFilter) {
    auto Slot = FilesToIndexCache.try_emplace(FID);
    const bool FirstQuery = Slot.second;
    // The cache entry is dereferenced only after the filter call has returned.
    if (FirstQuery)
      Slot.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
    return Slot.first->second;
  }
  return true;
}
|
|
|
|
} // namespace clangd
|
|
} // namespace clang
|