llvm-project/clang-tools-extra/clang-include-fixer/SymbolIndexManager.cpp

159 lines
6.1 KiB
C++
Raw Normal View History

//===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SymbolIndexManager.h"
#include "find-all-symbols/SymbolInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
#define DEBUG_TYPE "clang-include-fixer"
namespace clang {
namespace include_fixer {
using find_all_symbols::SymbolInfo;
using find_all_symbols::SymbolAndSignals;
// Calculate a score based on whether we think the given header is closely
// related to the given source file.
static double similarityScore(llvm::StringRef FileName,
llvm::StringRef Header) {
// Compute the maximum number of common path segements between Header and
// a suffix of FileName.
// We do not do a full longest common substring computation, as Header
// specifies the path we would directly #include, so we assume it is rooted
// relatively to a subproject of the repository.
int MaxSegments = 1;
for (auto FileI = llvm::sys::path::begin(FileName),
FileE = llvm::sys::path::end(FileName);
FileI != FileE; ++FileI) {
int Segments = 0;
for (auto HeaderI = llvm::sys::path::begin(Header),
HeaderE = llvm::sys::path::end(Header), I = FileI;
HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
++Segments;
}
MaxSegments = std::max(Segments, MaxSegments);
}
return MaxSegments;
}
static void rank(std::vector<SymbolAndSignals> &Symbols,
llvm::StringRef FileName) {
llvm::DenseMap<llvm::StringRef, double> Score;
for (const auto &Symbol : Symbols) {
// Calculate a score from the similarity of the header the symbol is in
// with the current file and the popularity of the symbol.
double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
(1.0 + std::log2(1 + Symbol.Signals.Seen));
double &S = Score[Symbol.Symbol.getFilePath()];
S = std::max(S, NewScore);
}
// Sort by the gathered scores. Use file name as a tie breaker so we can
// deduplicate.
std::sort(Symbols.begin(), Symbols.end(),
[&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
auto AS = Score[A.Symbol.getFilePath()];
auto BS = Score[B.Symbol.getFilePath()];
if (AS != BS)
return AS > BS;
return A.Symbol.getFilePath() < B.Symbol.getFilePath();
});
}
std::vector<find_all_symbols::SymbolInfo>
SymbolIndexManager::search(llvm::StringRef Identifier,
bool IsNestedSearch,
llvm::StringRef FileName) const {
// The identifier may be fully qualified, so split it and get all the context
// names.
llvm::SmallVector<llvm::StringRef, 8> Names;
Identifier.split(Names, "::");
bool IsFullyQualified = false;
if (Identifier.startswith("::")) {
Names.erase(Names.begin()); // Drop first (empty) element.
IsFullyQualified = true;
}
// As long as we don't find a result keep stripping name parts from the end.
// This is to support nested classes which aren't recorded in the database.
// Eventually we will either hit a class (namespaces aren't in the database
// either) and can report that result.
bool TookPrefix = false;
std::vector<SymbolAndSignals> MatchedSymbols;
do {
std::vector<SymbolAndSignals> Symbols;
for (const auto &DB : SymbolIndices) {
auto Res = DB.get()->search(Names.back());
Symbols.insert(Symbols.end(), Res.begin(), Res.end());
}
LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
<< Symbols.size() << " results...\n");
for (auto &SymAndSig : Symbols) {
const SymbolInfo &Symbol = SymAndSig.Symbol;
// Match the identifier name without qualifier.
bool IsMatched = true;
auto SymbolContext = Symbol.getContexts().begin();
auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
// Match the remaining context names.
while (IdentiferContext != Names.rend() &&
SymbolContext != Symbol.getContexts().end()) {
if (SymbolContext->second == *IdentiferContext) {
++IdentiferContext;
++SymbolContext;
} else if (SymbolContext->first ==
find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
// Skip non-scoped enum context.
++SymbolContext;
} else {
IsMatched = false;
break;
}
}
// If the name was qualified we only want to add results if we evaluated
// all contexts.
if (IsFullyQualified)
IsMatched &= (SymbolContext == Symbol.getContexts().end());
// FIXME: Support full match. At this point, we only find symbols in
// database which end with the same contexts with the identifier.
if (IsMatched && IdentiferContext == Names.rend()) {
// If we're in a situation where we took a prefix but the thing we
// found couldn't possibly have a nested member ignore it.
if (TookPrefix &&
(Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
Symbol.getSymbolKind() ==
SymbolInfo::SymbolKind::EnumConstantDecl ||
Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
continue;
MatchedSymbols.push_back(std::move(SymAndSig));
}
}
Names.pop_back();
TookPrefix = true;
} while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
rank(MatchedSymbols, FileName);
// Strip signals, they are no longer needed.
std::vector<SymbolInfo> Res;
for (auto &SymAndSig : MatchedSymbols)
Res.push_back(std::move(SymAndSig.Symbol));
return Res;
}
} // namespace include_fixer
} // namespace clang