llvm-project/clang-tools-extra/clangd/FileDistance.cpp

223 lines
7.8 KiB
C++
Raw Normal View History

//===--- FileDistance.cpp - Path, URI and scope distances -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The FileDistance structure allows calculating the minimum distance to paths
// in a single tree.
// We simply walk up the path's ancestors until we find a node whose cost is
// known, and add the cost of walking back down. Initialization ensures this
// gives the correct path to the roots.
// We cache the results, so that the runtime is O(|A|), where A is the set of
// all distinct ancestors of visited paths.
//
// Example after initialization with /=2, /bar=0, DownCost = 1:
// / = 2
// /bar = 0
//
// After querying /foo/bar and /bar/foo:
// / = 2
// /bar = 0
// /bar/foo = 1
// /foo = 3
// /foo/bar = 4
//
// URIDistance creates FileDistance lazily for each URI scheme encountered. In
// practice this is a small constant factor.
//
//===-------------------------------------------------------------------------//
#include "FileDistance.h"
#include "Logger.h"
#include "llvm/ADT/STLExtras.h"
#include <queue>
namespace clang {
namespace clangd {
// Normalizes a path into the canonical form used for hashing.
// The result is either "/" or a sequence of "/segment" components:
//   /foo/  --> /foo
//   a/b/c  --> /a/b/c
//   ""     --> /
// Separators are rewritten by native() using posix style, so a Windows path
// such as C:\foo\bar is expected to come out as /C:/foo/bar (this function
// itself performs no case folding).
static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
  // Drop trailing slashes first so "/foo/" and "/foo" map to the same key.
  llvm::SmallString<128> Canonical(Path.rtrim('/'));
  llvm::sys::path::native(Canonical, llvm::sys::path::Style::posix);
  // Relative and empty paths are rooted at "/".
  const bool HasLeadingSlash = !Canonical.empty() && Canonical.front() == '/';
  if (!HasLeadingSlash)
    Canonical.insert(Canonical.begin(), '/');
  return Canonical;
}
// Out-of-class definition of the Unreachable sentinel. It has no initializer
// here, so its value comes from the in-class declaration (not visible in this
// file).
constexpr const unsigned FileDistance::Unreachable;
// Hash of the canonical root path "/". Path hashes are compared against this
// to recognise the root directory.
const llvm::hash_code FileDistance::RootHash =
llvm::hash_value(llvm::StringRef("/"));
// Builds the initial distance cache from a set of source paths.
//
// Sources maps each source path (canonicalized here) to its base cost and the
// number of up-traversals allowed from it. Opts supplies the per-edge up/down
// costs and whether descending from the root "/" is permitted.
//
// Construction has two phases:
//  1. For every source, walk up its ancestors. Ancestors within
//     MaxUpTraversals are assigned Cost + I * UpCost (keeping the cheapest
//     value seen). Parent -> child edges are recorded for every ancestor,
//     including those beyond the limit.
//  2. Breadth-first from the root, lower each recorded child's cost to
//     parent cost + DownCost when that is cheaper.
FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
                           const FileDistanceOptions &Opts)
    : Opts(Opts) {
  llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code, 4>>
      DownEdges;
  // Compute the best distance following only up edges.
  // Keep track of down edges, in case we can use them to improve on this.
  for (const auto &S : Sources) {
    const SourceParams &Params = S.getValue();
    auto Canonical = canonicalize(S.getKey());
    dlog("Source {0} = {1}, MaxUp = {2}", Canonical, Params.Cost,
         Params.MaxUpTraversals);
    // Walk up to ancestors of this source, assigning cost.
    llvm::StringRef Rest = Canonical;
    llvm::hash_code Hash = llvm::hash_value(Rest);
    for (unsigned I = 0; !Rest.empty(); ++I) {
      Rest = parent_path(Rest, llvm::sys::path::Style::posix);
      auto NextHash = llvm::hash_value(Rest);
      auto &Down = DownEdges[NextHash];
      if (!llvm::is_contained(Down, Hash))
        Down.push_back(Hash);
      // We can't just break after MaxUpTraversals, must still set DownEdges.
      if (I > Params.MaxUpTraversals) {
        // A cached node means another source already walked (and recorded
        // edges for) everything above this point.
        if (Cache.find(Hash) != Cache.end())
          break;
      } else {
        unsigned Cost = Params.Cost + I * Opts.UpCost;
        auto R = Cache.try_emplace(Hash, Cost);
        if (!R.second) {
          if (Cost < R.first->second) {
            R.first->second = Cost;
          } else {
            // If we're not the best way to get to this path, stop assigning.
            break;
          }
        }
      }
      Hash = NextHash;
    }
  }
  // Now propagate scores parent -> child if that's an improvement.
  // BFS ensures we propagate down chains (must visit parents before children).
  std::queue<llvm::hash_code> Next;
  for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
    Next.push(Child);
  while (!Next.empty()) {
    auto Parent = Next.front();
    Next.pop();
    // Ancestors beyond every source's MaxUpTraversals have edges recorded but
    // no cache entry. They must count as unreachable: DenseMap::lookup()
    // would report them as cost 0 and make their children look nearly free.
    auto ParentIt = Cache.find(Parent);
    const unsigned ParentCost =
        ParentIt == Cache.end() ? Unreachable : ParentIt->second;
    // Never add DownCost to the Unreachable sentinel (distance() follows the
    // same rule), and honour the root restriction.
    const bool CanDescend =
        ParentCost != Unreachable &&
        (Parent != RootHash || Opts.AllowDownTraversalFromRoot);
    for (auto Child : DownEdges.lookup(Parent)) {
      if (CanDescend) {
        auto &ChildCost =
            Cache.try_emplace(Child, Unreachable).first->getSecond();
        if (ParentCost + Opts.DownCost < ChildCost)
          ChildCost = ParentCost + Opts.DownCost;
      }
      // Keep walking even when nothing was propagated: deeper nodes may have
      // their own cost to push further down.
      Next.push(Child);
    }
  }
}
// Returns the cost of reaching Path, or Unreachable if no known node leads to
// it. The path is canonicalized first; the result for Path and for every
// previously unknown ancestor visited on the way is stored in Cache.
unsigned FileDistance::distance(llvm::StringRef Path) {
  const auto Canonical = canonicalize(Path);
  unsigned Result = Unreachable;
  // Hashes of the queried path and its ancestors with no cached cost yet,
  // ordered deepest first.
  llvm::SmallVector<llvm::hash_code, 16> Pending;
  llvm::StringRef Current = Canonical;
  while (!Current.empty()) {
    const llvm::hash_code CurrentHash = llvm::hash_value(Current);
    // Arriving at the root from below would require a down traversal from
    // "/", which the options may forbid.
    const bool BlockedAtRoot = CurrentHash == RootHash && !Pending.empty() &&
                               !Opts.AllowDownTraversalFromRoot;
    if (BlockedAtRoot)
      break; // Result is still Unreachable.
    const auto Known = Cache.find(CurrentHash);
    if (Known != Cache.end()) {
      Result = Known->second;
      break;
    }
    Pending.push_back(CurrentHash);
    Current =
        llvm::sys::path::parent_path(Current, llvm::sys::path::Style::posix);
  }
  // Walk back down from the known node to the queried one, adding one
  // DownCost per level and memoizing each step. Unreachable stays as-is.
  for (auto It = Pending.rbegin(), End = Pending.rend(); It != End; ++It) {
    if (Result != Unreachable)
      Result += Opts.DownCost;
    Cache.try_emplace(*It, Result);
  }
  dlog("distance({0} = {1})", Path, Result);
  return Result;
}
// Returns the distance for a URI string, memoized by the hash of the string.
// The URI is parsed and its body is scored by the FileDistance belonging to
// its scheme. An unparseable URI is logged and reported as
// FileDistance::Unreachable.
unsigned URIDistance::distance(llvm::StringRef URI) {
  auto Inserted =
      Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
  // Nothing below touches this Cache, so the slot stays valid.
  auto &Entry = Inserted.first->second;
  if (Inserted.second) {
    // First time we see this URI: compute and store the result.
    if (auto Parsed = clangd::URI::parse(URI)) {
      dlog("distance({0} = {1})", URI, Parsed->body());
      Entry = forScheme(Parsed->scheme()).distance(Parsed->body());
    } else {
      log("URIDistance::distance() of unparseable {0}: {1}", URI,
          Parsed.takeError());
    }
  }
  return Entry;
}
// Returns the FileDistance for the given URI scheme, creating it on first use.
// Each source path is converted to a URI in that scheme and keyed by the URI
// body; sources that cannot be expressed in the scheme are skipped.
FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
  auto &Slot = ByScheme[Scheme];
  if (Slot)
    return *Slot;

  llvm::StringMap<SourceParams> Translated;
  for (const auto &Entry : Sources) {
    auto AsURI = clangd::URI::create(Entry.getKey(), Scheme);
    if (!AsURI) {
      // Conversion failed for this source; discard the error and move on.
      llvm::consumeError(AsURI.takeError());
      continue;
    }
    Translated.try_emplace(AsURI->body(), Entry.getValue());
  }
  dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
       Translated.size(), Sources.size());
  Slot.reset(new FileDistance(std::move(Translated), Opts));
  return *Slot;
}
// Converts a scope such as "a::b::" into a path-like string "/a/b".
// Returns the path together with the number of non-empty scope components;
// the global scope "" yields {"/", 0}.
static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
  llvm::SmallVector<llvm::StringRef, 4> Components;
  Scope.split(Components, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  std::string Path = "/" + llvm::join(Components, "/");
  return std::make_pair(std::move(Path), static_cast<int>(Components.size()));
}
static FileDistance
createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
FileDistanceOptions Opts;
Opts.UpCost = 2;
Opts.DownCost = 4;
Opts.AllowDownTraversalFromRoot = false;
llvm::StringMap<SourceParams> Sources;
llvm::StringRef Preferred =
QueryScopes.empty() ? "" : QueryScopes.front().c_str();
for (llvm::StringRef S : QueryScopes) {
SourceParams Param;
// Penalize the global scope even it's preferred, as all projects can define
// symbols in it, and there is pattern where using-namespace is used in
// place of enclosing namespaces (e.g. in implementation files).
if (S == Preferred)
[clangd] Tune down scope boost for global scope Summary: This improves cross-namespace completions and has ignorable impact on other completion types. Metrics ``` ================================================================================================== OVERALL (excl. CROSS_NAMESPACE) ================================================================================================== Total measurements: 109367 (-6) All measurements: MRR: 68.11 (+0.04) Top-1: 58.59% (+0.03%) Top-5: 80.00% (+0.01%) Top-100: 95.92% (-0.02%) Full identifiers: MRR: 98.35 (+0.09) Top-1: 97.87% (+0.17%) Top-5: 98.96% (+0.01%) Top-100: 99.03% (+0.00%) Filter length 0-5: MRR: 23.20 (+0.05) 58.72 (+0.01) 70.16 (-0.03) 73.44 (+0.03) 76.24 (+0.00) 80.79 (+0.14) Top-1: 11.90% (+0.03%) 45.07% (+0.03%) 58.49% (-0.05%) 62.44% (-0.02%) 66.31% (-0.05%) 72.10% (+0.07%) Top-5: 35.51% (+0.08%) 76.94% (-0.01%) 85.10% (-0.13%) 87.40% (-0.02%) 88.65% (+0.01%) 91.84% (+0.17%) Top-100: 83.25% (-0.02%) 96.61% (-0.15%) 98.15% (-0.02%) 98.43% (-0.01%) 98.53% (+0.01%) 98.66% (+0.02%) ================================================================================================== CROSS_NAMESPACE ================================================================================================== Total measurements: 17702 (+27) All measurements: MRR: 28.12 (+3.26) Top-1: 21.07% (+2.70%) Top-5: 35.11% (+4.48%) Top-100: 74.31% (+1.02%) Full identifiers: MRR: 79.20 (+3.72) Top-1: 71.78% (+4.86%) Top-5: 88.39% (+2.84%) Top-100: 98.99% (+0.00%) Filter length 0-5: MRR: 0.92 (-0.10) 5.51 (+0.57) 18.30 (+2.34) 21.62 (+3.76) 32.00 (+6.00) 41.55 (+7.61) Top-1: 0.56% (-0.08%) 2.44% (+0.15%) 9.82% (+1.47%) 12.59% (+2.16%) 21.17% (+4.47%) 30.05% (+6.72%) Top-5: 1.20% (-0.15%) 7.14% (+1.04%) 25.17% (+3.91%) 29.74% (+5.90%) 43.29% (+9.59%) 54.75% (+9.79%) Top-100: 5.49% (-0.01%) 56.22% (+2.59%) 86.69% (+1.08%) 89.03% (+2.04%) 93.74% (+0.78%) 96.99% (+0.59%) ``` Reviewers: sammccall Reviewed By: sammccall Subscribers: 
ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D54851 llvm-svn: 347548
2018-11-26 20:12:01 +08:00
Param.Cost = S == "" ? 4 : 0;
else if (Preferred.startswith(S) && !S.empty())
continue; // just rely on up-traversals.
else
[clangd] Tune down scope boost for global scope Summary: This improves cross-namespace completions and has ignorable impact on other completion types. Metrics ``` ================================================================================================== OVERALL (excl. CROSS_NAMESPACE) ================================================================================================== Total measurements: 109367 (-6) All measurements: MRR: 68.11 (+0.04) Top-1: 58.59% (+0.03%) Top-5: 80.00% (+0.01%) Top-100: 95.92% (-0.02%) Full identifiers: MRR: 98.35 (+0.09) Top-1: 97.87% (+0.17%) Top-5: 98.96% (+0.01%) Top-100: 99.03% (+0.00%) Filter length 0-5: MRR: 23.20 (+0.05) 58.72 (+0.01) 70.16 (-0.03) 73.44 (+0.03) 76.24 (+0.00) 80.79 (+0.14) Top-1: 11.90% (+0.03%) 45.07% (+0.03%) 58.49% (-0.05%) 62.44% (-0.02%) 66.31% (-0.05%) 72.10% (+0.07%) Top-5: 35.51% (+0.08%) 76.94% (-0.01%) 85.10% (-0.13%) 87.40% (-0.02%) 88.65% (+0.01%) 91.84% (+0.17%) Top-100: 83.25% (-0.02%) 96.61% (-0.15%) 98.15% (-0.02%) 98.43% (-0.01%) 98.53% (+0.01%) 98.66% (+0.02%) ================================================================================================== CROSS_NAMESPACE ================================================================================================== Total measurements: 17702 (+27) All measurements: MRR: 28.12 (+3.26) Top-1: 21.07% (+2.70%) Top-5: 35.11% (+4.48%) Top-100: 74.31% (+1.02%) Full identifiers: MRR: 79.20 (+3.72) Top-1: 71.78% (+4.86%) Top-5: 88.39% (+2.84%) Top-100: 98.99% (+0.00%) Filter length 0-5: MRR: 0.92 (-0.10) 5.51 (+0.57) 18.30 (+2.34) 21.62 (+3.76) 32.00 (+6.00) 41.55 (+7.61) Top-1: 0.56% (-0.08%) 2.44% (+0.15%) 9.82% (+1.47%) 12.59% (+2.16%) 21.17% (+4.47%) 30.05% (+6.72%) Top-5: 1.20% (-0.15%) 7.14% (+1.04%) 25.17% (+3.91%) 29.74% (+5.90%) 43.29% (+9.59%) 54.75% (+9.79%) Top-100: 5.49% (-0.01%) 56.22% (+2.59%) 86.69% (+1.08%) 89.03% (+2.04%) 93.74% (+0.78%) 96.99% (+0.59%) ``` Reviewers: sammccall Reviewed By: sammccall Subscribers: 
ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D54851 llvm-svn: 347548
2018-11-26 20:12:01 +08:00
Param.Cost = S == "" ? 6 : 2;
auto Path = scopeToPath(S);
// The global namespace is not 'near' its children.
Param.MaxUpTraversals = std::max(Path.second - 1, 0);
Sources[Path.first] = std::move(Param);
}
return FileDistance(Sources, Opts);
}
// Initializes Distance with the FileDistance that createScopeFileDistance()
// builds for the given query scopes.
ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
: Distance(createScopeFileDistance(QueryScopes)) {}
// Returns the distance of SymbolScope from the query scopes. The scope is
// converted to its path form and looked up in the underlying FileDistance.
unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
  const std::pair<std::string, int> PathAndDepth = scopeToPath(SymbolScope);
  return Distance.distance(PathAndDepth.first);
}
} // namespace clangd
} // namespace clang