llvm-project/clang-tools-extra/clangd/index/Background.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

423 lines
15 KiB
C++
Raw Normal View History

//===-- Background.cpp - Build an index in a background thread ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "index/Background.h"
#include "Compiler.h"
#include "Config.h"
#include "Headers.h"
#include "ParsedAST.h"
#include "SourceCode.h"
#include "Symbol.h"
#include "URI.h"
#include "index/BackgroundIndexLoader.h"
#include "index/FileIndex.h"
#include "index/IndexAction.h"
#include "index/MemIndex.h"
#include "index/Ref.h"
#include "index/Relation.h"
#include "index/Serialization.h"
#include "index/SymbolCollector.h"
#include "support/Context.h"
#include "support/Logger.h"
#include "support/Path.h"
#include "support/Threading.h"
#include "support/ThreadsafeFS.h"
#include "support/Trace.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Driver/Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <numeric>
#include <queue>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>
namespace clang {
namespace clangd {
namespace {
// Computes the absolute path of the file named by a compile command.
// vfs->makeAbsolute is not suitable here: Cmd.Filename is either already
// absolute or relative to Cmd.Directory, and Cmd.Directory is not necessarily
// the current working directory.
llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
  llvm::SmallString<128> Resolved;
  if (llvm::sys::path::is_absolute(Cmd.Filename)) {
    // Already absolute: hand it back untouched (no dot removal is applied).
    Resolved = Cmd.Filename;
    return Resolved;
  }
  // Relative: anchor at the command's directory, then fold away "." and "..".
  Resolved = Cmd.Directory;
  llvm::sys::path::append(Resolved, Cmd.Filename);
  llvm::sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
  return Resolved;
}
// Returns true when the digest of the file's current contents (as read through
// \p FS) differs from the digest recorded in the loaded shard \p LS.
// A file that cannot be read is reported as not stale.
bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
  auto Contents = FS->getBufferForFile(LS.AbsolutePath);
  if (Contents)
    return digest(Contents->get()->getBuffer()) != LS.Digest;

  elog("Background-index: Couldn't read {0} to validate stored index: {1}",
       LS.AbsolutePath, Contents.getError().message());
  // There is no point in indexing an unreadable file.
  return false;
}
} // namespace
// Builds the index with an empty MemIndex, subscribes to compilation-database
// changes (each notification is forwarded to enqueue()), and launches
// ThreadPoolSize worker tasks that process the background queue.
BackgroundIndex::BackgroundIndex(
    Context BackgroundContext, const ThreadsafeFS &TFS,
    const GlobalCompilationDatabase &CDB,
    BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize,
    std::function<void(BackgroundQueue::Stats)> OnProgress,
    std::function<Context(PathRef)> ContextProvider)
    : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB),
      BackgroundContext(std::move(BackgroundContext)),
      ContextProvider(std::move(ContextProvider)),
      Rebuilder(this, &IndexedSymbols, ThreadPoolSize),
      IndexStorageFactory(std::move(IndexStorageFactory)),
      Queue(std::move(OnProgress)),
      CommandsChanged(
          CDB.watch([this](const std::vector<std::string> &ChangedFiles) {
            enqueue(ChangedFiles);
          })) {
  assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
  assert(this->IndexStorageFactory && "Storage factory can not be null!");

  // Every worker runs under its own clone of the background context and asks
  // the rebuilder to do its idle work whenever the queue reports idleness.
  const auto Worker = [this] {
    WithContext Ctx(this->BackgroundContext.clone());
    Queue.work([this] { Rebuilder.idle(); });
  };
  // Worker names are 1-based: background-worker-1, background-worker-2, ...
  for (unsigned WorkerIdx = 0; WorkerIdx < ThreadPoolSize; ++WorkerIdx)
    ThreadPool.runAsync("background-worker-" + llvm::Twine(WorkerIdx + 1),
                        Worker);
}
// Tears the index down: stop() is issued first, then the destructor blocks
// until every task on the thread pool has finished.
// NOTE(review): presumably stop() is what lets the workers' Queue.work() loops
// return so that wait() can complete; confirm against Background.h.
BackgroundIndex::~BackgroundIndex() {
  stop();
  ThreadPool.wait();
}
// Creates the queue task that reacts to a set of changed files: it loads their
// shards, determines which TUs need (re)indexing, and appends one indexing
// task per such TU to the queue in random order.
BackgroundQueue::Task BackgroundIndex::changedFilesTask(
    const std::vector<std::string> &ChangedFiles) {
  // The file list is copied into the closure so the task owns its input.
  auto Run = [this, ChangedFiles] {
    trace::Span Tracer("BackgroundIndexEnqueue");

    llvm::Optional<WithContext> WithProvidedContext;
    if (ContextProvider)
      WithProvidedContext.emplace(ContextProvider(/*Path=*/""));

    // This runs asynchronously because shards are read here as well.
    log("Enqueueing {0} commands for indexing", ChangedFiles.size());
    SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));

    // The capture is const inside this non-mutable lambda, so loadProject
    // receives a copy of the list.
    std::vector<std::string> StaleTUs = loadProject(ChangedFiles);

    // Run indexing for files that need to be updated, in shuffled order.
    std::mt19937 Rng(std::random_device{}());
    std::shuffle(StaleTUs.begin(), StaleTUs.end(), Rng);

    std::vector<BackgroundQueue::Task> Tasks;
    Tasks.reserve(StaleTUs.size());
    for (std::string &TU : StaleTUs)
      Tasks.push_back(indexFileTask(std::move(TU)));
    Queue.append(std::move(Tasks));
  };

  BackgroundQueue::Task T(std::move(Run));
  T.QueuePri = LoadShards;
  T.ThreadPri = llvm::ThreadPriority::Default;
  return T;
}
// Returns the last path component of \p Path with its extension removed.
// The result is a view into \p Path; nothing is copied.
static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
  const llvm::StringRef Basename = llvm::sys::path::filename(Path);
  const size_t ExtensionLen = llvm::sys::path::extension(Basename).size();
  return Basename.drop_back(ExtensionLen);
}
// Creates the queue task that indexes the TU at \p Path. The task is tagged
// with the file's extension-less name, which boostRelated() uses as its key.
BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
  // Compute the tag first: Path is moved into the closure right below.
  std::string Tag = filenameWithoutExtension(Path).str();

  BackgroundQueue::Task T([this, Path = std::move(Path)] {
    llvm::Optional<WithContext> WithProvidedContext;
    if (ContextProvider)
      WithProvidedContext.emplace(ContextProvider(Path));

    // Without a compile command there is nothing to index.
    auto Cmd = CDB.getCompileCommand(Path);
    if (Cmd) {
      if (auto Error = index(std::move(*Cmd)))
        elog("Indexing {0} failed: {1}", Path, std::move(Error));
    }
  });
  T.QueuePri = IndexFile;
  T.Tag = std::move(Tag);
  return T;
}
// For a header at \p Path, boosts queued tasks whose tag equals the header's
// extension-less file name to IndexBoostedFile. Non-header paths are ignored.
void BackgroundIndex::boostRelated(llvm::StringRef Path) {
  if (!isHeaderFile(Path))
    return;
  Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
}
/// Given index results from a TU, only update symbols coming from files that
/// are different from, or missing in, \p ShardVersionsSnapshot. Also stores
/// the new index information in IndexStorage.
///
/// \param MainFile absolute path of the TU's main file; also used as the hint
///        when resolving URIs from the include graph.
/// \param Index symbols, refs, relations and include graph produced for the
///        TU. Consumed by this call.
/// \param ShardVersionsSnapshot shard versions as observed before indexing
///        started.
/// \param HadErrors whether the compilation that produced \p Index hit
///        uncompilable errors.
void BackgroundIndex::update(
    llvm::StringRef MainFile, IndexFileIn Index,
    const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
    bool HadErrors) {
  // Keys are URIs.
  llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
  // Note that sources do not contain any information regarding missing headers,
  // since we don't even know what absolute path they should fall in.
  for (const auto &IndexIt : *Index.Sources) {
    const auto &IGN = IndexIt.getValue();
    auto AbsPath = URI::resolve(IGN.URI, MainFile);
    if (!AbsPath) {
      elog("Failed to resolve URI: {0}", AbsPath.takeError());
      continue;
    }
    const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
    // File is new, has different contents, or indexing was successful this
    // time while the previous attempt had errors.
    if (DigestIt == ShardVersionsSnapshot.end() ||
        DigestIt->getValue().Digest != IGN.Digest ||
        (DigestIt->getValue().HadErrors && !HadErrors))
      FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
  }

  // Shard slabs into files.
  FileShardedIndex ShardedIndex(std::move(Index));

  // Build and store new slabs for each updated file.
  for (const auto &FileIt : FilesToUpdate) {
    auto Uri = FileIt.first();
    auto IF = ShardedIndex.getShard(Uri);
    assert(IF && "no shard for file in Index.Sources?");
    PathRef Path = FileIt.getValue().first;

    // Only store command line hash for main files of the TU, since our
    // current model keeps only one version of a header file.
    if (Path != MainFile)
      IF->Cmd.reset();

    // We need to store shards before updating the index, since the latter
    // consumes slabs.
    // FIXME: Also skip serializing the shard if it is already up-to-date.
    if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF))
      elog("Failed to write background-index shard for file {0}: {1}", Path,
           std::move(Error));

    {
      std::lock_guard<std::mutex> Lock(ShardVersionsMu);
      const auto &Hash = FileIt.getValue().second;
      auto DigestIt = ShardVersions.try_emplace(Path);
      ShardVersion &SV = DigestIt.first->second;
      // Skip if file is already up to date, unless previous index was broken
      // and this one is not. "Up to date" means an entry already existed with
      // the same digest; the only reason to replace such an entry is that it
      // was produced with errors and the new result was not.
      const bool ReplacesBrokenIndex = SV.HadErrors && !HadErrors;
      if (!DigestIt.second && SV.Digest == Hash && !ReplacesBrokenIndex)
        continue;
      SV.Digest = Hash;
      SV.HadErrors = HadErrors;

      // This can override a newer version that is added in another thread, if
      // this thread sees the older version but finishes later. This should be
      // rare in practice.
      IndexedSymbols.update(
          Path, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
          std::make_unique<RefSlab>(std::move(*IF->Refs)),
          std::make_unique<RelationSlab>(std::move(*IF->Relations)),
          Path == MainFile);
    }
  }
}
llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
trace::Span Tracer("BackgroundIndex");
SPAN_ATTACH(Tracer, "file", Cmd.Filename);
auto AbsolutePath = getAbsolutePath(Cmd);
auto FS = TFS.view(Cmd.Directory);
auto Buf = FS->getBufferForFile(AbsolutePath);
if (!Buf)
return llvm::errorCodeToError(Buf.getError());
auto Hash = digest(Buf->get()->getBuffer());
// Take a snapshot of the versions to avoid locking for each file in the TU.
llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
{
std::lock_guard<std::mutex> Lock(ShardVersionsMu);
ShardVersionsSnapshot = ShardVersions;
}
vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
ParseInputs Inputs;
Inputs.TFS = &TFS;
Inputs.CompileCommand = std::move(Cmd);
[clangd] Surface errors from command-line parsing Summary: Those errors are exposed at the first character of a file, for a lack of a better place. Previously, all errors were stored inside the AST and report accordingly. However, errors in command-line argument parsing could result in failure to produce the AST, so we need an alternative ways to report those errors. We take the following approach in this patch: - buildCompilerInvocation() now requires an explicit DiagnosticConsumer. - TUScheduler and TestTU now collect the diagnostics produced when parsing command line arguments. If pasing of the AST failed, diagnostics are reported via a new ParsingCallbacks::onFailedAST method. If parsing of the AST succeeded, any errors produced during command-line parsing are stored alongside the AST inside the ParsedAST instance and reported as previously by calling the ParsingCallbacks::onMainAST method; - The client code that uses ClangdServer's DiagnosticConsumer does not need to change, it will receive new diagnostics in the onDiagnosticsReady() callback Errors produced when parsing command-line arguments are collected using the same StoreDiags class that is used to collect all other errors. They are recognized by their location being invalid. IIUC, the location is invalid as there is no source manager at this point, it is created at a later stage. Although technically we might also get diagnostics that mention the command-line arguments FileID with after the source manager was created (and they have valid source locations), we choose to not handle those and they are dropped as not coming from the main file. AFAICT, those diagnostics should always be notes, therefore it's safe to drop them without loosing too much information. Reviewers: kadircet Reviewed By: kadircet Subscribers: nridge, javed.absar, MaskRay, jkorous, arphaman, cfe-commits, gribozavr Tags: #clang Differential Revision: https://reviews.llvm.org/D66759 llvm-svn: 370177
2019-08-28 17:24:55 +08:00
IgnoreDiagnostics IgnoreDiags;
auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
if (!CI)
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Couldn't build compiler invocation");
auto Clang =
prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
std::move(*Buf), std::move(FS), IgnoreDiags);
if (!Clang)
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Couldn't build compiler instance");
SymbolCollector::Options IndexOpts;
// Creates a filter to not collect index results from files with unchanged
// digests.
IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
FileID FID) {
const auto *F = SM.getFileEntryForID(FID);
if (!F)
return false; // Skip invalid files.
auto AbsPath = getCanonicalPath(F, SM);
if (!AbsPath)
return false; // Skip files without absolute path.
auto Digest = digestFile(SM, FID);
if (!Digest)
return false;
auto D = ShardVersionsSnapshot.find(*AbsPath);
if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
!D->second.HadErrors)
return false; // Skip files that haven't changed, without errors.
return true;
};
IndexFileIn Index;
auto Action = createStaticIndexingAction(
IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); },
[&](RefSlab R) { Index.Refs = std::move(R); },
[&](RelationSlab R) { Index.Relations = std::move(R); },
[&](IncludeGraph IG) { Index.Sources = std::move(IG); });
// We're going to run clang here, and it could potentially crash.
// We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
// but the leaky "recovery" is pretty scary too in a long-running process.
// If crashes are a real problem, maybe we should fork a child process.
const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
if (!Action->BeginSourceFile(*Clang, Input))
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"BeginSourceFile() failed");
if (llvm::Error Err = Action->Execute())
return Err;
Action->EndSourceFile();
Index.Cmd = Inputs.CompileCommand;
assert(Index.Symbols && Index.Refs && Index.Sources &&
"Symbols, Refs and Sources must be set.");
log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
Inputs.CompileCommand.Filename, Index.Symbols->size(),
Index.Refs->numRefs(), Index.Sources->size());
SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size()));
SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs()));
SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
bool HadErrors = Clang->hasDiagnostics() &&
Clang->getDiagnostics().hasUncompilableErrorOccurred();
if (HadErrors) {
log("Failed to compile {0}, index may be incomplete", AbsolutePath);
for (auto &It : *Index.Sources)
It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
}
update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
Rebuilder.indexedTU();
return llvm::Error::success();
}
// Restores shards for \p MainFiles from index storage. Then checks staleness of
// those shards and returns a list of TUs that needs to be indexed to update
// staleness.
std::vector<std::string>
BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
// Drop files where background indexing is disabled in config.
if (ContextProvider)
llvm::erase_if(MainFiles, [&](const std::string &TU) {
// Load the config for each TU, as indexing may be selectively enabled.
WithContext WithProvidedContext(ContextProvider(TU));
return Config::current().Index.Background ==
Config::BackgroundPolicy::Skip;
});
Rebuilder.startLoading();
// Load shards for all of the mainfiles.
const std::vector<LoadedShard> Result =
loadIndexShards(MainFiles, IndexStorageFactory, CDB);
size_t LoadedShards = 0;
{
// Update in-memory state.
std::lock_guard<std::mutex> Lock(ShardVersionsMu);
for (auto &LS : Result) {
if (!LS.Shard)
continue;
auto SS =
LS.Shard->Symbols
? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols))
: nullptr;
auto RS = LS.Shard->Refs
? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs))
: nullptr;
auto RelS =
LS.Shard->Relations
? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations))
: nullptr;
ShardVersion &SV = ShardVersions[LS.AbsolutePath];
SV.Digest = LS.Digest;
SV.HadErrors = LS.HadErrors;
++LoadedShards;
IndexedSymbols.update(LS.AbsolutePath, std::move(SS), std::move(RS),
std::move(RelS), LS.CountReferences);
}
}
Rebuilder.loadedShard(LoadedShards);
Rebuilder.doneLoading();
auto FS = TFS.view(/*CWD=*/llvm::None);
llvm::DenseSet<PathRef> TUsToIndex;
// We'll accept data from stale shards, but ensure the files get reindexed
// soon.
for (auto &LS : Result) {
if (!shardIsStale(LS, FS.get()))
continue;
PathRef TUForFile = LS.DependentTU;
assert(!TUForFile.empty() && "File without a TU!");
// FIXME: Currently, we simply schedule indexing on a TU whenever any of
// its dependencies needs re-indexing. We might do it smarter by figuring
// out a minimal set of TUs that will cover all the stale dependencies.
// FIXME: Try looking at other TUs if no compile commands are available
// for this TU, i.e TU was deleted after we performed indexing.
TUsToIndex.insert(TUForFile);
}
return {TUsToIndex.begin(), TUsToIndex.end()};
}
} // namespace clangd
} // namespace clang