Reland [clangd] Extract per-dir CDB cache to its own threadsafe class. NFC

This reverts commit 4d956af594.

Assertion failures on Windows were fixed by
965d71c69a
This commit is contained in:
Sam McCall 2020-12-15 14:00:03 +01:00
parent 965d71c69a
commit 92dd077af1
2 changed files with 194 additions and 88 deletions

View File

@ -16,11 +16,13 @@
#include "llvm/ADT/None.h" #include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h" #include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h" #include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h" #include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h" #include "llvm/Support/Path.h"
#include "llvm/Support/Program.h" #include "llvm/Support/Program.h"
#include <chrono>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
@ -72,10 +74,117 @@ GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
return Cmd; return Cmd;
} }
// Lazily loads, then caches, the compilation database (CDB) belonging to one
// directory.
//
// Every instance owns its own mutex, so lookups in different directories do
// not contend with each other. Nothing is read from disk until the first call
// to get().
//
// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
// The intended calling pattern is:
//  - 1) work out every path that may need to be searched
//  - 2) take the map lock and get-or-create all the DirectoryCache entries
//  - 3) drop the map lock, then query the individual caches as needed
//
// FIXME: this should revalidate the cache sometimes
// FIXME: IO should go through a VFS
class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
  // The directory this cache describes: absolute and canonical, but not
  // case-folded.
  const std::string Path;

  // Set once a lookup here has come back empty. Being atomic, it lets get()
  // answer "no CDB" without acquiring Mu.
  // FIXME: this should have an expiry time instead of lasting forever.
  std::atomic<bool> FinalizedNoCDB = {false};

  // Protects every member declared below.
  std::mutex Mu;
  // Whether load() has already run. FIXME: this should be an expiry time.
  bool CachePopulated = false;
  // Set while a freshly loaded CDB is still waiting to be broadcast.
  bool NeedsBroadcast = false;
  // The most recently loaded CDB; only meaningful once CachePopulated is set.
  // Held by shared_ptr so it can be replaced while callers still use the old
  // one.
  std::shared_ptr<tooling::CompilationDatabase> CDB;

public:
  DirectoryCache(llvm::StringRef Path) : Path(Path) {
    assert(llvm::sys::path::is_absolute(Path));
  }

  // Returns the CDB for this directory, or nullptr if there is none.
  //
  // ShouldBroadcast is an in/out flag:
  //  - on entry, true means the caller is prepared to broadcast a newly
  //    discovered CDB (e.g. to trigger background indexing);
  //  - on return, true means the caller must actually do so.
  // When a new CDB shows up while the caller passed false, the broadcast is
  // remembered and handed to the next caller that passes true.
  std::shared_ptr<const tooling::CompilationDatabase>
  get(bool &ShouldBroadcast) {
    // Lock-free answer for directories already known to have no CDB.
    if (FinalizedNoCDB.load()) {
      ShouldBroadcast = false;
      return nullptr;
    }

    std::lock_guard<std::mutex> Guard(Mu);
    // Runs on scope exit, still under the lock: reconcile the pending
    // broadcast state with whatever CDB we ended up with.
    auto SettleBroadcast =
        llvm::make_scope_exit([&, PreviousCDB(CDB.get())] {
          if (CDB == nullptr) {
            // Nothing is left that could be broadcast.
            NeedsBroadcast = false;
          } else if (CDB.get() != PreviousCDB) {
            // A different CDB was loaded; it has to be announced eventually.
            NeedsBroadcast = true;
          }
          // Hand the pending broadcast to the caller only if it asked for it.
          if (ShouldBroadcast) {
            ShouldBroadcast = NeedsBroadcast;
            NeedsBroadcast = false;
          }
        });

    // For now a populated cache is never revalidated, so only the very first
    // locked call touches the disk.
    if (!CachePopulated) {
      assert(CDB == nullptr);
      load();
      CachePopulated = true;
      if (!CDB)
        FinalizedNoCDB.store(true);
    }
    return CDB;
  }

  llvm::StringRef path() const { return Path; }

private:
  // Refreshes `CDB` from what is currently on disk.
  void load() {
    // The error text is dropped: it is usually just "didn't find anything".
    std::string IgnoredError;
    CDB = tooling::CompilationDatabase::loadFromDirectory(Path, IgnoredError);

    if (!CDB) {
      // Second chance: $src/build, the conventional CMake build root.
      // Check that it exists up front so that the CDB plugins don't each do
      // IO on a directory that isn't there.
      llvm::SmallString<256> CMakeBuildRoot(Path);
      llvm::sys::path::append(CMakeBuildRoot, "build");
      if (llvm::sys::fs::is_directory(CMakeBuildRoot)) {
        vlog("Found candidate build directory {0}", CMakeBuildRoot);
        CDB = tooling::CompilationDatabase::loadFromDirectory(CMakeBuildRoot,
                                                              IgnoredError);
      }
    }

    if (!CDB) {
      vlog("No compilation database at {0}", Path);
      return;
    }
    log("Loaded compilation database from {0}", Path);
  }
};
DirectoryBasedGlobalCompilationDatabase:: DirectoryBasedGlobalCompilationDatabase::
DirectoryBasedGlobalCompilationDatabase( DirectoryBasedGlobalCompilationDatabase(
llvm::Optional<Path> CompileCommandsDir) llvm::Optional<Path> CompileCommandsDir) {
: CompileCommandsDir(std::move(CompileCommandsDir)) {} if (CompileCommandsDir)
OnlyDirCache = std::make_unique<DirectoryCache>(*CompileCommandsDir);
}
DirectoryBasedGlobalCompilationDatabase:: DirectoryBasedGlobalCompilationDatabase::
~DirectoryBasedGlobalCompilationDatabase() = default; ~DirectoryBasedGlobalCompilationDatabase() = default;
@ -121,31 +230,26 @@ static bool pathEqual(PathRef A, PathRef B) {
#endif #endif
} }
DirectoryBasedGlobalCompilationDatabase::CachedCDB & std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
DirectoryBasedGlobalCompilationDatabase::getCDBInDirLocked(PathRef Dir) const { DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
// FIXME(ibiryukov): Invalidate cached compilation databases on changes llvm::ArrayRef<llvm::StringRef> Dirs) const {
auto Key = maybeCaseFoldPath(Dir); std::vector<std::string> FoldedDirs;
auto R = CompilationDatabases.try_emplace(Key); FoldedDirs.reserve(Dirs.size());
if (R.second) { // Cache miss, try to load CDB. for (const auto &Dir : Dirs) {
CachedCDB &Entry = R.first->second; #ifndef NDEBUG
std::string Error; if (!llvm::sys::path::is_absolute(Dir))
Entry.Path = std::string(Dir); elog("Trying to cache CDB for relative {0}", Dir);
Entry.CDB = tooling::CompilationDatabase::loadFromDirectory(Dir, Error); #endif
// Check for $src/build, the conventional CMake build root. FoldedDirs.push_back(maybeCaseFoldPath(Dir));
// Probe existence first to avoid each plugin doing IO if it doesn't exist.
if (!CompileCommandsDir && !Entry.CDB) {
llvm::SmallString<256> BuildDir = Dir;
llvm::sys::path::append(BuildDir, "build");
if (llvm::sys::fs::is_directory(BuildDir)) {
vlog("Found candidate build directory {0}", BuildDir);
Entry.CDB =
tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error);
} }
}
if (Entry.CDB) std::vector<DirectoryCache *> Ret;
log("Loaded compilation database from {0}", Dir); Ret.reserve(Dirs.size());
}
return R.first->second; std::lock_guard<std::mutex> Lock(DirCachesMutex);
for (unsigned I = 0; I < Dirs.size(); ++I)
Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
return Ret;
} }
llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult> llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
@ -155,39 +259,40 @@ DirectoryBasedGlobalCompilationDatabase::lookupCDB(
"path must be absolute"); "path must be absolute");
bool ShouldBroadcast = false; bool ShouldBroadcast = false;
CDBLookupResult Result; DirectoryCache *DirCache = nullptr;
std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
{ if (OnlyDirCache) {
std::lock_guard<std::mutex> Lock(Mutex); DirCache = OnlyDirCache.get();
CachedCDB *Entry = nullptr; ShouldBroadcast = Request.ShouldBroadcast;
if (CompileCommandsDir) { CDB = DirCache->get(ShouldBroadcast);
Entry = &getCDBInDirLocked(*CompileCommandsDir);
} else { } else {
// Traverse the canonical version to prevent false positives. i.e.: // Traverse the canonical version to prevent false positives. i.e.:
// src/build/../a.cc can detect a CDB in /src/build if not canonicalized. // src/build/../a.cc can detect a CDB in /src/build if not canonicalized.
// FIXME(sammccall): this loop is hot, use a union-find-like structure. std::string CanonicalPath = removeDots(Request.FileName);
actOnAllParentDirectories(removeDots(Request.FileName), std::vector<llvm::StringRef> SearchDirs;
[&](PathRef Path) { actOnAllParentDirectories(CanonicalPath, [&](PathRef Path) {
Entry = &getCDBInDirLocked(Path); SearchDirs.push_back(Path);
return Entry->CDB != nullptr; return false;
}); });
for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
bool CandidateShouldBroadcast = Request.ShouldBroadcast;
if ((CDB = Candidate->get(CandidateShouldBroadcast))) {
DirCache = Candidate;
ShouldBroadcast = CandidateShouldBroadcast;
break;
}
}
} }
if (!Entry || !Entry->CDB) if (!CDB)
return llvm::None; return llvm::None;
// Mark CDB as broadcasted to make sure discovery is performed once. CDBLookupResult Result;
if (Request.ShouldBroadcast && !Entry->SentBroadcast) { Result.CDB = std::move(CDB);
Entry->SentBroadcast = true; Result.PI.SourceRoot = DirCache->path().str();
ShouldBroadcast = true;
}
Result.CDB = Entry->CDB.get(); // FIXME: Maybe make the following part async, since this can block
Result.PI.SourceRoot = Entry->Path; // retrieval of compile commands.
}
// FIXME: Maybe make the following part async, since this can block retrieval
// of compile commands.
if (ShouldBroadcast) if (ShouldBroadcast)
broadcastCDB(Result); broadcastCDB(Result);
return Result; return Result;
@ -200,17 +305,16 @@ void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
std::vector<std::string> AllFiles = Result.CDB->getAllFiles(); std::vector<std::string> AllFiles = Result.CDB->getAllFiles();
// We assume CDB in CompileCommandsDir owns all of its entries, since we don't // We assume CDB in CompileCommandsDir owns all of its entries, since we don't
// perform any search in parent paths whenever it is set. // perform any search in parent paths whenever it is set.
if (CompileCommandsDir) { if (OnlyDirCache) {
assert(*CompileCommandsDir == Result.PI.SourceRoot && assert(OnlyDirCache->path() == Result.PI.SourceRoot &&
"Trying to broadcast a CDB outside of CompileCommandsDir!"); "Trying to broadcast a CDB outside of CompileCommandsDir!");
OnCommandChanged.broadcast(std::move(AllFiles)); OnCommandChanged.broadcast(std::move(AllFiles));
return; return;
} }
llvm::StringMap<bool> DirectoryHasCDB;
{
std::lock_guard<std::mutex> Lock(Mutex);
// Uniquify all parent directories of all files. // Uniquify all parent directories of all files.
llvm::StringMap<bool> DirectoryHasCDB;
std::vector<llvm::StringRef> FileAncestors;
for (llvm::StringRef File : AllFiles) { for (llvm::StringRef File : AllFiles) {
actOnAllParentDirectories(File, [&](PathRef Path) { actOnAllParentDirectories(File, [&](PathRef Path) {
auto It = DirectoryHasCDB.try_emplace(Path); auto It = DirectoryHasCDB.try_emplace(Path);
@ -218,11 +322,15 @@ void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
if (!It.second) if (!It.second)
return true; return true;
CachedCDB &Entry = getCDBInDirLocked(Path); FileAncestors.push_back(It.first->getKey());
It.first->second = Entry.CDB != nullptr;
return pathEqual(Path, Result.PI.SourceRoot); return pathEqual(Path, Result.PI.SourceRoot);
}); });
} }
// Work out which ones have CDBs in them.
for (DirectoryCache *Dir : getDirectoryCaches(FileAncestors)) {
bool ShouldBroadcast = false;
if (Dir->get(ShouldBroadcast))
DirectoryHasCDB.find(Dir->path())->setValue(true);
} }
std::vector<std::string> GovernedFiles; std::vector<std::string> GovernedFiles;

View File

@ -81,13 +81,19 @@ public:
llvm::Optional<ProjectInfo> getProjectInfo(PathRef File) const override; llvm::Optional<ProjectInfo> getProjectInfo(PathRef File) const override;
private: private:
/// Caches compilation databases loaded from directories. class DirectoryCache;
struct CachedCDB { // If there's an explicit CompileCommandsDir, cache of the CDB found there.
std::string Path; // Not case-folded. mutable std::unique_ptr<DirectoryCache> OnlyDirCache;
std::unique_ptr<clang::tooling::CompilationDatabase> CDB = nullptr;
bool SentBroadcast = false; // Keyed by possibly-case-folded directory path.
}; // We can hand out pointers as they're stable and entries are never removed.
CachedCDB &getCDBInDirLocked(PathRef File) const; // Empty if CompileCommandsDir is given (OnlyDirCache is used instead).
mutable llvm::StringMap<DirectoryCache> DirCaches;
// DirCaches access must be locked (unlike OnlyDirCache, which is threadsafe).
mutable std::mutex DirCachesMutex;
std::vector<DirectoryCache *>
getDirectoryCaches(llvm::ArrayRef<llvm::StringRef> Dirs) const;
struct CDBLookupRequest { struct CDBLookupRequest {
PathRef FileName; PathRef FileName;
@ -95,21 +101,13 @@ private:
bool ShouldBroadcast = false; bool ShouldBroadcast = false;
}; };
struct CDBLookupResult { struct CDBLookupResult {
tooling::CompilationDatabase *CDB = nullptr; std::shared_ptr<const tooling::CompilationDatabase> CDB;
ProjectInfo PI; ProjectInfo PI;
}; };
llvm::Optional<CDBLookupResult> lookupCDB(CDBLookupRequest Request) const; llvm::Optional<CDBLookupResult> lookupCDB(CDBLookupRequest Request) const;
// Performs broadcast on governed files. // Performs broadcast on governed files.
void broadcastCDB(CDBLookupResult Res) const; void broadcastCDB(CDBLookupResult Res) const;
mutable std::mutex Mutex;
// Keyed by possibly-case-folded directory path.
mutable llvm::StringMap<CachedCDB> CompilationDatabases;
/// Used for command argument pointing to folder where compile_commands.json
/// is located.
llvm::Optional<Path> CompileCommandsDir;
}; };
/// Extracts system include search path from drivers matching QueryDriverGlobs /// Extracts system include search path from drivers matching QueryDriverGlobs