[clangd] Extract common file-caching logic from ConfigProvider.

The plan is to use this for .clang-format, .clang-tidy, and
compile_commands.json. (Currently the former two are reparsed every
time, and the latter is cached forever, so changes are never seen.)

Differential Revision: https://reviews.llvm.org/D88172
This commit is contained in:
Sam McCall 2020-09-23 19:58:32 +02:00
parent ad5b83ddcf
commit d95db1693c
7 changed files with 277 additions and 94 deletions

View File

@ -9,6 +9,7 @@
#include "ConfigProvider.h"
#include "Config.h"
#include "ConfigFragment.h"
#include "support/FileCache.h"
#include "support/ThreadsafeFS.h"
#include "support/Trace.h"
#include "llvm/ADT/ScopeExit.h"
@ -24,89 +25,28 @@ namespace clangd {
namespace config {
// Threadsafe cache around reading a YAML config file from disk.
class FileConfigCache {
std::mutex Mu;
std::chrono::steady_clock::time_point ValidTime = {};
llvm::SmallVector<CompiledFragment, 1> CachedValue;
llvm::sys::TimePoint<> MTime = {};
unsigned Size = -1;
// Called once we are sure we want to read the file.
// REQUIRES: Cache keys are set. Mutex must be held.
void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) {
CachedValue.clear();
auto Buf = FS.getBufferForFile(Path);
// If we failed to read (but stat succeeded), don't cache failure.
if (!Buf) {
Size = -1;
MTime = {};
return;
}
// If file changed between stat and open, we don't know its mtime.
// For simplicity, don't cache the value in this case (use a bad key).
if (Buf->get()->getBufferSize() != Size) {
Size = -1;
MTime = {};
}
// Finally parse and compile the actual fragments.
for (auto &Fragment :
Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC)) {
Fragment.Source.Directory = Directory;
CachedValue.push_back(std::move(Fragment).compile(DC));
}
}
public:
// Must be set before the cache is used. Not a constructor param to allow
// computing ancestor-relative paths to be deferred.
std::string Path;
// Directory associated with this fragment.
class FileConfigCache : public FileCache {
mutable llvm::SmallVector<CompiledFragment, 1> CachedValue;
std::string Directory;
// Retrieves up-to-date config fragments from disk.
// A cached result may be reused if the mtime and size are unchanged.
// (But several concurrent read()s can miss the cache after a single change).
// Future performance ideas:
// - allow caches to be reused based on short elapsed walltime
// - allow latency-sensitive operations to skip revalidating the cache
void read(const ThreadsafeFS &TFS, DiagnosticCallback DC,
llvm::Optional<std::chrono::steady_clock::time_point> FreshTime,
std::vector<CompiledFragment> &Out) {
std::lock_guard<std::mutex> Lock(Mu);
// We're going to update the cache and return whatever's in it.
auto Return = llvm::make_scope_exit(
[&] { llvm::copy(CachedValue, std::back_inserter(Out)); });
public:
FileConfigCache(llvm::StringRef Path, llvm::StringRef Directory)
: FileCache(Path), Directory(Directory) {}
// Return any sufficiently recent result without doing any further work.
if (FreshTime && ValidTime >= FreshTime)
return;
// Ensure we bump the ValidTime at the end to allow for reuse.
auto MarkTime = llvm::make_scope_exit(
[&] { ValidTime = std::chrono::steady_clock::now(); });
// Stat is cheaper than opening the file, it's usually unchanged.
assert(llvm::sys::path::is_absolute(Path));
auto FS = TFS.view(/*CWD=*/llvm::None);
auto Stat = FS->status(Path);
// If there's no file, the result is empty. Ensure we have an invalid key.
if (!Stat || !Stat->isRegularFile()) {
MTime = {};
Size = -1;
CachedValue.clear();
return;
}
// If the modified-time and size match, assume the content does too.
if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
return;
// OK, the file has actually changed. Update cache key, compute new value.
Size = Stat->getSize();
MTime = Stat->getLastModificationTime();
fillCacheFromDisk(*FS, DC);
void get(const ThreadsafeFS &TFS, DiagnosticCallback DC,
std::chrono::steady_clock::time_point FreshTime,
std::vector<CompiledFragment> &Out) const {
read(
TFS, FreshTime,
[&](llvm::Optional<llvm::StringRef> Data) {
CachedValue.clear();
if (Data)
for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) {
Fragment.Source.Directory = Directory;
CachedValue.push_back(std::move(Fragment).compile(DC));
}
},
[&]() { llvm::copy(CachedValue, std::back_inserter(Out)); });
}
};
@ -120,17 +60,15 @@ std::unique_ptr<Provider> Provider::fromYAMLFile(llvm::StringRef AbsPath,
std::vector<CompiledFragment>
getFragments(const Params &P, DiagnosticCallback DC) const override {
std::vector<CompiledFragment> Result;
Cache.read(FS, DC, P.FreshTime, Result);
Cache.get(FS, DC, P.FreshTime, Result);
return Result;
};
public:
AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory,
const ThreadsafeFS &FS)
: FS(FS) {
: Cache(Path, Directory), FS(FS) {
assert(llvm::sys::path::is_absolute(Path));
Cache.Path = Path.str();
Cache.Directory = Directory.str();
}
};
@ -174,23 +112,21 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath,
{
std::lock_guard<std::mutex> Lock(Mu);
for (llvm::StringRef Ancestor : Ancestors) {
auto R = Cache.try_emplace(Ancestor);
auto It = Cache.find(Ancestor);
// Assemble the actual config file path only once.
if (R.second) {
if (It == Cache.end()) {
llvm::SmallString<256> ConfigPath = Ancestor;
path::append(ConfigPath, RelPath);
R.first->second.Path = ConfigPath.str().str();
R.first->second.Directory = Ancestor.str();
It = Cache.try_emplace(Ancestor, ConfigPath.str(), Ancestor).first;
}
Caches.push_back(&R.first->second);
Caches.push_back(&It->second);
}
}
// Finally query each individual file.
// This will take a (per-file) lock for each file that actually exists.
std::vector<CompiledFragment> Result;
for (FileConfigCache *Cache : Caches) {
Cache->read(FS, DC, P.FreshTime, Result);
}
for (FileConfigCache *Cache : Caches)
Cache->get(FS, DC, P.FreshTime, Result);
return Result;
};

View File

@ -38,8 +38,9 @@ struct Params {
llvm::StringRef Path;
/// Hint that stale data is OK to improve performance (e.g. avoid IO).
/// FreshTime sets a bound for how old the data can be.
/// If not set, providers should validate caches against the data source.
llvm::Optional<std::chrono::steady_clock::time_point> FreshTime;
/// By default, providers should validate caches against the data source.
std::chrono::steady_clock::time_point FreshTime =
std::chrono::steady_clock::time_point::max();
};
/// Used to report problems in parsing or interpreting a config.

View File

@ -19,6 +19,7 @@ include_directories(..)
add_clang_library(clangdSupport
Cancellation.cpp
Context.cpp
FileCache.cpp
Logger.cpp
Markup.cpp
MemoryTree.cpp

View File

@ -0,0 +1,80 @@
//===--- FileCache.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "support/FileCache.h"
namespace clang {
namespace clangd {
// Reserved values of the Size cache key. Whenever Size holds one of them, a
// successful stat of the file will never cause the cached value to be reused.

// Key meaning: the cached value reflects that the file doesn't exist.
static constexpr uint64_t FileNotFound =
    std::numeric_limits<uint64_t>::max() - 1;
// Key meaning: the cached value does not reflect the current content on disk.
static constexpr uint64_t CacheDiskMismatch = FileNotFound + 1;
// Builds an empty cache for the file at Path (which must be absolute).
// The starting state cannot satisfy read()'s freshness check: ValidTime is the
// earliest representable time, and Size holds the "mismatch" sentinel.
FileCache::FileCache(llvm::StringRef Path)
    : Path(Path),
      ValidTime(std::chrono::steady_clock::time_point::min()),
      ModifiedTime(),
      Size(CacheDiskMismatch) {
  // Check the stored member; it was just initialized from the parameter.
  assert(llvm::sys::path::is_absolute(this->Path));
}
void FileCache::read(
const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime,
llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
llvm::function_ref<void()> Read) const {
std::lock_guard<std::mutex> Lock(Mu);
// We're going to update the cache and return whatever's in it.
auto Return = llvm::make_scope_exit(Read);
// Return any sufficiently recent result without doing any further work.
if (ValidTime > FreshTime)
return;
// Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on
// how often we do IO.
auto BumpValidTime = llvm::make_scope_exit(
[&] { ValidTime = std::chrono::steady_clock::now(); });
// stat is cheaper than opening the file. It's usually unchanged.
assert(llvm::sys::path::is_absolute(Path));
auto FS = TFS.view(/*CWD=*/llvm::None);
auto Stat = FS->status(Path);
if (!Stat || !Stat->isRegularFile()) {
if (Size != FileNotFound) // Allow "not found" value to be cached.
Parse(llvm::None);
// Ensure the cache key won't match any future stat().
Size = FileNotFound;
return;
}
// If the modified-time and size match, assume the content does too.
if (Size == Stat->getSize() &&
ModifiedTime == Stat->getLastModificationTime())
return;
// OK, the file has actually changed. Update cache key, compute new value.
Size = Stat->getSize();
ModifiedTime = Stat->getLastModificationTime();
// Now read the file from disk.
if (auto Buf = FS->getBufferForFile(Path)) {
Parse(Buf->get()->getBuffer());
// Result is cacheable if the actual read size matches the new cache key.
// (We can't update the cache key, because we don't know the new mtime).
if (Buf->get()->getBufferSize() != Size)
Size = CacheDiskMismatch;
} else {
// File was unreadable. Keep the old value and try again next time.
Size = CacheDiskMismatch;
}
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,81 @@
//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
#include "Path.h"
#include "ThreadsafeFS.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <mutex>
namespace clang {
namespace clangd {
/// Base class for threadsafe cache of data read from a file on disk.
///
/// We want configuration files to be "live" as much as possible.
/// Reading them every time is simplest, but caching solves a few problems:
/// - reading and parsing is cheap but not free (and happens on hot paths)
/// - we can ignore invalid data and use the old value (we may see truncated
/// compile_commands.json from non-atomic writers)
/// - we avoid reporting the same errors repeatedly
///
/// We still read and parse the data synchronously on demand, but skip as much
/// work as possible:
/// - if not enough wall-time has elapsed, assume the data is still up-to-date
/// - if we stat the file and it has the same mtime + size, don't read it
/// - obviously we only have to parse when we re-read the file
/// (Tracking OS change events is an alternative, but difficult to do portably.)
///
/// Caches for particular data (e.g. compilation databases) should inherit and:
/// - add mutable storage for the cached parsed data
/// - add a public interface implemented on top of read()
class FileCache {
protected:
// Path must be absolute.
FileCache(PathRef Path);
// Updates the cached value if needed, then provides threadsafe access to it.
//
// Specifically:
// - Parse() may be called (if the cache was not up-to-date)
// The lock is held, so cache storage may be safely written.
// Parse(None) means the file doesn't exist.
// - Read() will always be called, to provide access to the value.
// The lock is again held, so the value can be copied or used.
// If Parse() is called, it is called before Read().
//
// If the cache was last validated after FreshTime, we don't check metadata.
// (Validation means any call that got as far as stat()ing the file, whether
// or not it ended up calling Parse.)
// - time_point::min() means we only do IO if we never read the file before
// - time_point::max() means we always at least stat the file
// - steady_clock::now() - seconds(1) means we accept 1 second of staleness
// (a FreshTime in the future can never be satisfied, so it always stats)
void read(const ThreadsafeFS &TFS,
std::chrono::steady_clock::time_point FreshTime,
llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
llvm::function_ref<void()> Read) const;
// The path passed to the constructor.
PathRef path() const { return Path; }
private:
std::string Path;
// Members are mutable so read() can present a const interface.
// (It is threadsafe and approximates read-through to TFS).
mutable std::mutex Mu;
// Time when the cache was known valid (reflected disk state).
mutable std::chrono::steady_clock::time_point ValidTime;
// Filesystem metadata corresponding to the currently cached data.
// Size may instead hold a sentinel meaning "not found" or "don't reuse".
mutable std::chrono::system_clock::time_point ModifiedTime;
mutable uint64_t Size;
};
} // namespace clangd
} // namespace clang
#endif

View File

@ -151,6 +151,7 @@ TEST(ProviderTest, FromAncestorRelativeYAMLFiles) {
EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz"));
}
// FIXME: delete this test, it's covered by FileCacheTests.
TEST(ProviderTest, Staleness) {
MockFS FS;

View File

@ -0,0 +1,83 @@
//===-- FileCacheTests.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "support/FileCache.h"
#include "TestFS.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <atomic>
#include <chrono>
namespace clang {
namespace clangd {
namespace config {
namespace {
// FileCache whose "parsed" value is simply the raw content of one mock file.
// Lets tests change the file and observe when read() decides to re-parse.
class TestCache : public FileCache {
  MockFS FS;
  // Value stored by the most recent Parse callback.
  mutable std::string Value;

public:
  TestCache() : FileCache(testPath("foo.cc")) {}

  // Sets the content of the cached file; a null pointer removes the file.
  void setContents(const char *C) {
    if (C)
      FS.Files[testPath("foo.cc")] = C;
    else
      FS.Files.erase(testPath("foo.cc"));
  }

  // Runs read() with the given FreshTime and returns the value seen by the
  // Read callback. Also verifies that Parse was invoked iff ExpectParse, and
  // that Read was invoked.
  std::string get(std::chrono::steady_clock::time_point FreshTime,
                  bool ExpectParse) const {
    bool GotParse = false;
    // Must start out false: otherwise, if read() never invoked the Read
    // callback, the check below would inspect an uninitialized bool.
    bool GotRead = false;
    std::string Result;
    read(
        FS, FreshTime,
        [&](llvm::Optional<llvm::StringRef> Data) {
          GotParse = true;
          Value = Data.getValueOr("").str();
        },
        [&]() {
          GotRead = true;
          Result = Value;
        });
    EXPECT_EQ(GotParse, ExpectParse);
    EXPECT_TRUE(GotRead);
    return Result;
  }
};
// Walks the cache through each way read() can hit or miss:
//  - "Cached (time)": FreshTime is older than the last validation, no IO.
//  - "Cached (stat)": stat is performed but size + mtime match, no parse.
//  - size change / file removal / file re-creation: parse is required.
TEST(FileCacheTest, Invalidation) {
  TestCache C;

  // StaleOK predates every read below, so once the cache has been validated
  // it satisfies this bound. MustBeFresh lies in the future, so it can never
  // be satisfied and always forces at least a stat.
  auto StaleOK = std::chrono::steady_clock::now();
  auto MustBeFresh = StaleOK + std::chrono::hours(1);

  C.setContents("a");
  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time";
  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
  EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";

  C.setContents("bb");
  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
  // The previous call stored the new size/mtime as the cache key, so this
  // stat matches and no re-parse happens (same situation as for "a" above).
  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";

  C.setContents(nullptr);
  EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed";
  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)";

  C.setContents("bb"); // Match the previous stat values!
  EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
}
} // namespace
} // namespace config
} // namespace clangd
} // namespace clang