forked from OSchip/llvm-project
[clangd] Extract common file-caching logic from ConfigProvider.
The plan is to use this to use this for .clang-format, .clang-tidy, and compile_commands.json. (Currently the former two are reparsed every time, and the latter is cached forever and changes are never seen). Differential Revision: https://reviews.llvm.org/D88172
This commit is contained in:
parent
ad5b83ddcf
commit
d95db1693c
|
@ -9,6 +9,7 @@
|
|||
#include "ConfigProvider.h"
|
||||
#include "Config.h"
|
||||
#include "ConfigFragment.h"
|
||||
#include "support/FileCache.h"
|
||||
#include "support/ThreadsafeFS.h"
|
||||
#include "support/Trace.h"
|
||||
#include "llvm/ADT/ScopeExit.h"
|
||||
|
@ -24,89 +25,28 @@ namespace clangd {
|
|||
namespace config {
|
||||
|
||||
// Threadsafe cache around reading a YAML config file from disk.
|
||||
class FileConfigCache {
|
||||
std::mutex Mu;
|
||||
std::chrono::steady_clock::time_point ValidTime = {};
|
||||
llvm::SmallVector<CompiledFragment, 1> CachedValue;
|
||||
llvm::sys::TimePoint<> MTime = {};
|
||||
unsigned Size = -1;
|
||||
|
||||
// Called once we are sure we want to read the file.
|
||||
// REQUIRES: Cache keys are set. Mutex must be held.
|
||||
void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) {
|
||||
CachedValue.clear();
|
||||
|
||||
auto Buf = FS.getBufferForFile(Path);
|
||||
// If we failed to read (but stat succeeded), don't cache failure.
|
||||
if (!Buf) {
|
||||
Size = -1;
|
||||
MTime = {};
|
||||
return;
|
||||
}
|
||||
|
||||
// If file changed between stat and open, we don't know its mtime.
|
||||
// For simplicity, don't cache the value in this case (use a bad key).
|
||||
if (Buf->get()->getBufferSize() != Size) {
|
||||
Size = -1;
|
||||
MTime = {};
|
||||
}
|
||||
|
||||
// Finally parse and compile the actual fragments.
|
||||
for (auto &Fragment :
|
||||
Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC)) {
|
||||
Fragment.Source.Directory = Directory;
|
||||
CachedValue.push_back(std::move(Fragment).compile(DC));
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
// Must be set before the cache is used. Not a constructor param to allow
|
||||
// computing ancestor-relative paths to be deferred.
|
||||
std::string Path;
|
||||
// Directory associated with this fragment.
|
||||
class FileConfigCache : public FileCache {
|
||||
mutable llvm::SmallVector<CompiledFragment, 1> CachedValue;
|
||||
std::string Directory;
|
||||
|
||||
// Retrieves up-to-date config fragments from disk.
|
||||
// A cached result may be reused if the mtime and size are unchanged.
|
||||
// (But several concurrent read()s can miss the cache after a single change).
|
||||
// Future performance ideas:
|
||||
// - allow caches to be reused based on short elapsed walltime
|
||||
// - allow latency-sensitive operations to skip revalidating the cache
|
||||
void read(const ThreadsafeFS &TFS, DiagnosticCallback DC,
|
||||
llvm::Optional<std::chrono::steady_clock::time_point> FreshTime,
|
||||
std::vector<CompiledFragment> &Out) {
|
||||
std::lock_guard<std::mutex> Lock(Mu);
|
||||
// We're going to update the cache and return whatever's in it.
|
||||
auto Return = llvm::make_scope_exit(
|
||||
[&] { llvm::copy(CachedValue, std::back_inserter(Out)); });
|
||||
public:
|
||||
FileConfigCache(llvm::StringRef Path, llvm::StringRef Directory)
|
||||
: FileCache(Path), Directory(Directory) {}
|
||||
|
||||
// Return any sufficiently recent result without doing any further work.
|
||||
if (FreshTime && ValidTime >= FreshTime)
|
||||
return;
|
||||
|
||||
// Ensure we bump the ValidTime at the end to allow for reuse.
|
||||
auto MarkTime = llvm::make_scope_exit(
|
||||
[&] { ValidTime = std::chrono::steady_clock::now(); });
|
||||
|
||||
// Stat is cheaper than opening the file, it's usually unchanged.
|
||||
assert(llvm::sys::path::is_absolute(Path));
|
||||
auto FS = TFS.view(/*CWD=*/llvm::None);
|
||||
auto Stat = FS->status(Path);
|
||||
// If there's no file, the result is empty. Ensure we have an invalid key.
|
||||
if (!Stat || !Stat->isRegularFile()) {
|
||||
MTime = {};
|
||||
Size = -1;
|
||||
CachedValue.clear();
|
||||
return;
|
||||
}
|
||||
// If the modified-time and size match, assume the content does too.
|
||||
if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
|
||||
return;
|
||||
|
||||
// OK, the file has actually changed. Update cache key, compute new value.
|
||||
Size = Stat->getSize();
|
||||
MTime = Stat->getLastModificationTime();
|
||||
fillCacheFromDisk(*FS, DC);
|
||||
void get(const ThreadsafeFS &TFS, DiagnosticCallback DC,
|
||||
std::chrono::steady_clock::time_point FreshTime,
|
||||
std::vector<CompiledFragment> &Out) const {
|
||||
read(
|
||||
TFS, FreshTime,
|
||||
[&](llvm::Optional<llvm::StringRef> Data) {
|
||||
CachedValue.clear();
|
||||
if (Data)
|
||||
for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) {
|
||||
Fragment.Source.Directory = Directory;
|
||||
CachedValue.push_back(std::move(Fragment).compile(DC));
|
||||
}
|
||||
},
|
||||
[&]() { llvm::copy(CachedValue, std::back_inserter(Out)); });
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -120,17 +60,15 @@ std::unique_ptr<Provider> Provider::fromYAMLFile(llvm::StringRef AbsPath,
|
|||
std::vector<CompiledFragment>
|
||||
getFragments(const Params &P, DiagnosticCallback DC) const override {
|
||||
std::vector<CompiledFragment> Result;
|
||||
Cache.read(FS, DC, P.FreshTime, Result);
|
||||
Cache.get(FS, DC, P.FreshTime, Result);
|
||||
return Result;
|
||||
};
|
||||
|
||||
public:
|
||||
AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory,
|
||||
const ThreadsafeFS &FS)
|
||||
: FS(FS) {
|
||||
: Cache(Path, Directory), FS(FS) {
|
||||
assert(llvm::sys::path::is_absolute(Path));
|
||||
Cache.Path = Path.str();
|
||||
Cache.Directory = Directory.str();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -174,23 +112,21 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath,
|
|||
{
|
||||
std::lock_guard<std::mutex> Lock(Mu);
|
||||
for (llvm::StringRef Ancestor : Ancestors) {
|
||||
auto R = Cache.try_emplace(Ancestor);
|
||||
auto It = Cache.find(Ancestor);
|
||||
// Assemble the actual config file path only once.
|
||||
if (R.second) {
|
||||
if (It == Cache.end()) {
|
||||
llvm::SmallString<256> ConfigPath = Ancestor;
|
||||
path::append(ConfigPath, RelPath);
|
||||
R.first->second.Path = ConfigPath.str().str();
|
||||
R.first->second.Directory = Ancestor.str();
|
||||
It = Cache.try_emplace(Ancestor, ConfigPath.str(), Ancestor).first;
|
||||
}
|
||||
Caches.push_back(&R.first->second);
|
||||
Caches.push_back(&It->second);
|
||||
}
|
||||
}
|
||||
// Finally query each individual file.
|
||||
// This will take a (per-file) lock for each file that actually exists.
|
||||
std::vector<CompiledFragment> Result;
|
||||
for (FileConfigCache *Cache : Caches) {
|
||||
Cache->read(FS, DC, P.FreshTime, Result);
|
||||
}
|
||||
for (FileConfigCache *Cache : Caches)
|
||||
Cache->get(FS, DC, P.FreshTime, Result);
|
||||
return Result;
|
||||
};
|
||||
|
||||
|
|
|
@ -38,8 +38,9 @@ struct Params {
|
|||
llvm::StringRef Path;
|
||||
/// Hint that stale data is OK to improve performance (e.g. avoid IO).
|
||||
/// FreshTime sets a bound for how old the data can be.
|
||||
/// If not set, providers should validate caches against the data source.
|
||||
llvm::Optional<std::chrono::steady_clock::time_point> FreshTime;
|
||||
/// By default, providers should validate caches against the data source.
|
||||
std::chrono::steady_clock::time_point FreshTime =
|
||||
std::chrono::steady_clock::time_point::max();
|
||||
};
|
||||
|
||||
/// Used to report problems in parsing or interpreting a config.
|
||||
|
|
|
@ -19,6 +19,7 @@ include_directories(..)
|
|||
add_clang_library(clangdSupport
|
||||
Cancellation.cpp
|
||||
Context.cpp
|
||||
FileCache.cpp
|
||||
Logger.cpp
|
||||
Markup.cpp
|
||||
MemoryTree.cpp
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
//===--- FileCache.cpp ----------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "support/FileCache.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
// Sentinel values for the Size cache key. In both cases, a successful stat of
|
||||
// the file will never result in the cached value being reused.
|
||||
|
||||
// The cached value does not reflect the current content on disk.
|
||||
static constexpr uint64_t CacheDiskMismatch =
|
||||
std::numeric_limits<uint64_t>::max();
|
||||
// The cached value reflects that the file doesn't exist.
|
||||
static constexpr uint64_t FileNotFound = CacheDiskMismatch - 1;
|
||||
|
||||
FileCache::FileCache(llvm::StringRef Path)
|
||||
: Path(Path), ValidTime(std::chrono::steady_clock::time_point::min()),
|
||||
ModifiedTime(), Size(CacheDiskMismatch) {
|
||||
assert(llvm::sys::path::is_absolute(Path));
|
||||
}
|
||||
|
||||
void FileCache::read(
|
||||
const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime,
|
||||
llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
|
||||
llvm::function_ref<void()> Read) const {
|
||||
|
||||
std::lock_guard<std::mutex> Lock(Mu);
|
||||
// We're going to update the cache and return whatever's in it.
|
||||
auto Return = llvm::make_scope_exit(Read);
|
||||
|
||||
// Return any sufficiently recent result without doing any further work.
|
||||
if (ValidTime > FreshTime)
|
||||
return;
|
||||
|
||||
// Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on
|
||||
// how often we do IO.
|
||||
auto BumpValidTime = llvm::make_scope_exit(
|
||||
[&] { ValidTime = std::chrono::steady_clock::now(); });
|
||||
|
||||
// stat is cheaper than opening the file. It's usually unchanged.
|
||||
assert(llvm::sys::path::is_absolute(Path));
|
||||
auto FS = TFS.view(/*CWD=*/llvm::None);
|
||||
auto Stat = FS->status(Path);
|
||||
if (!Stat || !Stat->isRegularFile()) {
|
||||
if (Size != FileNotFound) // Allow "not found" value to be cached.
|
||||
Parse(llvm::None);
|
||||
// Ensure the cache key won't match any future stat().
|
||||
Size = FileNotFound;
|
||||
return;
|
||||
}
|
||||
// If the modified-time and size match, assume the content does too.
|
||||
if (Size == Stat->getSize() &&
|
||||
ModifiedTime == Stat->getLastModificationTime())
|
||||
return;
|
||||
|
||||
// OK, the file has actually changed. Update cache key, compute new value.
|
||||
Size = Stat->getSize();
|
||||
ModifiedTime = Stat->getLastModificationTime();
|
||||
// Now read the file from disk.
|
||||
if (auto Buf = FS->getBufferForFile(Path)) {
|
||||
Parse(Buf->get()->getBuffer());
|
||||
// Result is cacheable if the actual read size matches the new cache key.
|
||||
// (We can't update the cache key, because we don't know the new mtime).
|
||||
if (Buf->get()->getBufferSize() != Size)
|
||||
Size = CacheDiskMismatch;
|
||||
} else {
|
||||
// File was unreadable. Keep the old value and try again next time.
|
||||
Size = CacheDiskMismatch;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
|
@ -0,0 +1,81 @@
|
|||
//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
|
||||
|
||||
#include "Path.h"
|
||||
#include "ThreadsafeFS.h"
|
||||
#include "llvm/ADT/ScopeExit.h"
|
||||
#include "llvm/Support/Chrono.h"
|
||||
#include "llvm/Support/VirtualFileSystem.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
/// Base class for threadsafe cache of data read from a file on disk.
|
||||
///
|
||||
/// We want configuration files to be "live" as much as possible.
|
||||
/// Reading them every time is simplest, but caching solves a few problems:
|
||||
/// - reading and parsing is cheap but not free (and happens on hot paths)
|
||||
/// - we can ignore invalid data and use the old value (we may see truncated
|
||||
/// compile_commands.json from non-atomic writers)
|
||||
/// - we avoid reporting the same errors repeatedly
|
||||
///
|
||||
/// We still read and parse the data synchronously on demand, but skip as much
|
||||
/// work as possible:
|
||||
/// - if not enough wall-time has elapsed, assume the data is still up-to-date
|
||||
/// - if we stat the file and it has the same mtime + size, don't read it
|
||||
/// - obviously we only have to parse when we re-read the file
|
||||
/// (Tracking OS change events is an alternative, but difficult to do portably.)
|
||||
///
|
||||
/// Caches for particular data (e.g. compilation databases) should inherit and:
|
||||
/// - add mutable storage for the cached parsed data
|
||||
/// - add a public interface implemented on top of read()
|
||||
class FileCache {
|
||||
protected:
|
||||
// Path must be absolute.
|
||||
FileCache(PathRef Path);
|
||||
|
||||
// Updates the cached value if needed, then provides threadsafe access to it.
|
||||
//
|
||||
// Specifically:
|
||||
// - Parse() may be called (if the cache was not up-to-date)
|
||||
// The lock is held, so cache storage may be safely written.
|
||||
// Parse(None) means the file doesn't exist.
|
||||
// - Read() will always be called, to provide access to the value.
|
||||
// The lock is again held, so the value can be copied or used.
|
||||
//
|
||||
// If the last Parse is newer than FreshTime, we don't check metadata.
|
||||
// - time_point::min() means we only do IO if we never read the file before
|
||||
// - time_point::max() means we always at least stat the file
|
||||
// - steady_clock::now() + seconds(1) means we accept 1 second of staleness
|
||||
void read(const ThreadsafeFS &TFS,
|
||||
std::chrono::steady_clock::time_point FreshTime,
|
||||
llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
|
||||
llvm::function_ref<void()> Read) const;
|
||||
|
||||
PathRef path() const { return Path; }
|
||||
|
||||
private:
|
||||
std::string Path;
|
||||
// Members are mutable so read() can present a const interface.
|
||||
// (It is threadsafe and approximates read-through to TFS).
|
||||
mutable std::mutex Mu;
|
||||
// Time when the cache was known valid (reflected disk state).
|
||||
mutable std::chrono::steady_clock::time_point ValidTime;
|
||||
// Filesystem metadata corresponding to the currently cached data.
|
||||
mutable std::chrono::system_clock::time_point ModifiedTime;
|
||||
mutable uint64_t Size;
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
|
@ -151,6 +151,7 @@ TEST(ProviderTest, FromAncestorRelativeYAMLFiles) {
|
|||
EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz"));
|
||||
}
|
||||
|
||||
// FIXME: delete this test, it's covered by FileCacheTests.
|
||||
TEST(ProviderTest, Staleness) {
|
||||
MockFS FS;
|
||||
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
//===-- FileCacheTests.cpp ------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "support/FileCache.h"
|
||||
|
||||
#include "TestFS.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace config {
|
||||
namespace {
|
||||
|
||||
class TestCache : public FileCache {
|
||||
MockFS FS;
|
||||
mutable std::string Value;
|
||||
|
||||
public:
|
||||
TestCache() : FileCache(testPath("foo.cc")) {}
|
||||
|
||||
void setContents(const char *C) {
|
||||
if (C)
|
||||
FS.Files[testPath("foo.cc")] = C;
|
||||
else
|
||||
FS.Files.erase(testPath("foo.cc"));
|
||||
}
|
||||
|
||||
std::string get(std::chrono::steady_clock::time_point FreshTime,
|
||||
bool ExpectParse) const {
|
||||
bool GotParse = false;
|
||||
bool GotRead;
|
||||
std::string Result;
|
||||
read(
|
||||
FS, FreshTime,
|
||||
[&](llvm::Optional<llvm::StringRef> Data) {
|
||||
GotParse = true;
|
||||
Value = Data.getValueOr("").str();
|
||||
},
|
||||
[&]() {
|
||||
GotRead = true;
|
||||
Result = Value;
|
||||
});
|
||||
EXPECT_EQ(GotParse, ExpectParse);
|
||||
EXPECT_TRUE(GotRead);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(FileCacheTest, Invalidation) {
|
||||
TestCache C;
|
||||
|
||||
auto StaleOK = std::chrono::steady_clock::now();
|
||||
auto MustBeFresh = StaleOK + std::chrono::hours(1);
|
||||
|
||||
C.setContents("a");
|
||||
EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time";
|
||||
EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
|
||||
EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";
|
||||
C.setContents("bb");
|
||||
EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
|
||||
EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
|
||||
EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Cached (stat)";
|
||||
C.setContents(nullptr);
|
||||
EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
|
||||
EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed";
|
||||
EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)";
|
||||
C.setContents("bb"); // Match the previous stat values!
|
||||
EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
|
||||
EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace config
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
Loading…
Reference in New Issue