2018-09-12 15:49:44 +08:00
|
|
|
//===--- IndexBenchmark.cpp - Clangd index benchmarks -----------*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2018-09-12 15:49:44 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
[clangd] Merge binary + YAML serialization behind a (mostly) common interface.
Summary:
Interface is in one file, implementation in two as they have little in common.
A couple of ad-hoc YAML functions left exposed:
- symbol -> YAML I expect to keep for tools like dexp
- YAML -> symbol is used for the MR-style indexer, I think we can eliminate
this (merge-on-the-fly, else use a different serialization)
Reviewers: kbobyrev
Subscribers: mgorny, ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D52453
llvm-svn: 342999
2018-09-26 02:06:43 +08:00
|
|
|
#include "../index/Serialization.h"
|
2018-09-12 15:49:44 +08:00
|
|
|
#include "../index/dex/Dex.h"
|
|
|
|
#include "benchmark/benchmark.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/Support/Path.h"
|
|
|
|
#include "llvm/Support/Regex.h"
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
// Path of the index file to benchmark. Assigned from argv[1] in main() and
// passed to loadIndex() by buildMem() and buildDex().
const char *IndexFilename;
|
2018-09-13 22:21:50 +08:00
|
|
|
// Path of the JSON file holding the requests to replay. Assigned from argv[2]
// in main() and opened by extractQueriesFromLogs().
const char *RequestsFilename;
|
2018-09-12 15:49:44 +08:00
|
|
|
|
|
|
|
namespace clang {
|
|
|
|
namespace clangd {
|
|
|
|
namespace {
|
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
std::unique_ptr<SymbolIndex> buildMem() {
|
2022-01-13 20:42:01 +08:00
|
|
|
return loadIndex(IndexFilename, clang::clangd::SymbolOrigin::Static,
|
|
|
|
/*UseDex=*/false);
|
2018-09-12 15:49:44 +08:00
|
|
|
}
|
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
std::unique_ptr<SymbolIndex> buildDex() {
|
2022-01-13 20:42:01 +08:00
|
|
|
return loadIndex(IndexFilename, clang::clangd::SymbolOrigin::Static,
|
|
|
|
/*UseDex=*/true);
|
2018-09-12 15:49:44 +08:00
|
|
|
}
|
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
// Reads JSON array of serialized FuzzyFindRequest's from user-provided file.
|
|
|
|
std::vector<FuzzyFindRequest> extractQueriesFromLogs() {
|
2021-03-03 05:57:16 +08:00
|
|
|
|
|
|
|
auto Buffer = llvm::MemoryBuffer::getFile(RequestsFilename);
|
|
|
|
if (!Buffer) {
|
|
|
|
llvm::errs() << "Error cannot open JSON request file:" << RequestsFilename
|
|
|
|
<< ": " << Buffer.getError().message() << "\n";
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
StringRef Log = Buffer.get()->getBuffer();
|
2018-09-12 15:49:44 +08:00
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
std::vector<FuzzyFindRequest> Requests;
|
2019-01-07 23:45:19 +08:00
|
|
|
auto JSONArray = llvm::json::parse(Log);
|
2018-09-12 15:49:44 +08:00
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
// Panic if the provided file couldn't be parsed.
|
|
|
|
if (!JSONArray) {
|
2019-01-07 23:45:19 +08:00
|
|
|
llvm::errs() << "Error when parsing JSON requests file: "
|
|
|
|
<< llvm::toString(JSONArray.takeError());
|
2018-09-13 22:21:50 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (!JSONArray->getAsArray()) {
|
2019-01-07 23:45:19 +08:00
|
|
|
llvm::errs() << "Error: top-level value is not a JSON array: " << Log
|
|
|
|
<< '\n';
|
2018-09-13 22:21:50 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
2018-09-12 15:49:44 +08:00
|
|
|
|
2018-09-13 22:21:50 +08:00
|
|
|
for (const auto &Item : *JSONArray->getAsArray()) {
|
|
|
|
FuzzyFindRequest Request;
|
|
|
|
// Panic if the provided file couldn't be parsed.
|
[JSON] Add error reporting to fromJSON and ObjectMapper
Translating between JSON objects and C++ strutctures is common.
From experience in clangd, fromJSON/ObjectMapper work well and save a lot of
code, but aren't adopted elsewhere at least partly due to total lack of error
reporting beyond "ok"/"bad".
The recently-added error model should be rich enough for most applications.
It requires tracking the path within the root object and reporting local
errors at appropriate places.
To do this, we exploit the fact that the call graph of recursive
parse functions mirror the structure of the JSON itself.
The current path is represented as a linked list of segments, each of which is
on the stack as a parameter. Concretely, fromJSON now looks like:
bool fromJSON(const Value&, T&, Path);
Beyond the signature change, this is reasonably unobtrusive: building
the path segments is mostly handled by ObjectMapper and the vector<T> fromJSON.
However the root caller of fromJSON must now create a Root object to
store the errors, which is a little clunky.
I've added high-level parse<T>(StringRef) -> Expected<T>, but it's not
general enough to be the primary interface I think (at least, not usable in
clangd).
All existing users (mostly just clangd) are updated in this patch,
making this change backwards-compatible is a bit hairy.
Differential Revision: https://reviews.llvm.org/D88103
2020-09-24 07:14:12 +08:00
|
|
|
llvm::json::Path::Root Root("FuzzyFindRequest");
|
|
|
|
if (!fromJSON(Item, Request, Root)) {
|
|
|
|
llvm::errs() << llvm::toString(Root.getError()) << "\n";
|
|
|
|
Root.printErrorContext(Item, llvm::errs());
|
2018-09-13 22:21:50 +08:00
|
|
|
exit(1);
|
2018-09-12 15:49:44 +08:00
|
|
|
}
|
2018-09-13 22:21:50 +08:00
|
|
|
Requests.push_back(Request);
|
2018-09-12 15:49:44 +08:00
|
|
|
}
|
2018-09-13 22:21:50 +08:00
|
|
|
return Requests;
|
2018-09-12 15:49:44 +08:00
|
|
|
}
|
|
|
|
|
2022-02-01 18:14:07 +08:00
|
|
|
static void memQueries(benchmark::State &State) {
|
2018-09-12 15:49:44 +08:00
|
|
|
const auto Mem = buildMem();
|
|
|
|
const auto Requests = extractQueriesFromLogs();
|
|
|
|
for (auto _ : State)
|
|
|
|
for (const auto &Request : Requests)
|
|
|
|
Mem->fuzzyFind(Request, [](const Symbol &S) {});
|
|
|
|
}
|
2022-02-01 18:14:07 +08:00
|
|
|
BENCHMARK(memQueries);
|
2018-09-12 15:49:44 +08:00
|
|
|
|
2022-02-01 18:14:07 +08:00
|
|
|
static void dexQueries(benchmark::State &State) {
|
2018-09-12 15:49:44 +08:00
|
|
|
const auto Dex = buildDex();
|
|
|
|
const auto Requests = extractQueriesFromLogs();
|
|
|
|
for (auto _ : State)
|
|
|
|
for (const auto &Request : Requests)
|
|
|
|
Dex->fuzzyFind(Request, [](const Symbol &S) {});
|
|
|
|
}
|
2022-02-01 18:14:07 +08:00
|
|
|
BENCHMARK(dexQueries);
|
2018-09-12 15:49:44 +08:00
|
|
|
|
2022-02-01 18:14:07 +08:00
|
|
|
static void dexBuild(benchmark::State &State) {
|
2020-05-14 08:43:27 +08:00
|
|
|
for (auto _ : State)
|
|
|
|
buildDex();
|
|
|
|
}
|
2022-02-01 18:14:07 +08:00
|
|
|
BENCHMARK(dexBuild);
|
2020-05-14 08:43:27 +08:00
|
|
|
|
2018-09-12 15:49:44 +08:00
|
|
|
} // namespace
|
|
|
|
} // namespace clangd
|
|
|
|
} // namespace clang
|
|
|
|
|
|
|
|
// FIXME(kbobyrev): Add index building time benchmarks.
|
|
|
|
// FIXME(kbobyrev): Add memory consumption "benchmarks" by manually measuring
|
|
|
|
// in-memory index size and reporting it as time.
|
|
|
|
// FIXME(kbobyrev): Create a logger wrapper to suppress printing of debugging
// info.
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
if (argc < 3) {
|
2019-01-07 23:45:19 +08:00
|
|
|
llvm::errs() << "Usage: " << argv[0]
|
2021-07-21 20:50:40 +08:00
|
|
|
<< " global-symbol-index.dex requests.json "
|
2019-01-07 23:45:19 +08:00
|
|
|
"BENCHMARK_OPTIONS...\n";
|
2018-09-12 15:49:44 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
IndexFilename = argv[1];
|
2018-09-13 22:21:50 +08:00
|
|
|
RequestsFilename = argv[2];
|
2018-09-14 20:21:09 +08:00
|
|
|
// Trim first two arguments of the benchmark invocation and pretend no
|
|
|
|
// arguments were passed in the first place.
|
|
|
|
argv[2] = argv[0];
|
|
|
|
argv += 2;
|
|
|
|
argc -= 2;
|
2018-09-12 15:49:44 +08:00
|
|
|
::benchmark::Initialize(&argc, argv);
|
|
|
|
::benchmark::RunSpecifiedBenchmarks();
|
|
|
|
}
|