2018-09-14 19:39:05 +08:00
|
|
|
//===--- IndexerMain.cpp -----------------------------------------*- C++-*-===//
|
2017-12-22 22:38:05 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-12-22 22:38:05 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2018-10-05 17:05:28 +08:00
|
|
|
// clangd-indexer is a tool to gather index data (symbols, xrefs) from source.
|
2017-12-22 22:38:05 +08:00
|
|
|
//
|
2018-08-15 00:03:32 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2017-12-22 22:38:05 +08:00
|
|
|
|
2018-09-26 04:02:36 +08:00
|
|
|
#include "index/IndexAction.h"
|
2018-02-09 22:42:01 +08:00
|
|
|
#include "index/Merge.h"
|
2019-02-28 21:49:25 +08:00
|
|
|
#include "index/Ref.h"
|
[clangd] Define a compact binary serialization format for symbol slab/index.
Summary:
This is intended to replace the current YAML format for general use.
It's ~10x more compact than YAML, and ~40% more compact than gzipped YAML:
llvmidx.riff = 20M, llvmidx.yaml = 272M, llvmidx.yaml.gz = 32M
It's also simpler/faster to read and write.
The format is a RIFF container (chunks of (type, size, data)) with:
- a compressed string table
- simple binary encoding of symbols (with varints for compactness)
It can be extended to include occurrences, Dex posting lists, etc.
There's no rich backwards-compatibility scheme, but a version number is included
so we can detect incompatible files and do ad-hoc back-compat.
Alternatives considered:
- compressed YAML or JSON: bulky and slow to load
- llvm bitstream: confusing model and libraries are hard to use. My attempt
produced slightly larger files, and the code was longer and slower.
- protobuf or similar: would be really nice (esp for back-compat) but the
dependency is a big hassle
- ad-hoc binary format without a container: it seems clear we're going
to add posting lists and occurrences here, and that they will benefit
from sharing a string table. The container makes it easy to debug
these pieces in isolation, and make them optional.
Reviewers: ioeric
Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D51585
llvm-svn: 341375
2018-09-05 00:16:50 +08:00
|
|
|
#include "index/Serialization.h"
|
2019-02-28 21:49:25 +08:00
|
|
|
#include "index/Symbol.h"
|
2017-12-22 22:38:05 +08:00
|
|
|
#include "index/SymbolCollector.h"
|
2020-07-29 07:40:46 +08:00
|
|
|
#include "support/Logger.h"
|
2019-03-07 22:47:17 +08:00
|
|
|
#include "clang/Tooling/ArgumentsAdjusters.h"
|
2017-12-22 22:38:05 +08:00
|
|
|
#include "clang/Tooling/CommonOptionsParser.h"
|
2018-01-09 23:21:45 +08:00
|
|
|
#include "clang/Tooling/Execution.h"
|
2017-12-22 22:38:05 +08:00
|
|
|
#include "clang/Tooling/Tooling.h"
|
2018-01-09 23:21:45 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Signals.h"
|
2017-12-22 22:38:05 +08:00
|
|
|
|
|
|
|
namespace clang {
|
|
|
|
namespace clangd {
|
2018-02-09 22:42:01 +08:00
|
|
|
namespace {
|
2017-12-22 22:38:05 +08:00
|
|
|
|
2019-01-07 23:45:19 +08:00
|
|
|
/// Command-line option `format`: selects the serialization used for the
/// written index. Accepted values are "yaml" and "binary"; when the flag is
/// absent the option holds IndexFileFormat::RIFF.
static llvm::cl::opt<IndexFileFormat> Format(
    "format", llvm::cl::desc("Format of the index to be written"),
    llvm::cl::init(IndexFileFormat::RIFF),
    llvm::cl::values(
        clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
        clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")));
|
[clangd] Define a compact binary serialization format for symbol slab/index.
Summary:
This is intended to replace the current YAML format for general use.
It's ~10x more compact than YAML, and ~40% more compact than gzipped YAML:
llvmidx.riff = 20M, llvmidx.yaml = 272M, llvmidx.yaml.gz = 32M
It's also simpler/faster to read and write.
The format is a RIFF container (chunks of (type, size, data)) with:
- a compressed string table
- simple binary encoding of symbols (with varints for compactness)
It can be extended to include occurrences, Dex posting lists, etc.
There's no rich backwards-compatibility scheme, but a version number is included
so we can detect incompatible files and do ad-hoc back-compat.
Alternatives considered:
- compressed YAML or JSON: bulky and slow to load
- llvm bitstream: confusing model and libraries are hard to use. My attempt
produced slightly larger files, and the code was longer and slower.
- protobuf or similar: would be really nice (esp for back-compat) but the
dependency is a big hassle
- ad-hoc binary format without a container: it seems clear we're going
to add posting lists and occurrences here, and that they will benefit
from sharing a string table. The container makes it easy to debug
these pieces in isolation, and make them optional.
Reviewers: ioeric
Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D51585
llvm-svn: 341375
2018-09-05 00:16:50 +08:00
|
|
|
|
2018-10-04 16:30:03 +08:00
|
|
|
class IndexActionFactory : public tooling::FrontendActionFactory {
|
2017-12-22 22:38:05 +08:00
|
|
|
public:
|
2018-10-04 16:30:03 +08:00
|
|
|
IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
|
2017-12-22 22:38:05 +08:00
|
|
|
|
2019-08-30 00:38:36 +08:00
|
|
|
std::unique_ptr<FrontendAction> create() override {
|
2018-10-04 16:30:03 +08:00
|
|
|
SymbolCollector::Options Opts;
|
2019-05-09 22:22:07 +08:00
|
|
|
Opts.CountReferences = true;
|
2020-11-11 19:29:03 +08:00
|
|
|
Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
|
|
|
|
const auto *F = SM.getFileEntryForID(FID);
|
|
|
|
if (!F)
|
|
|
|
return false; // Skip invalid files.
|
|
|
|
auto AbsPath = getCanonicalPath(F, SM);
|
|
|
|
if (!AbsPath)
|
|
|
|
return false; // Skip files without absolute path.
|
|
|
|
std::lock_guard<std::mutex> Lock(FilesMu);
|
|
|
|
return Files.insert(*AbsPath).second; // Skip already processed files.
|
|
|
|
};
|
2018-09-26 04:02:36 +08:00
|
|
|
return createStaticIndexingAction(
|
2019-08-30 00:38:36 +08:00
|
|
|
Opts,
|
|
|
|
[&](SymbolSlab S) {
|
|
|
|
// Merge as we go.
|
|
|
|
std::lock_guard<std::mutex> Lock(SymbolsMu);
|
|
|
|
for (const auto &Sym : S) {
|
|
|
|
if (const auto *Existing = Symbols.find(Sym.ID))
|
|
|
|
Symbols.insert(mergeSymbol(*Existing, Sym));
|
|
|
|
else
|
|
|
|
Symbols.insert(Sym);
|
|
|
|
}
|
|
|
|
},
|
|
|
|
[&](RefSlab S) {
|
2020-11-11 19:29:03 +08:00
|
|
|
std::lock_guard<std::mutex> Lock(RefsMu);
|
2019-08-30 00:38:36 +08:00
|
|
|
for (const auto &Sym : S) {
|
|
|
|
// Deduplication happens during insertion.
|
|
|
|
for (const auto &Ref : Sym.second)
|
|
|
|
Refs.insert(Sym.first, Ref);
|
|
|
|
}
|
|
|
|
},
|
|
|
|
[&](RelationSlab S) {
|
2020-11-11 19:29:03 +08:00
|
|
|
std::lock_guard<std::mutex> Lock(RelsMu);
|
2019-08-30 00:38:36 +08:00
|
|
|
for (const auto &R : S) {
|
|
|
|
Relations.insert(R);
|
|
|
|
}
|
|
|
|
},
|
|
|
|
/*IncludeGraphCallback=*/nullptr);
|
2017-12-22 22:38:05 +08:00
|
|
|
}
|
|
|
|
|
2018-10-04 16:30:03 +08:00
|
|
|
// Awkward: we write the result in the destructor, because the executor
|
|
|
|
// takes ownership so it's the easiest way to get our data back out.
|
2018-10-04 22:09:55 +08:00
|
|
|
~IndexActionFactory() {
|
|
|
|
Result.Symbols = std::move(Symbols).build();
|
|
|
|
Result.Refs = std::move(Refs).build();
|
2019-06-15 10:26:47 +08:00
|
|
|
Result.Relations = std::move(Relations).build();
|
2018-10-04 22:09:55 +08:00
|
|
|
}
|
2018-08-24 17:03:54 +08:00
|
|
|
|
|
|
|
private:
|
2018-10-04 16:30:03 +08:00
|
|
|
IndexFileIn &Result;
|
2020-11-11 19:29:03 +08:00
|
|
|
std::mutex FilesMu;
|
|
|
|
llvm::StringSet<> Files;
|
2018-10-04 16:30:03 +08:00
|
|
|
std::mutex SymbolsMu;
|
|
|
|
SymbolSlab::Builder Symbols;
|
2020-11-11 19:29:03 +08:00
|
|
|
std::mutex RefsMu;
|
2018-10-04 22:09:55 +08:00
|
|
|
RefSlab::Builder Refs;
|
2020-11-11 19:29:03 +08:00
|
|
|
std::mutex RelsMu;
|
2019-06-15 10:26:47 +08:00
|
|
|
RelationSlab::Builder Relations;
|
2018-08-24 17:03:54 +08:00
|
|
|
};
|
2018-02-09 22:42:01 +08:00
|
|
|
|
|
|
|
} // namespace
|
2017-12-22 22:38:05 +08:00
|
|
|
} // namespace clangd
|
|
|
|
} // namespace clang
|
|
|
|
|
|
|
|
// Entry point of clangd-indexer. Builds a tooling executor from the command
// line, runs an IndexActionFactory over it, and streams the collected index to
// llvm::outs() in the format held by the Format option.
// Returns 1 when the executor cannot be created and 0 otherwise.
int main(int argc, const char **argv) {
|
2019-11-26 10:06:56 +08:00
|
|
|
// Ask LLVM to print a stack trace on error signals, passing argv[0].
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
|
2017-12-22 22:38:05 +08:00
|
|
|
|
2018-07-26 17:41:24 +08:00
|
|
|
// Overview text handed to the executor's command-line setup below.
const char *Overview = R"(
|
2018-10-04 16:30:03 +08:00
|
|
|
Creates an index of symbol information etc in a whole project.
|
2018-07-26 17:41:24 +08:00
|
|
|
|
2018-10-04 16:30:03 +08:00
|
|
|
Example usage for a project using CMake compile commands:
|
2018-07-26 17:41:24 +08:00
|
|
|
|
2018-10-05 17:05:28 +08:00
|
|
|
$ clangd-indexer --executor=all-TUs compile_commands.json > clangd.dex
|
2018-07-26 17:41:24 +08:00
|
|
|
|
|
|
|
Example usage for file sequence index without flags:
|
|
|
|
|
2018-10-05 17:05:28 +08:00
|
|
|
$ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
|
2018-07-26 17:41:24 +08:00
|
|
|
|
2018-10-04 16:30:03 +08:00
|
|
|
Note: only symbols from header files will be indexed.
|
2018-07-26 17:41:24 +08:00
|
|
|
)";
|
|
|
|
|
2018-01-09 23:21:45 +08:00
|
|
|
// Parse the command line and create the executor that will run the indexing
// actions.
auto Executor = clang::tooling::createExecutorFromCommandLineArgs(
|
2019-07-11 16:54:28 +08:00
|
|
|
argc, argv, llvm::cl::GeneralCategory, Overview);
|
2018-01-09 23:21:45 +08:00
|
|
|
|
|
|
|
// Executor creation failed: report the error on llvm::errs() and exit with
// status 1.
if (!Executor) {
|
2019-01-07 23:45:19 +08:00
|
|
|
llvm::errs() << llvm::toString(Executor.takeError()) << "\n";
|
2018-01-09 23:21:45 +08:00
|
|
|
return 1;
|
2017-12-22 22:38:05 +08:00
|
|
|
}
|
|
|
|
|
2018-10-04 16:30:03 +08:00
|
|
|
// Collect symbols found in each translation unit, merging as we go.
|
|
|
|
// Data is filled in by the IndexActionFactory constructed from it below.
clang::clangd::IndexFileIn Data;
|
2018-01-09 23:21:45 +08:00
|
|
|
auto Err = Executor->get()->execute(
|
2019-08-15 07:52:23 +08:00
|
|
|
std::make_unique<clang::clangd::IndexActionFactory>(Data),
|
2019-03-07 22:47:17 +08:00
|
|
|
clang::tooling::getStripPluginsAdjuster());
|
2018-01-09 23:21:45 +08:00
|
|
|
// An execution error is only logged; the run continues, the data in Data is
// still emitted below, and the exit status stays 0.
if (Err) {
|
2020-07-29 07:40:46 +08:00
|
|
|
clang::clangd::elog("{0}", std::move(Err));
|
2017-12-22 22:38:05 +08:00
|
|
|
}
|
2018-10-04 16:30:03 +08:00
|
|
|
|
|
|
|
// Emit collected data.
|
|
|
|
clang::clangd::IndexFileOut Out(Data);
|
[clangd] Merge binary + YAML serialization behind a (mostly) common interface.
Summary:
Interface is in one file, implementation in two as they have little in common.
A couple of ad-hoc YAML functions left exposed:
- symbol -> YAML I expect to keep for tools like dexp
- YAML -> symbol is used for the MR-style indexer, I think we can eliminate
this (merge-on-the-fly, else use a different serialization)
Reviewers: kbobyrev
Subscribers: mgorny, ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D52453
llvm-svn: 342999
2018-09-26 02:06:43 +08:00
|
|
|
// Use the value of the Format command-line option as the output format.
Out.Format = clang::clangd::Format;
|
2019-01-07 23:45:19 +08:00
|
|
|
// Stream the index to llvm::outs().
llvm::outs() << Out;
|
2017-12-22 22:38:05 +08:00
|
|
|
return 0;
|
|
|
|
}
|