[clangd] Allow to merge symbols on-the-fly in global-symbol-builder

Summary:
The new mode avoids serializing and deserializing YAML.
This results in better performance and less memory usage. Reduce phase
is now almost instant.

The default is to use the old mode going through YAML serialization to
allow migrating MapReduce clients that require the old mode to operate
properly. After we migrate the clients, we can switch the default to
the new mode.

Reviewers: hokein, ioeric, kbobyrev, sammccall

Reviewed By: ioeric

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D51155

llvm-svn: 340600
This commit is contained in:
Ilya Biryukov 2018-08-24 09:03:54 +00:00
parent dbdd8e5238
commit e9fe4db876
1 changed files with 108 additions and 35 deletions

View File

@ -49,9 +49,33 @@ static llvm::cl::opt<std::string> AssumedHeaderDir(
"not given, such headers will have relative paths."),
llvm::cl::init(""));
static llvm::cl::opt<bool> MergeOnTheFly(
"merge-on-the-fly",
llvm::cl::desc(
"Merges symbols for each processed translation unit as soon "
"they become available. This results in a smaller memory "
"usage and an almost instant reduce stage. Optimal for running as a "
"standalone tool, but cannot be used with multi-process executors like "
"MapReduce."),
llvm::cl::init(true), llvm::cl::Hidden);
/// Responsible for aggregating symbols from each processed file and producing
/// the final results. All methods in this class must be thread-safe,
/// 'consumeSymbols' may be called from multiple threads.
class SymbolsConsumer {
public:
virtual ~SymbolsConsumer() = default;
/// Consume a SymbolSlab build for a file.
virtual void consumeSymbols(SymbolSlab Symbols) = 0;
/// Produce a resulting symbol slab, by combining occurrences of the same
/// symbols across translation units.
virtual SymbolSlab mergeResults() = 0;
};
class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
public:
SymbolIndexActionFactory(tooling::ExecutionContext *Ctx) : Ctx(Ctx) {}
SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
clang::FrontendAction *create() override {
// Wraps the index action and reports collected symbols to the execution
@ -61,10 +85,10 @@ public:
WrappedIndexAction(std::shared_ptr<SymbolCollector> C,
std::unique_ptr<CanonicalIncludes> Includes,
const index::IndexingOptions &Opts,
tooling::ExecutionContext *Ctx)
SymbolsConsumer &Consumer)
: WrapperFrontendAction(
index::createIndexingAction(C, Opts, nullptr)),
Ctx(Ctx), Collector(C), Includes(std::move(Includes)),
Consumer(Consumer), Collector(C), Includes(std::move(Includes)),
PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
std::unique_ptr<ASTConsumer>
@ -91,14 +115,11 @@ public:
return;
}
auto Symbols = Collector->takeSymbols();
for (const auto &Sym : Symbols) {
Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym));
}
Consumer.consumeSymbols(Collector->takeSymbols());
}
private:
tooling::ExecutionContext *Ctx;
SymbolsConsumer &Consumer;
std::shared_ptr<SymbolCollector> Collector;
std::unique_ptr<CanonicalIncludes> Includes;
std::unique_ptr<CommentHandler> PragmaHandler;
@ -118,30 +139,72 @@ public:
CollectorOpts.Includes = Includes.get();
return new WrappedIndexAction(
std::make_shared<SymbolCollector>(std::move(CollectorOpts)),
std::move(Includes), IndexOpts, Ctx);
std::move(Includes), IndexOpts, Consumer);
}
tooling::ExecutionContext *Ctx;
SymbolsConsumer &Consumer;
};
// Combine occurrences of the same symbol across translation units.
SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
SymbolSlab::Builder UniqueSymbols;
llvm::BumpPtrAllocator Arena;
Symbol::Details Scratch;
Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
Arena.Reset();
llvm::yaml::Input Yin(Value, &Arena);
auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
clang::clangd::SymbolID ID;
Key >> ID;
if (const auto *Existing = UniqueSymbols.find(ID))
UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
else
UniqueSymbols.insert(Sym);
});
return std::move(UniqueSymbols).build();
}
/// Stashes per-file results inside ExecutionContext, merges all of them at the
/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
/// from multiple files in memory.
class ToolExecutorConsumer : public SymbolsConsumer {
public:
ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
void consumeSymbols(SymbolSlab Symbols) override {
for (const auto &Sym : Symbols)
Executor.getExecutionContext()->reportResult(Sym.ID.str(),
SymbolToYAML(Sym));
}
SymbolSlab mergeResults() override {
SymbolSlab::Builder UniqueSymbols;
llvm::BumpPtrAllocator Arena;
Symbol::Details Scratch;
Executor.getToolResults()->forEachResult(
[&](llvm::StringRef Key, llvm::StringRef Value) {
Arena.Reset();
llvm::yaml::Input Yin(Value, &Arena);
auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
clang::clangd::SymbolID ID;
Key >> ID;
if (const auto *Existing = UniqueSymbols.find(ID))
UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
else
UniqueSymbols.insert(Sym);
});
return std::move(UniqueSymbols).build();
}
private:
ToolExecutor &Executor;
};
/// Merges symbols for each translation unit as soon as the file is processed.
/// Optimal choice for standalone tools.
class OnTheFlyConsumer : public SymbolsConsumer {
public:
void consumeSymbols(SymbolSlab Symbols) override {
std::lock_guard<std::mutex> Lock(Mut);
for (auto &&Sym : Symbols) {
Symbol::Details Scratch;
if (const auto *Existing = Result.find(Sym.ID))
Result.insert(mergeSymbol(*Existing, Sym, &Scratch));
else
Result.insert(Sym);
}
}
SymbolSlab mergeResults() override {
std::lock_guard<std::mutex> Lock(Mut);
return std::move(Result).build();
}
private:
std::mutex Mut;
SymbolSlab::Builder Result;
};
} // namespace
} // namespace clangd
@ -181,18 +244,28 @@ int main(int argc, const char **argv) {
return 1;
}
if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
llvm::errs()
<< "Found multi-process executor, forcing the use of intermediate YAML "
"serialization instead of the on-the-fly merge.\n";
clang::clangd::MergeOnTheFly = false;
}
std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
if (clang::clangd::MergeOnTheFly)
Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
else
Consumer =
llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
// Map phase: emit symbols found in each translation unit.
auto Err = Executor->get()->execute(
llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
Executor->get()->getExecutionContext()));
llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
}
// Reduce phase: combine symbols using the ID as a key.
auto UniqueSymbols =
clang::clangd::mergeSymbols(Executor->get()->getToolResults());
// Reduce phase: combine symbols with the same IDs.
auto UniqueSymbols = Consumer->mergeResults();
// Output phase: emit YAML for result symbols.
SymbolsToYAML(UniqueSymbols, llvm::outs());
return 0;