forked from OSchip/llvm-project
[clangd] Allow to merge symbols on-the-fly in global-symbol-builder
Summary: The new mode avoids serializing and deserializing YAML. This results in better performance and less memory usage. Reduce phase is now almost instant. The default is to use the old mode going through YAML serialization to allow migrating MapReduce clients that require the old mode to operate properly. After we migrate the clients, we can switch the default to the new mode. Reviewers: hokein, ioeric, kbobyrev, sammccall Reviewed By: ioeric Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D51155 llvm-svn: 340600
This commit is contained in:
parent
dbdd8e5238
commit
e9fe4db876
|
@ -49,9 +49,33 @@ static llvm::cl::opt<std::string> AssumedHeaderDir(
|
|||
"not given, such headers will have relative paths."),
|
||||
llvm::cl::init(""));
|
||||
|
||||
static llvm::cl::opt<bool> MergeOnTheFly(
|
||||
"merge-on-the-fly",
|
||||
llvm::cl::desc(
|
||||
"Merges symbols for each processed translation unit as soon "
|
||||
"they become available. This results in a smaller memory "
|
||||
"usage and an almost instant reduce stage. Optimal for running as a "
|
||||
"standalone tool, but cannot be used with multi-process executors like "
|
||||
"MapReduce."),
|
||||
llvm::cl::init(true), llvm::cl::Hidden);
|
||||
|
||||
/// Responsible for aggregating symbols from each processed file and producing
|
||||
/// the final results. All methods in this class must be thread-safe,
|
||||
/// 'consumeSymbols' may be called from multiple threads.
|
||||
class SymbolsConsumer {
|
||||
public:
|
||||
virtual ~SymbolsConsumer() = default;
|
||||
|
||||
/// Consume a SymbolSlab build for a file.
|
||||
virtual void consumeSymbols(SymbolSlab Symbols) = 0;
|
||||
/// Produce a resulting symbol slab, by combining occurrences of the same
|
||||
/// symbols across translation units.
|
||||
virtual SymbolSlab mergeResults() = 0;
|
||||
};
|
||||
|
||||
class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
|
||||
public:
|
||||
SymbolIndexActionFactory(tooling::ExecutionContext *Ctx) : Ctx(Ctx) {}
|
||||
SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
|
||||
|
||||
clang::FrontendAction *create() override {
|
||||
// Wraps the index action and reports collected symbols to the execution
|
||||
|
@ -61,10 +85,10 @@ public:
|
|||
WrappedIndexAction(std::shared_ptr<SymbolCollector> C,
|
||||
std::unique_ptr<CanonicalIncludes> Includes,
|
||||
const index::IndexingOptions &Opts,
|
||||
tooling::ExecutionContext *Ctx)
|
||||
SymbolsConsumer &Consumer)
|
||||
: WrapperFrontendAction(
|
||||
index::createIndexingAction(C, Opts, nullptr)),
|
||||
Ctx(Ctx), Collector(C), Includes(std::move(Includes)),
|
||||
Consumer(Consumer), Collector(C), Includes(std::move(Includes)),
|
||||
PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
|
||||
|
||||
std::unique_ptr<ASTConsumer>
|
||||
|
@ -91,14 +115,11 @@ public:
|
|||
return;
|
||||
}
|
||||
|
||||
auto Symbols = Collector->takeSymbols();
|
||||
for (const auto &Sym : Symbols) {
|
||||
Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym));
|
||||
}
|
||||
Consumer.consumeSymbols(Collector->takeSymbols());
|
||||
}
|
||||
|
||||
private:
|
||||
tooling::ExecutionContext *Ctx;
|
||||
SymbolsConsumer &Consumer;
|
||||
std::shared_ptr<SymbolCollector> Collector;
|
||||
std::unique_ptr<CanonicalIncludes> Includes;
|
||||
std::unique_ptr<CommentHandler> PragmaHandler;
|
||||
|
@ -118,30 +139,72 @@ public:
|
|||
CollectorOpts.Includes = Includes.get();
|
||||
return new WrappedIndexAction(
|
||||
std::make_shared<SymbolCollector>(std::move(CollectorOpts)),
|
||||
std::move(Includes), IndexOpts, Ctx);
|
||||
std::move(Includes), IndexOpts, Consumer);
|
||||
}
|
||||
|
||||
tooling::ExecutionContext *Ctx;
|
||||
SymbolsConsumer &Consumer;
|
||||
};
|
||||
|
||||
// Combine occurrences of the same symbol across translation units.
|
||||
SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
|
||||
SymbolSlab::Builder UniqueSymbols;
|
||||
llvm::BumpPtrAllocator Arena;
|
||||
Symbol::Details Scratch;
|
||||
Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
|
||||
Arena.Reset();
|
||||
llvm::yaml::Input Yin(Value, &Arena);
|
||||
auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
|
||||
clang::clangd::SymbolID ID;
|
||||
Key >> ID;
|
||||
if (const auto *Existing = UniqueSymbols.find(ID))
|
||||
UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
|
||||
else
|
||||
UniqueSymbols.insert(Sym);
|
||||
});
|
||||
return std::move(UniqueSymbols).build();
|
||||
}
|
||||
/// Stashes per-file results inside ExecutionContext, merges all of them at the
|
||||
/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
|
||||
/// from multiple files in memory.
|
||||
class ToolExecutorConsumer : public SymbolsConsumer {
|
||||
public:
|
||||
ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
|
||||
|
||||
void consumeSymbols(SymbolSlab Symbols) override {
|
||||
for (const auto &Sym : Symbols)
|
||||
Executor.getExecutionContext()->reportResult(Sym.ID.str(),
|
||||
SymbolToYAML(Sym));
|
||||
}
|
||||
|
||||
SymbolSlab mergeResults() override {
|
||||
SymbolSlab::Builder UniqueSymbols;
|
||||
llvm::BumpPtrAllocator Arena;
|
||||
Symbol::Details Scratch;
|
||||
Executor.getToolResults()->forEachResult(
|
||||
[&](llvm::StringRef Key, llvm::StringRef Value) {
|
||||
Arena.Reset();
|
||||
llvm::yaml::Input Yin(Value, &Arena);
|
||||
auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
|
||||
clang::clangd::SymbolID ID;
|
||||
Key >> ID;
|
||||
if (const auto *Existing = UniqueSymbols.find(ID))
|
||||
UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
|
||||
else
|
||||
UniqueSymbols.insert(Sym);
|
||||
});
|
||||
return std::move(UniqueSymbols).build();
|
||||
}
|
||||
|
||||
private:
|
||||
ToolExecutor &Executor;
|
||||
};
|
||||
|
||||
/// Merges symbols for each translation unit as soon as the file is processed.
|
||||
/// Optimal choice for standalone tools.
|
||||
class OnTheFlyConsumer : public SymbolsConsumer {
|
||||
public:
|
||||
void consumeSymbols(SymbolSlab Symbols) override {
|
||||
std::lock_guard<std::mutex> Lock(Mut);
|
||||
for (auto &&Sym : Symbols) {
|
||||
Symbol::Details Scratch;
|
||||
if (const auto *Existing = Result.find(Sym.ID))
|
||||
Result.insert(mergeSymbol(*Existing, Sym, &Scratch));
|
||||
else
|
||||
Result.insert(Sym);
|
||||
}
|
||||
}
|
||||
|
||||
SymbolSlab mergeResults() override {
|
||||
std::lock_guard<std::mutex> Lock(Mut);
|
||||
return std::move(Result).build();
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex Mut;
|
||||
SymbolSlab::Builder Result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
|
@ -181,18 +244,28 @@ int main(int argc, const char **argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
|
||||
llvm::errs()
|
||||
<< "Found multi-process executor, forcing the use of intermediate YAML "
|
||||
"serialization instead of the on-the-fly merge.\n";
|
||||
clang::clangd::MergeOnTheFly = false;
|
||||
}
|
||||
|
||||
std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
|
||||
if (clang::clangd::MergeOnTheFly)
|
||||
Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
|
||||
else
|
||||
Consumer =
|
||||
llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
|
||||
|
||||
// Map phase: emit symbols found in each translation unit.
|
||||
auto Err = Executor->get()->execute(
|
||||
llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
|
||||
Executor->get()->getExecutionContext()));
|
||||
llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
|
||||
if (Err) {
|
||||
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
|
||||
}
|
||||
|
||||
// Reduce phase: combine symbols using the ID as a key.
|
||||
auto UniqueSymbols =
|
||||
clang::clangd::mergeSymbols(Executor->get()->getToolResults());
|
||||
|
||||
// Reduce phase: combine symbols with the same IDs.
|
||||
auto UniqueSymbols = Consumer->mergeResults();
|
||||
// Output phase: emit YAML for result symbols.
|
||||
SymbolsToYAML(UniqueSymbols, llvm::outs());
|
||||
return 0;
|
||||
|
|
Loading…
Reference in New Issue