[clangd] Skip function bodies inside processed files while indexing

Summary:
This significantly improves performance of background indexing.

We do not collect references and declarations inside the processed
files, so this does not affect the final indexing results.

The idea is borrowed from libclang, which has a similar optimization in
its indexing functionality.

Measurements show a nice decrease in indexing time, up to ~40% for
building the whole index. These are not proper benchmarks, so one should
not rely on these results too much.

1. Rebuilding the whole index for LLVM:
  - Before. Total time: 14m58s.
    ./bin/clangd -pch-storage=memory < ./clangd.input  23917.67s user 515.86s system 2718% cpu 14:58.68 total
  - After. Total time: 8m41s.
    ./bin/clangd -pch-storage=memory < ./clangd.input  13627.29s user 288.10s system 2672% cpu 8:40.67 total

2. Rebuilding index after removing shards matching '*clangd*' (case-insensitively):
  - Before. Total time: 30s.
    ./bin/clangd -pch-storage=memory < ./clangd.input  130.94s user 6.82s system 452% cpu 30.423 total
  - After. Total time: 26s.
    ./bin/clangd -pch-storage=memory < ./clangd.input  80.51s user 5.40s system 333% cpu 25.777 total

Reviewers: kadircet, sammccall

Reviewed By: kadircet

Subscribers: MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D66226

llvm-svn: 369349
This commit is contained in:
Ilya Biryukov 2019-08-20 08:54:30 +00:00
parent 522377494b
commit 30c86b64da
3 changed files with 67 additions and 16 deletions

View File

@ -11,9 +11,17 @@
#include "Logger.h"
#include "index/Relation.h"
#include "index/SymbolOrigin.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/MultiplexConsumer.h"
#include "clang/Index/IndexingAction.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/STLExtras.h"
#include <functional>
#include <memory>
#include <utility>
namespace clang {
@ -113,6 +121,40 @@ private:
IncludeGraph &IG;
};
/// Returns an ASTConsumer that wraps \p Inner and additionally instructs the
/// parser to skip bodies of functions in the files that should not be
/// processed.
///
/// \param Inner the consumer to wrap; ownership is transferred to the returned
///        consumer.
/// \param ShouldIndexFile predicate over a FileID. It must be non-empty (this
///        is asserted). A function body is skipped when the predicate returns
///        false for the file containing the expansion location of the
///        declaration.
/// \returns a MultiplexConsumer holding the body-skipping consumer followed by
///          \p Inner.
static std::unique_ptr<ASTConsumer>
skipProcessedFunctions(std::unique_ptr<ASTConsumer> Inner,
                       std::function<bool(FileID)> ShouldIndexFile) {
  // Consumer whose only job is to answer shouldSkipFunctionBody() based on the
  // file filter.
  class SkipProcessedFunctions : public ASTConsumer {
  public:
    explicit SkipProcessedFunctions(std::function<bool(FileID)> FileFilter)
        : ShouldIndexFile(std::move(FileFilter)) {
      assert(this->ShouldIndexFile);
    }
    // Remember the context so that the SourceManager is reachable later.
    void Initialize(ASTContext &Context) override { this->Context = &Context; }
    bool shouldSkipFunctionBody(Decl *D) override {
      assert(Context && "Initialize() was never called.");
      auto &SM = Context->getSourceManager();
      auto FID = SM.getFileID(SM.getExpansionLoc(D->getLocation()));
      // Without a valid file we cannot consult the filter; keep the body.
      if (!FID.isValid())
        return false;
      return !ShouldIndexFile(FID);
    }

  private:
    std::function<bool(FileID)> ShouldIndexFile;
    const ASTContext *Context = nullptr;
  };
  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
  // ShouldIndexFile is a by-value sink parameter that is not used afterwards,
  // so move it into the consumer instead of copying the std::function.
  Consumers.push_back(
      std::make_unique<SkipProcessedFunctions>(std::move(ShouldIndexFile)));
  Consumers.push_back(std::move(Inner));
  return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}
// Wraps the index action and reports index data after each translation unit.
class IndexAction : public WrapperFrontendAction {
public:
@ -137,7 +179,9 @@ public:
if (IncludeGraphCallback != nullptr)
CI.getPreprocessor().addPPCallbacks(
std::make_unique<IncludeGraphCollector>(CI.getSourceManager(), IG));
return WrapperFrontendAction::CreateASTConsumer(CI, InFile);
return skipProcessedFunctions(
WrapperFrontendAction::CreateASTConsumer(CI, InFile),
[this](FileID FID) { return Collector->shouldIndexFile(FID); });
}
bool BeginInvocation(CompilerInstance &CI) override {
@ -147,6 +191,10 @@ public:
// Avoids some analyses too. Set in two places as we're late to the party.
CI.getDiagnosticOpts().IgnoreWarnings = true;
CI.getDiagnostics().setIgnoreAllWarnings(true);
// Instruct the parser to ask our ASTConsumer if it should skip function
// bodies. The ASTConsumer will take care of skipping only functions inside
// the files that we have already processed.
CI.getFrontendOpts().SkipFunctionBodies = true;
return WrapperFrontendAction::BeginInvocation(CI);
}

View File

@ -147,17 +147,6 @@ getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
}
/// Tells whether \p FID should be indexed according to \p Opts.FileFilter.
/// Without a filter every file is indexed. Otherwise the filter is consulted
/// at most once per FileID and its answer is memoized in \p FilesToIndexCache.
bool shouldIndexFile(const SourceManager &SM, FileID FID,
                     const SymbolCollector::Options &Opts,
                     llvm::DenseMap<FileID, bool> *FilesToIndexCache) {
  if (Opts.FileFilter) {
    auto Entry = FilesToIndexCache->try_emplace(FID);
    const bool IsNewEntry = Entry.second;
    if (IsNewEntry) {
      // First time we see this file: ask the filter, then record the answer.
      const bool Verdict = Opts.FileFilter(SM, FID);
      Entry.first->second = Verdict;
    }
    return Entry.first->second;
  }
  return true;
}
// Return the symbol location of the token at \p TokLoc.
llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
@ -410,7 +399,7 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name,
S.SymInfo = index::getSymbolInfoForMacro(*MI);
std::string FileURI;
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
shouldIndexFile(SM.getFileID(Loc));
if (auto DeclLoc =
getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
S.CanonicalDeclaration = *DeclLoc;
@ -540,7 +529,7 @@ void SymbolCollector::finish() {
for (const auto &LocAndRole : It.second) {
auto FileID = SM.getFileID(LocAndRole.first);
// FIXME: use the result to filter out references.
shouldIndexFile(SM, FileID, Opts, &FilesToIndexCache);
shouldIndexFile(FileID);
if (auto FileURI = GetURI(FileID)) {
auto Range =
getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
@ -590,7 +579,7 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
auto Loc = spellingLocIfSpelled(findName(&ND), SM);
assert(Loc.isValid() && "Invalid source location for NamedDecl");
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
shouldIndexFile(SM.getFileID(Loc));
if (auto DeclLoc =
getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
S.CanonicalDeclaration = *DeclLoc;
@ -650,7 +639,7 @@ void SymbolCollector::addDefinition(const NamedDecl &ND,
const auto &SM = ND.getASTContext().getSourceManager();
auto Loc = spellingLocIfSpelled(findName(&ND), SM);
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
shouldIndexFile(SM.getFileID(Loc));
if (auto DefLoc =
getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
S.Definition = *DefLoc;
@ -742,5 +731,14 @@ bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
return false;
}
// Answers whether FID passes Opts.FileFilter. With no filter configured every
// file is accepted; otherwise the filter result is computed once per FileID
// and remembered in FilesToIndexCache.
bool SymbolCollector::shouldIndexFile(FileID FID) {
  if (Opts.FileFilter) {
    auto Entry = FilesToIndexCache.try_emplace(FID);
    const bool IsNewEntry = Entry.second;
    if (IsNewEntry) {
      // Not cached yet: evaluate the filter, then store its answer.
      const bool Verdict = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
      Entry.first->second = Verdict;
    }
    return Entry.first->second;
  }
  return true;
}
} // namespace clangd
} // namespace clang

View File

@ -112,6 +112,11 @@ public:
RefSlab takeRefs() { return std::move(Refs).build(); }
RelationSlab takeRelations() { return std::move(Relations).build(); }
/// Returns true if we are interested in references and declarations from \p
/// FID. If this function returns false, bodies of functions inside that file
/// will be skipped to decrease indexing time.
bool shouldIndexFile(FileID FID);
void finish() override;
private: