[clangd] Use pre-populated mappings for standard symbols

Summary:
This takes ~5% of time when running clangd unit tests.

To achieve this, move mapping of system includes out of CanonicalIncludes
and into a separate class

Reviewers: sammccall, hokein

Reviewed By: sammccall

Subscribers: MaskRay, jkorous, arphaman, kadircet, jfb, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D67172

llvm-svn: 371408
This commit is contained in:
Ilya Biryukov 2019-09-09 15:32:51 +00:00
parent 8e3bc9b572
commit 8b76709bac
7 changed files with 84 additions and 76 deletions

View File

@ -367,7 +367,7 @@ ParsedAST::build(std::unique_ptr<clang::CompilerInvocation> CI,
if (Preamble)
CanonIncludes = Preamble->CanonIncludes;
else
addSystemHeadersMapping(&CanonIncludes, Clang->getLangOpts());
CanonIncludes.addSystemHeadersMapping(Clang->getLangOpts());
std::unique_ptr<CommentHandler> IWYUHandler =
collectIWYUHeaderMaps(&CanonIncludes);
Clang->getPreprocessor().addCommentHandler(IWYUHandler.get());

View File

@ -78,7 +78,7 @@ public:
}
void BeforeExecute(CompilerInstance &CI) override {
addSystemHeadersMapping(&CanonIncludes, CI.getLangOpts());
CanonIncludes.addSystemHeadersMapping(CI.getLangOpts());
SourceMgr = &CI.getSourceManager();
}

View File

@ -9,6 +9,7 @@
#include "CanonicalIncludes.h"
#include "Headers.h"
#include "clang/Driver/Types.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <algorithm>
@ -18,43 +19,40 @@ namespace {
const char IWYUPragma[] = "// IWYU pragma: private, include ";
} // namespace
void CanonicalIncludes::addPathSuffixMapping(llvm::StringRef Suffix,
llvm::StringRef CanonicalPath) {
int Components = std::distance(llvm::sys::path::begin(Suffix),
llvm::sys::path::end(Suffix));
MaxSuffixComponents = std::max(MaxSuffixComponents, Components);
SuffixHeaderMapping[Suffix] = CanonicalPath;
}
void CanonicalIncludes::addMapping(llvm::StringRef Path,
llvm::StringRef CanonicalPath) {
FullPathMapping[Path] = CanonicalPath;
}
void CanonicalIncludes::addSymbolMapping(llvm::StringRef QualifiedName,
llvm::StringRef CanonicalPath) {
this->SymbolMapping[QualifiedName] = CanonicalPath;
}
/// The maximum number of path components in a key from StdSuffixHeaderMapping.
/// Used to minimize the number of lookups in suffix path mappings.
constexpr int MaxSuffixComponents = 3;
llvm::StringRef
CanonicalIncludes::mapHeader(llvm::StringRef Header,
llvm::StringRef QualifiedName) const {
assert(!Header.empty());
auto SE = SymbolMapping.find(QualifiedName);
if (SE != SymbolMapping.end())
return SE->second;
if (StdSymbolMapping) {
auto SE = StdSymbolMapping->find(QualifiedName);
if (SE != StdSymbolMapping->end())
return SE->second;
}
auto MapIt = FullPathMapping.find(Header);
if (MapIt != FullPathMapping.end())
return MapIt->second;
if (!StdSuffixHeaderMapping)
return Header;
int Components = 1;
for (auto It = llvm::sys::path::rbegin(Header),
End = llvm::sys::path::rend(Header);
It != End && Components <= MaxSuffixComponents; ++It, ++Components) {
auto SubPath = Header.substr(It->data() - Header.begin());
auto MappingIt = SuffixHeaderMapping.find(SubPath);
if (MappingIt != SuffixHeaderMapping.end())
auto MappingIt = StdSuffixHeaderMapping->find(SubPath);
if (MappingIt != StdSuffixHeaderMapping->end())
return MappingIt->second;
}
return Header;
@ -86,29 +84,27 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
return std::make_unique<PragmaCommentHandler>(Includes);
}
void addSystemHeadersMapping(CanonicalIncludes *Includes,
const LangOptions &Language) {
static constexpr std::pair<const char *, const char *> SymbolMap[] = {
#define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header },
#include "StdSymbolMap.inc"
#undef SYMBOL
};
static constexpr std::pair<const char *, const char *> CSymbolMap[] = {
#define SYMBOL(Name, NameSpace, Header) { #Name, #Header },
#include "CSymbolMap.inc"
#undef SYMBOL
};
void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) {
if (Language.CPlusPlus) {
for (const auto &Pair : SymbolMap)
Includes->addSymbolMapping(Pair.first, Pair.second);
static const auto *Symbols = new llvm::StringMap<llvm::StringRef>({
#define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header},
#include "StdSymbolMap.inc"
#undef SYMBOL
});
StdSymbolMapping = Symbols;
} else if (Language.C11) {
for (const auto &Pair : CSymbolMap)
Includes->addSymbolMapping(Pair.first, Pair.second);
static const auto *CSymbols = new llvm::StringMap<llvm::StringRef>({
#define SYMBOL(Name, NameSpace, Header) {#Name, #Header},
#include "CSymbolMap.inc"
#undef SYMBOL
});
StdSymbolMapping = CSymbols;
}
// FIXME: remove the std header mapping once we support ambiguous symbols, now
// it serves as a fallback to disambiguate:
// - symbols with multiple headers (e.g. std::move)
static constexpr std::pair<const char *, const char *> SystemHeaderMap[] = {
static const auto *SystemHeaderMap = new llvm::StringMap<llvm::StringRef>({
{"include/__stddef_max_align_t.h", "<cstddef>"},
{"include/__wmmintrin_aes.h", "<wmmintrin.h>"},
{"include/__wmmintrin_pclmul.h", "<wmmintrin.h>"},
@ -760,9 +756,20 @@ void addSystemHeadersMapping(CanonicalIncludes *Includes,
{"bits/waitflags.h", "<sys/wait.h>"},
{"bits/waitstatus.h", "<sys/wait.h>"},
{"bits/xtitypes.h", "<stropts.h>"},
};
for (const auto &Pair : SystemHeaderMap)
Includes->addPathSuffixMapping(Pair.first, Pair.second);
});
// Check MaxSuffixComponents constant is correct.
assert(llvm::all_of(SystemHeaderMap->keys(), [](llvm::StringRef Path) {
return std::distance(llvm::sys::path::begin(Path),
llvm::sys::path::end(Path)) <= MaxSuffixComponents;
}));
// ... and precise.
assert(llvm::find_if(SystemHeaderMap->keys(), [](llvm::StringRef Path) {
return std::distance(llvm::sys::path::begin(Path),
llvm::sys::path::end(Path)) ==
MaxSuffixComponents;
}) != SystemHeaderMap->keys().end());
StdSuffixHeaderMapping = SystemHeaderMap;
}
} // namespace clangd

View File

@ -21,6 +21,7 @@
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include <mutex>
#include <string>
@ -34,37 +35,34 @@ namespace clangd {
/// Only const methods (i.e. mapHeader) in this class are thread safe.
class CanonicalIncludes {
public:
CanonicalIncludes() = default;
/// Adds a string-to-string mapping from \p Path to \p CanonicalPath.
void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath);
/// Maps files with last path components matching \p Suffix to \p
/// CanonicalPath.
void addPathSuffixMapping(llvm::StringRef Suffix,
llvm::StringRef CanonicalPath);
/// Sets the canonical include for any symbol with \p QualifiedName.
/// Symbol mappings take precedence over header mappings.
void addSymbolMapping(llvm::StringRef QualifiedName,
llvm::StringRef CanonicalPath);
/// Returns the canonical include for symbol with \p QualifiedName.
/// \p Header is the file the declaration was reachable from.
/// Header itself will be returned if there is no relevant mapping.
llvm::StringRef mapHeader(llvm::StringRef Header,
llvm::StringRef QualifiedName) const;
/// Adds mapping for system headers and some special symbols (e.g. STL symbols
/// in <iosfwd> need to be mapped individually). Approximately, the following
/// system headers are handled:
/// - C++ standard library e.g. bits/basic_string.h$ -> <string>
/// - Posix library e.g. bits/pthreadtypes.h$ -> <pthread.h>
/// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> <immintrin.h>
/// The mapping is hardcoded and hand-maintained, so it might not cover all
/// headers.
void addSystemHeadersMapping(const LangOptions &Language);
private:
/// A map from full include path to a canonical path.
llvm::StringMap<std::string> FullPathMapping;
/// A map from a suffix (one or components of a path) to a canonical path.
llvm::StringMap<std::string> SuffixHeaderMapping;
/// Maximum number of path components stored in a key of SuffixHeaderMapping.
/// Used to reduce the number of lookups into SuffixHeaderMapping.
int MaxSuffixComponents = 0;
/// Used only for mapping standard headers.
const llvm::StringMap<llvm::StringRef> *StdSuffixHeaderMapping = nullptr;
/// A map from fully qualified symbol names to header names.
llvm::StringMap<std::string> SymbolMapping;
/// Used only for mapping standard symbols.
const llvm::StringMap<llvm::StringRef> *StdSymbolMapping = nullptr;
};
/// Returns a CommentHandler that parses pragma comment on include files to
@ -76,17 +74,6 @@ private:
std::unique_ptr<CommentHandler>
collectIWYUHeaderMaps(CanonicalIncludes *Includes);
/// Adds mapping for system headers and some special symbols (e.g. STL symbols
/// in <iosfwd> need to be mapped individually). Approximately, the following
/// system headers are handled:
/// - C++ standard library e.g. bits/basic_string.h$ -> <string>
/// - Posix library e.g. bits/pthreadtypes.h$ -> <pthread.h>
/// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> <immintrin.h>
/// The mapping is hardcoded and hand-maintained, so it might not cover all
/// headers.
void addSystemHeadersMapping(CanonicalIncludes *Includes,
const LangOptions &Language);
} // namespace clangd
} // namespace clang

View File

@ -141,7 +141,7 @@ public:
std::unique_ptr<ASTConsumer>
CreateASTConsumer(CompilerInstance &CI, llvm::StringRef InFile) override {
CI.getPreprocessor().addCommentHandler(PragmaHandler.get());
addSystemHeadersMapping(Includes.get(), CI.getLangOpts());
Includes->addSystemHeadersMapping(CI.getLangOpts());
if (IncludeGraphCallback != nullptr)
CI.getPreprocessor().addPPCallbacks(
std::make_unique<IncludeGraphCollector>(CI.getSourceManager(), IG));

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "index/CanonicalIncludes.h"
#include "clang/Basic/LangOptions.h"
#include "gtest/gtest.h"
namespace clang {
@ -17,7 +18,7 @@ TEST(CanonicalIncludesTest, CStandardLibrary) {
CanonicalIncludes CI;
auto Language = LangOptions();
Language.C11 = true;
addSystemHeadersMapping(&CI, Language);
CI.addSystemHeadersMapping(Language);
// Usual standard library symbols are mapped correctly.
EXPECT_EQ("<stdio.h>", CI.mapHeader("path/stdio.h", "printf"));
}
@ -26,7 +27,7 @@ TEST(CanonicalIncludesTest, CXXStandardLibrary) {
CanonicalIncludes CI;
auto Language = LangOptions();
Language.CPlusPlus = true;
addSystemHeadersMapping(&CI, Language);
CI.addSystemHeadersMapping(Language);
// Usual standard library symbols are mapped correctly.
EXPECT_EQ("<vector>", CI.mapHeader("path/vector.h", "std::vector"));
@ -54,19 +55,32 @@ TEST(CanonicalIncludesTest, PathMapping) {
TEST(CanonicalIncludesTest, SymbolMapping) {
// As used for standard library.
CanonicalIncludes CI;
CI.addSymbolMapping("some::symbol", "<baz>");
LangOptions Language;
Language.CPlusPlus = true;
// Ensures 'std::vector' is mapped to '<vector>'.
CI.addSystemHeadersMapping(Language);
EXPECT_EQ("<baz>", CI.mapHeader("foo/bar", "some::symbol"));
EXPECT_EQ("<vector>", CI.mapHeader("foo/bar", "std::vector"));
EXPECT_EQ("foo/bar", CI.mapHeader("foo/bar", "other::symbol"));
}
TEST(CanonicalIncludesTest, Precedence) {
CanonicalIncludes CI;
CI.addMapping("some/path", "<path>");
CI.addSymbolMapping("some::symbol", "<symbol>");
LangOptions Language;
Language.CPlusPlus = true;
CI.addSystemHeadersMapping(Language);
// Symbol mapping beats path mapping.
EXPECT_EQ("<symbol>", CI.mapHeader("some/path", "some::symbol"));
// We added a mapping from some/path to <path>.
ASSERT_EQ("<path>", CI.mapHeader("some/path", ""));
// We should have a path from 'bits/stl_vector.h' to '<vector>'.
ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h", ""));
// We should also have a symbol mapping from 'std::map' to '<map>'.
ASSERT_EQ("<map>", CI.mapHeader("some/header.h", "std::map"));
// And the symbol mapping should take precedence over paths mapping.
EXPECT_EQ("<map>", CI.mapHeader("bits/stl_vector.h", "std::map"));
EXPECT_EQ("<map>", CI.mapHeader("some/path", "std::map"));
}
} // namespace

View File

@ -974,7 +974,7 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
CanonicalIncludes Includes;
auto Language = LangOptions();
Language.CPlusPlus = true;
addSystemHeadersMapping(&Includes, Language);
Includes.addSystemHeadersMapping(Language);
CollectorOpts.Includes = &Includes;
runSymbolCollector("namespace std { class string {}; }", /*Main=*/"");
EXPECT_THAT(Symbols,