forked from OSchip/llvm-project
Parse compile commands lazily in InterpolatingCompilationDatabase
Summary: This greatly reduces the time to read 'compile_commands.json'. For Chromium on my machine it's now 0.7 seconds vs 30 seconds before the change. Reviewers: sammccall, jfb Reviewed By: sammccall Subscribers: mgrang, jfb, cfe-commits Differential Revision: https://reviews.llvm.org/D51314 llvm-svn: 340838
This commit is contained in:
parent
6a92b5e1e2
commit
5167e2d1af
|
@ -123,8 +123,8 @@ static types::ID foldType(types::ID Lang) {
|
|||
struct TransferableCommand {
|
||||
// Flags that should not apply to all files are stripped from CommandLine.
|
||||
CompileCommand Cmd;
|
||||
// Language detected from -x or the filename.
|
||||
types::ID Type = types::TY_INVALID;
|
||||
// Language detected from -x or the filename. Never TY_INVALID.
|
||||
Optional<types::ID> Type;
|
||||
// Standard specified by -std.
|
||||
LangStandard::Kind Std = LangStandard::lang_unspecified;
|
||||
|
||||
|
@ -171,7 +171,10 @@ struct TransferableCommand {
|
|||
|
||||
if (Std != LangStandard::lang_unspecified) // -std take precedence over -x
|
||||
Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
|
||||
Type = foldType(Type);
|
||||
Type = foldType(*Type);
|
||||
// The contract is to store None instead of TY_INVALID.
|
||||
if (Type == types::TY_INVALID)
|
||||
Type = llvm::None;
|
||||
}
|
||||
|
||||
// Produce a CompileCommand for \p filename, based on this one.
|
||||
|
@ -181,10 +184,10 @@ struct TransferableCommand {
|
|||
bool TypeCertain;
|
||||
auto TargetType = guessType(Filename, &TypeCertain);
|
||||
// If the filename doesn't determine the language (.h), transfer with -x.
|
||||
if (!TypeCertain) {
|
||||
if (TargetType != types::TY_INVALID && !TypeCertain && Type) {
|
||||
TargetType = types::onlyPrecompileType(TargetType) // header?
|
||||
? types::lookupHeaderTypeForSourceType(Type)
|
||||
: Type;
|
||||
? types::lookupHeaderTypeForSourceType(*Type)
|
||||
: *Type;
|
||||
Result.CommandLine.push_back("-x");
|
||||
Result.CommandLine.push_back(types::getTypeName(TargetType));
|
||||
}
|
||||
|
@ -217,28 +220,31 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
// CommandIndex does the real work: given a filename, it produces the best
|
||||
// matching TransferableCommand by matching filenames. Basic strategy:
|
||||
// Given a filename, FileIndex picks the best matching file from the underlying
|
||||
// DB. This is the proxy file whose CompileCommand will be reused. The
|
||||
// heuristics incorporate file name, extension, and directory structure.
|
||||
// Strategy:
|
||||
// - Build indexes of each of the substrings we want to look up by.
|
||||
// These indexes are just sorted lists of the substrings.
|
||||
// - Forward requests to the inner CDB. If it fails, we must pick a proxy.
|
||||
// - Each criterion corresponds to a range lookup into the index, so we only
|
||||
// need O(log N) string comparisons to determine scores.
|
||||
// - We then break ties among the candidates with the highest score.
|
||||
class CommandIndex {
|
||||
//
|
||||
// Apart from path proximity signals, also takes file extensions into account
|
||||
// when scoring the candidates.
|
||||
class FileIndex {
|
||||
public:
|
||||
CommandIndex(std::vector<TransferableCommand> AllCommands)
|
||||
: Commands(std::move(AllCommands)), Strings(Arena) {
|
||||
FileIndex(std::vector<std::string> Files)
|
||||
: OriginalPaths(std::move(Files)), Strings(Arena) {
|
||||
// Sort commands by filename for determinism (index is a tiebreaker later).
|
||||
llvm::sort(
|
||||
Commands.begin(), Commands.end(),
|
||||
[](const TransferableCommand &Left, const TransferableCommand &Right) {
|
||||
return Left.Cmd.Filename < Right.Cmd.Filename;
|
||||
});
|
||||
for (size_t I = 0; I < Commands.size(); ++I) {
|
||||
StringRef Path =
|
||||
Strings.save(StringRef(Commands[I].Cmd.Filename).lower());
|
||||
Paths.push_back({Path, I});
|
||||
llvm::sort(OriginalPaths.begin(), OriginalPaths.end());
|
||||
Paths.reserve(OriginalPaths.size());
|
||||
Types.reserve(OriginalPaths.size());
|
||||
Stems.reserve(OriginalPaths.size());
|
||||
for (size_t I = 0; I < OriginalPaths.size(); ++I) {
|
||||
StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
|
||||
|
||||
Paths.emplace_back(Path, I);
|
||||
Types.push_back(foldType(guessType(Path)));
|
||||
Stems.emplace_back(sys::path::stem(Path), I);
|
||||
auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
|
||||
for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
|
||||
|
@ -250,29 +256,28 @@ public:
|
|||
llvm::sort(Components.begin(), Components.end());
|
||||
}
|
||||
|
||||
bool empty() const { return Commands.empty(); }
|
||||
bool empty() const { return Paths.empty(); }
|
||||
|
||||
// Returns the command that best fits OriginalFilename.
|
||||
// Candidates with PreferLanguage will be chosen over others (unless it's
|
||||
// TY_INVALID, or all candidates are bad).
|
||||
const TransferableCommand &chooseProxy(StringRef OriginalFilename,
|
||||
types::ID PreferLanguage) const {
|
||||
// Returns the path for the file that best fits OriginalFilename.
|
||||
// Candidates with extensions matching PreferLanguage will be chosen over
|
||||
// others (unless it's TY_INVALID, or all candidates are bad).
|
||||
StringRef chooseProxy(StringRef OriginalFilename,
|
||||
types::ID PreferLanguage) const {
|
||||
assert(!empty() && "need at least one candidate!");
|
||||
std::string Filename = OriginalFilename.lower();
|
||||
auto Candidates = scoreCandidates(Filename);
|
||||
std::pair<size_t, int> Best =
|
||||
pickWinner(Candidates, Filename, PreferLanguage);
|
||||
|
||||
DEBUG_WITH_TYPE("interpolate",
|
||||
llvm::dbgs()
|
||||
<< "interpolate: chose "
|
||||
<< Commands[Best.first].Cmd.Filename << " as proxy for "
|
||||
<< OriginalFilename << " preferring "
|
||||
<< (PreferLanguage == types::TY_INVALID
|
||||
? "none"
|
||||
: types::getTypeName(PreferLanguage))
|
||||
<< " score=" << Best.second << "\n");
|
||||
return Commands[Best.first];
|
||||
DEBUG_WITH_TYPE(
|
||||
"interpolate",
|
||||
llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
|
||||
<< " as proxy for " << OriginalFilename << " preferring "
|
||||
<< (PreferLanguage == types::TY_INVALID
|
||||
? "none"
|
||||
: types::getTypeName(PreferLanguage))
|
||||
<< " score=" << Best.second << "\n");
|
||||
return OriginalPaths[Best.first];
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -338,7 +343,7 @@ private:
|
|||
ScoredCandidate S;
|
||||
S.Index = Candidate.first;
|
||||
S.Preferred = PreferredLanguage == types::TY_INVALID ||
|
||||
PreferredLanguage == Commands[S.Index].Type;
|
||||
PreferredLanguage == Types[S.Index];
|
||||
S.Points = Candidate.second;
|
||||
if (!S.Preferred && Best.Preferred)
|
||||
continue;
|
||||
|
@ -371,7 +376,7 @@ private:
|
|||
// If Prefix is true, it's instead the range starting with Key.
|
||||
template <bool Prefix>
|
||||
ArrayRef<SubstringAndIndex>
|
||||
indexLookup(StringRef Key, const std::vector<SubstringAndIndex> &Idx) const {
|
||||
indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
|
||||
// Use pointers as iterators to ease conversion of result to ArrayRef.
|
||||
auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
|
||||
Less<Prefix>());
|
||||
|
@ -379,8 +384,8 @@ private:
|
|||
}
|
||||
|
||||
// Performs a point lookup into a nonempty index, returning a longest match.
|
||||
SubstringAndIndex
|
||||
longestMatch(StringRef Key, const std::vector<SubstringAndIndex> &Idx) const {
|
||||
SubstringAndIndex longestMatch(StringRef Key,
|
||||
ArrayRef<SubstringAndIndex> Idx) const {
|
||||
assert(!Idx.empty());
|
||||
// Longest substring match will be adjacent to a direct lookup.
|
||||
auto It =
|
||||
|
@ -395,22 +400,27 @@ private:
|
|||
return Prefix > PrevPrefix ? *It : *--It;
|
||||
}
|
||||
|
||||
std::vector<TransferableCommand> Commands; // Indexes point into this.
|
||||
// Original paths, everything else is in lowercase.
|
||||
std::vector<std::string> OriginalPaths;
|
||||
BumpPtrAllocator Arena;
|
||||
StringSaver Strings;
|
||||
// Indexes of candidates by certain substrings.
|
||||
// String is lowercase and sorted, index points into OriginalPaths.
|
||||
std::vector<SubstringAndIndex> Paths; // Full path.
|
||||
// Lang types obtained by guessing on the corresponding path. I-th element is
|
||||
// a type for the I-th path.
|
||||
std::vector<types::ID> Types;
|
||||
std::vector<SubstringAndIndex> Stems; // Basename, without extension.
|
||||
std::vector<SubstringAndIndex> Components; // Last path components.
|
||||
};
|
||||
|
||||
// The actual CompilationDatabase wrapper delegates to its inner database.
|
||||
// If no match, looks up a command in CommandIndex and transfers it to the file.
|
||||
// If no match, looks up a proxy file in FileIndex and transfers its
|
||||
// command to the requested file.
|
||||
class InterpolatingCompilationDatabase : public CompilationDatabase {
|
||||
public:
|
||||
InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
|
||||
: Inner(std::move(Inner)), Index(allCommands()) {}
|
||||
: Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
|
||||
|
||||
std::vector<CompileCommand>
|
||||
getCompileCommands(StringRef Filename) const override {
|
||||
|
@ -421,7 +431,11 @@ public:
|
|||
auto Lang = guessType(Filename, &TypeCertain);
|
||||
if (!TypeCertain)
|
||||
Lang = types::TY_INVALID;
|
||||
return {Index.chooseProxy(Filename, foldType(Lang)).transferTo(Filename)};
|
||||
auto ProxyCommands =
|
||||
Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
|
||||
if (ProxyCommands.empty())
|
||||
return {};
|
||||
return {TransferableCommand(ProxyCommands[0]).transferTo(Filename)};
|
||||
}
|
||||
|
||||
std::vector<std::string> getAllFiles() const override {
|
||||
|
@ -433,18 +447,8 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
std::vector<TransferableCommand> allCommands() {
|
||||
std::vector<TransferableCommand> Result;
|
||||
for (auto Command : Inner->getAllCompileCommands()) {
|
||||
Result.emplace_back(std::move(Command));
|
||||
if (Result.back().Type == types::TY_INVALID)
|
||||
Result.pop_back();
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
std::unique_ptr<CompilationDatabase> Inner;
|
||||
CommandIndex Index;
|
||||
FileIndex Index;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -707,6 +707,7 @@ TEST_F(InterpolateTest, Nearby) {
|
|||
|
||||
TEST_F(InterpolateTest, Language) {
|
||||
add("dir/foo.cpp", "-std=c++17");
|
||||
add("dir/bar.c", "");
|
||||
add("dir/baz.cee", "-x c");
|
||||
|
||||
// .h is ambiguous, so we add explicit language flags
|
||||
|
@ -716,9 +717,11 @@ TEST_F(InterpolateTest, Language) {
|
|||
EXPECT_EQ(getCommand("foo.hpp"), "clang -D dir/foo.cpp -std=c++17");
|
||||
// respect -x if it's already there.
|
||||
EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c-header");
|
||||
// prefer a worse match with the right language
|
||||
EXPECT_EQ(getCommand("foo.c"), "clang -D dir/baz.cee");
|
||||
Entries.erase(path(StringRef("dir/baz.cee")));
|
||||
// prefer a worse match with the right extension.
|
||||
EXPECT_EQ(getCommand("foo.c"), "clang -D dir/bar.c");
|
||||
// make sure we don't crash on queries with invalid extensions.
|
||||
EXPECT_EQ(getCommand("foo.cce"), "clang -D dir/foo.cpp");
|
||||
Entries.erase(path(StringRef("dir/bar.c")));
|
||||
// Now we transfer across languages, so drop -std too.
|
||||
EXPECT_EQ(getCommand("foo.c"), "clang -D dir/foo.cpp");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue