Support symlinks and relative paths in compilation databases.

Review: http://llvm-reviews.chandlerc.com/D30
llvm-svn: 165392
This commit is contained in:
Daniel Jasper 2012-10-08 16:08:15 +00:00
parent 480477c0e1
commit 26cf9c4379
7 changed files with 393 additions and 12 deletions

View File

@ -33,6 +33,7 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <string>
#include <vector>

View File

@ -0,0 +1,91 @@
//===--- FileMatchTrie.h - --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a match trie to find the matching file in a compilation
// database based on a given path in the presence of symlinks.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H
#define LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <string>
#include <vector>
namespace clang {
namespace tooling {
/// \brief Interface for deciding whether two paths refer to the same file.
///
/// \c FileMatchTrie calls \c equivalent() on candidate paths; the
/// implementation is injectable so that tests can substitute their own
/// comparison.
struct PathComparator {
virtual ~PathComparator() {}
/// \brief Returns true if 'FileA' and 'FileB' are to be treated as the same
/// file.
virtual bool equivalent(const Twine &FileA, const Twine &FileB) const = 0;
};
class FileMatchTrieNode;
/// \brief A trie to efficiently match against the entries of the compilation
/// database in order of matching suffix length.
///
/// When a clang tool is supposed to operate on a specific file, we have to
/// find the corresponding file in the compilation database. Although entries
/// in the compilation database are keyed by filename, a simple string match
/// is insufficient because of symlinks. Commonly, a project hierarchy looks
/// like this:
/// /<project-root>/src/<path>/<somefile>.cc (used as input for the tool)
/// /<project-root>/build/<symlink-to-src>/<path>/<somefile>.cc (stored in DB)
///
/// Furthermore, there might be symlinks inside the source folder or inside the
/// database, so that the same source file is translated with different build
/// options.
///
/// For a given input file, the \c FileMatchTrie finds its entries in order
/// of matching suffix length. For each suffix length, there might be one or
/// more entries in the database. For each of those entries, it calls
/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might
/// be zero or more entries with the same matching suffix length that are
/// equivalent to the input file. Three cases are distinguished:
/// 0 equivalent files: Continue with the next suffix length.
/// 1 equivalent file: Best match found, return it.
/// >1 equivalent files: Match is ambiguous, return error.
class FileMatchTrie {
public:
/// \brief Construct an empty \c FileMatchTrie that uses the default
/// \c PathComparator.
FileMatchTrie();
/// \brief Construct a new \c FileMatchTrie with the given \c PathComparator.
///
/// The \c FileMatchTrie takes ownership of 'Comparator'. Used for testing.
FileMatchTrie(PathComparator* Comparator);
/// \brief Destroys the trie, including its root node.
~FileMatchTrie();
/// \brief Insert a new absolute path. Relative paths are ignored.
void insert(StringRef NewPath);
/// \brief Finds the corresponding file in this trie.
///
/// Returns the file name stored in this trie that is equivalent to
/// 'FileName' according to 'Comparator', if it can be uniquely identified.
/// If there are no matches, an empty \c StringRef is returned. If there are
/// ambiguous matches, an empty \c StringRef is returned and a corresponding
/// message is written to 'Error'. A relative 'FileName' is never resolved:
/// an empty \c StringRef is returned and a message is written to 'Error'.
StringRef findEquivalent(StringRef FileName,
llvm::raw_ostream &Error) const;
private:
// Root node of the trie; allocated in the constructors and deleted in the
// destructor.
FileMatchTrieNode *Root;
// Comparator used to decide whether a stored path matches a queried path.
OwningPtr<PathComparator> Comparator;
};
} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H

View File

@ -17,6 +17,7 @@
#include "clang/Basic/LLVM.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "clang/Tooling/FileMatchTrie.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@ -93,6 +94,8 @@ private:
// Maps file paths to the compile command lines for that file.
llvm::StringMap< std::vector<CompileCommandRef> > IndexByFile;
FileMatchTrie MatchTrie;
llvm::OwningPtr<llvm::MemoryBuffer> Database;
llvm::SourceMgr SM;
llvm::yaml::Stream YAMLStream;

View File

@ -4,6 +4,7 @@ add_clang_library(clangTooling
ArgumentsAdjusters.cpp
CommonOptionsParser.cpp
CompilationDatabase.cpp
FileMatchTrie.cpp
JSONCompilationDatabase.cpp
Refactoring.cpp
RefactoringCallbacks.cpp

View File

@ -0,0 +1,188 @@
//===--- FileMatchTrie.cpp - ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of a FileMatchTrie.
//
//===----------------------------------------------------------------------===//
#include <sstream>
#include "clang/Tooling/FileMatchTrie.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/PathV2.h"
namespace clang {
namespace tooling {
/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
struct DefaultPathComparator : public PathComparator {
  virtual ~DefaultPathComparator() {}

  /// \brief Returns true if both paths are spelled identically or if
  /// \c llvm::sys::fs::equivalent() reports them as equivalent.
  virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
    // Identical spellings need no file system query.
    if (FileA.str() == FileB.str())
      return true;
    return llvm::sys::fs::equivalent(FileA, FileB);
  }
};
/// \brief A node of the \c FileMatchTrie.
///
/// Each node has storage for up to one path and a map mapping a path segment to
/// child nodes. The trie starts with an empty root node.
class FileMatchTrieNode {
public:
  /// \brief Adds 'NewPath' to the trie below this node.
  ///
  /// \c ConsumedLength is the number of trailing characters of \c NewPath
  /// that earlier recursion steps have already consumed.
  ///
  /// Inserting a path 'p' starts at the root node and proceeds as follows:
  /// - If the node is empty, store 'p' in it and stop.
  /// - If the node stores a path 'p2' but has no children, take the last
  ///   unconsumed segment 's' of 'p2', create a child at 's' and store 'p2'
  ///   there.
  /// - Look up (or create) the child for the last unconsumed segment of 'p'
  ///   and continue the insertion of 'p' there.
  ///
  /// An insert operation is linear in the number of a path's segments.
  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
    // Relative paths are rejected: with them one path could be a postfix of
    // another, which violates a core assumption of the trie.
    if (llvm::sys::path::is_relative(NewPath))
      return;

    if (Path.empty()) {
      // Empty leaf: it simply takes over NewPath.
      Path = NewPath;
      return;
    }

    if (Children.empty()) {
      // Occupied leaf. Inserting the very same path again is a no-op.
      if (NewPath == Path)
        return;
      // Otherwise this leaf becomes an inner node: push the stored path down
      // into a child keyed by its next unconsumed segment.
      StringRef StoredRest = StringRef(Path).drop_back(ConsumedLength);
      StringRef StoredSegment = llvm::sys::path::filename(StoredRest);
      Children[StoredSegment].Path = Path;
    }

    // Descend along the next unconsumed segment of NewPath. The extra '1'
    // accounts for the separator in front of the segment.
    StringRef NewRest = NewPath.drop_back(ConsumedLength);
    StringRef NewSegment = llvm::sys::path::filename(NewRest);
    Children[NewSegment].insert(NewPath,
                                ConsumedLength + NewSegment.size() + 1);
  }

  /// \brief Looks for the path stored under this node that best matches
  /// 'FileName'.
  ///
  /// If several paths fit 'FileName' equally well, \c IsAmbiguous is set to
  /// \c true and an empty string is returned. If no path fits 'FileName', an
  /// empty string is returned. \c ConsumedLength is the number of trailing
  /// characters of \c FileName already consumed during recursion.
  ///
  /// The lookup follows the segments of 'FileName' from back to front,
  /// descending into the child keyed by each segment, until it reaches a
  /// node 'n' that
  /// - has no children. Then the stored path is compared against 'FileName'.
  ///   If they are equivalent, the best match is found. Otherwise an empty
  ///   string is returned and the parent continues as if this node did not
  ///   exist.
  /// - has no child that yields a result for the next segment. Then all
  ///   paths stored below 'n' (except those below a matching child that was
  ///   already tried) are equally good candidates. Each of them is checked
  ///   for equivalence with 'FileName'. If none is equivalent, the parent
  ///   continues as if 'n' did not exist. If exactly one is equivalent, it
  ///   is the best match. Otherwise an ambiguity is reported.
  StringRef findEquivalent(const PathComparator& Comparator,
                           StringRef FileName,
                           bool &IsAmbiguous,
                           unsigned ConsumedLength = 0) const {
    if (Children.empty()) {
      // Leaf: the stored path is the only candidate.
      if (!Comparator.equivalent(StringRef(Path), FileName))
        return StringRef();
      return StringRef(Path);
    }

    StringRef Segment = llvm::sys::path::filename(
        FileName.drop_back(ConsumedLength));
    llvm::StringMap<FileMatchTrieNode>::const_iterator Exact =
        Children.find(Segment);
    if (Exact != Children.end()) {
      // A child shares the next segment, so it can only yield a longer
      // matching suffix; prefer whatever it finds.
      StringRef Deeper = Exact->getValue().findEquivalent(
          Comparator, FileName, IsAmbiguous,
          ConsumedLength + Segment.size() + 1);
      if (IsAmbiguous || !Deeper.empty())
        return Deeper;
    }

    // Fall back to every other path below this node; all of them share the
    // same matching suffix length with 'FileName'.
    std::vector<StringRef> Candidates;
    getAll(Candidates, Exact);

    StringRef UniqueMatch;
    for (std::vector<StringRef>::const_iterator I = Candidates.begin(),
                                                E = Candidates.end();
         I != E; ++I) {
      if (!Comparator.equivalent(*I, FileName))
        continue;
      if (!UniqueMatch.empty()) {
        // A second equivalent candidate makes the result ambiguous.
        IsAmbiguous = true;
        return StringRef();
      }
      UniqueMatch = *I;
    }
    return UniqueMatch;
  }

private:
  /// \brief Appends all paths stored under this node to 'Results', skipping
  /// the direct child that 'Except' points to.
  void getAll(std::vector<StringRef> &Results,
              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
    // An empty node stores nothing.
    if (Path.empty())
      return;
    // A leaf contributes exactly its stored path.
    if (Children.empty()) {
      Results.push_back(StringRef(Path));
      return;
    }
    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
             Child = Children.begin(), End = Children.end();
         Child != End; ++Child) {
      if (Child != Except)
        Child->getValue().getAll(Results, Children.end());
    }
  }

  // The absolute path stored in this node. Only valid for leaf nodes, i.e.
  // nodes where Children.empty().
  std::string Path;

  // The children of this node, keyed by the next path segment.
  llvm::StringMap<FileMatchTrieNode> Children;
};
/// \brief Creates an empty trie that compares paths with a
/// \c DefaultPathComparator.
FileMatchTrie::FileMatchTrie()
: Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
/// \brief Creates an empty trie that compares paths with the supplied
/// 'Comparator', which is stored in the owning \c Comparator member.
FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
: Root(new FileMatchTrieNode), Comparator(Comparator) {}
/// \brief Releases the root node that was allocated in the constructor.
FileMatchTrie::~FileMatchTrie() {
delete Root;
}
/// \brief Inserts 'NewPath' by forwarding to the root node, which ignores
/// relative paths.
void FileMatchTrie::insert(StringRef NewPath) {
Root->insert(NewPath);
}
/// \brief Looks up the stored path that is equivalent to 'FileName'.
///
/// Returns an empty \c StringRef if 'FileName' is relative, if nothing
/// matches, or if the match is ambiguous. The relative and ambiguous cases
/// additionally write a message to 'Error'.
StringRef FileMatchTrie::findEquivalent(StringRef FileName,
                                        llvm::raw_ostream &Error) const {
  // Only absolute paths are stored in the trie, so relative queries are
  // rejected up front.
  if (llvm::sys::path::is_relative(FileName)) {
    Error << "Cannot resolve relative paths";
    return StringRef();
  }

  bool Ambiguous = false;
  StringRef Match = Root->findEquivalent(*Comparator, FileName, Ambiguous);
  if (Ambiguous)
    Error << "Path is ambiguous";
  return Match;
}
} // end namespace tooling
} // end namespace clang

View File

@ -164,8 +164,18 @@ std::vector<CompileCommand>
JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
llvm::SmallString<128> NativeFilePath;
llvm::sys::path::native(FilePath, NativeFilePath);
std::vector<StringRef> PossibleMatches;
std::string Error;
llvm::raw_string_ostream ES(Error);
StringRef Match = MatchTrie.findEquivalent(NativeFilePath.str(), ES);
if (Match.empty()) {
if (Error.empty())
Error = "No match found.";
llvm::outs() << Error << "\n";
return std::vector<CompileCommand>();
}
llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
CommandsRefI = IndexByFile.find(NativeFilePath);
CommandsRefI = IndexByFile.find(Match);
if (CommandsRefI == IndexByFile.end())
return std::vector<CompileCommand>();
const std::vector<CompileCommandRef> &CommandsRef = CommandsRefI->getValue();
@ -271,10 +281,20 @@ bool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
return false;
}
llvm::SmallString<8> FileStorage;
StringRef FileName = File->getValue(FileStorage);
llvm::SmallString<128> NativeFilePath;
llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath);
if (llvm::sys::path::is_relative(FileName)) {
llvm::SmallString<8> DirectoryStorage;
llvm::SmallString<128> AbsolutePath(
Directory->getValue(DirectoryStorage));
llvm::sys::path::append(AbsolutePath, FileName);
llvm::sys::path::native(AbsolutePath.str(), NativeFilePath);
} else {
llvm::sys::path::native(FileName, NativeFilePath);
}
IndexByFile[NativeFilePath].push_back(
CompileCommandRef(Directory, Command));
CompileCommandRef(Directory, Command));
MatchTrie.insert(NativeFilePath.str());
}
return true;
}

View File

@ -11,6 +11,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclGroup.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Tooling/FileMatchTrie.h"
#include "clang/Tooling/JSONCompilationDatabase.h"
#include "clang/Tooling/Tooling.h"
#include "gtest/gtest.h"
@ -55,13 +56,13 @@ TEST(JSONCompilationDatabase, GetAllFiles) {
getAllFiles("[]", ErrorMessage)) << ErrorMessage;
std::vector<std::string> expected_files;
expected_files.push_back("file1");
expected_files.push_back("file2");
expected_files.push_back("/dir/file1");
expected_files.push_back("/dir/file2");
EXPECT_EQ(expected_files, getAllFiles(
"[{\"directory\":\"dir\","
"[{\"directory\":\"/dir\","
"\"command\":\"command\","
"\"file\":\"file1\"},"
" {\"directory\":\"dir\","
" {\"directory\":\"/dir\","
"\"command\":\"command\","
"\"file\":\"file2\"}]",
ErrorMessage)) << ErrorMessage;
@ -81,6 +82,82 @@ static CompileCommand findCompileArgsInJsonDatabase(StringRef FileName,
return Commands[0];
}
// Test comparator that treats two paths as equivalent when they differ only
// in letter case, so that no file system access is needed.
struct FakeComparator : public PathComparator {
  virtual ~FakeComparator() {}

  virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
    const std::string Lhs = FileA.str();
    const std::string Rhs = FileB.str();
    return StringRef(Lhs).equals_lower(Rhs);
  }
};
// Fixture providing a FileMatchTrie that uses FakeComparator for path
// comparison.
class FileMatchTrieTest : public ::testing::Test {
protected:
FileMatchTrieTest() : Trie(new FakeComparator()) {}
// Looks up 'Path' in the trie. Messages emitted by findEquivalent() are
// written to a stream backed by 'Error'.
StringRef find(StringRef Path) {
llvm::raw_string_ostream ES(Error);
return Trie.findEquivalent(Path, ES);
}
FileMatchTrie Trie;
// Receives the error text produced by find().
std::string Error;
};
// Inserting a relative path is ignored and does not disturb the lookup of an
// absolute path inserted before it.
TEST_F(FileMatchTrieTest, InsertingRelativePath) {
Trie.insert("/path/file.cc");
Trie.insert("file.cc");
EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
}
// Looking up a relative path yields an empty result.
TEST_F(FileMatchTrieTest, MatchingRelativePath) {
EXPECT_EQ("", find("file.cc"));
}
// With two entries sharing a file name, the entry whose directory also
// matches the query is returned.
TEST_F(FileMatchTrieTest, ReturnsBestResults) {
Trie.insert("/d/c/b.cc");
Trie.insert("/d/b/b.cc");
EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc"));
}
// A directory name differing only in case stands in for a symlink here:
// FakeComparator reports the two spellings as equivalent.
TEST_F(FileMatchTrieTest, HandlesSymlinks) {
Trie.insert("/AA/file.cc");
EXPECT_EQ("/AA/file.cc", find("/aa/file.cc"));
}
// Two entries with the same matching suffix length are both equivalent to the
// query, so the lookup returns nothing and reports an ambiguity.
TEST_F(FileMatchTrieTest, ReportsSymlinkAmbiguity) {
Trie.insert("/Aa/file.cc");
Trie.insert("/aA/file.cc");
EXPECT_TRUE(find("/aa/file.cc").empty());
EXPECT_EQ("Path is ambiguous", Error);
}
// All three entries are equivalent to the query under FakeComparator, but
// only "/SRC/aa/file.cc" shares the longer exact suffix "aa/file.cc", so it
// wins without an ambiguity being reported.
TEST_F(FileMatchTrieTest, LongerMatchingSuffixPreferred) {
Trie.insert("/src/Aa/file.cc");
Trie.insert("/src/aA/file.cc");
Trie.insert("/SRC/aa/file.cc");
EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc"));
}
// A lookup in a trie without any entries returns an empty result.
TEST_F(FileMatchTrieTest, EmptyTrie) {
EXPECT_TRUE(find("/some/path").empty());
}
// Neither entry is equivalent to the query, although each shares either the
// directory or the file name with it.
TEST_F(FileMatchTrieTest, NoResult) {
Trie.insert("/somepath/otherfile.cc");
Trie.insert("/otherpath/somefile.cc");
EXPECT_EQ("", find("/somepath/somefile.cc"));
}
// Entries that share the file name but differ in their top-level directory
// are kept apart; the exact entry is returned.
TEST_F(FileMatchTrieTest, RootElementDifferent) {
Trie.insert("/path/file.cc");
Trie.insert("/otherpath/file.cc");
EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
}
// A relative query returns an empty result and reports the reason in the
// error text.
TEST_F(FileMatchTrieTest, CannotResolveRelativePath) {
EXPECT_EQ("", find("relative-path.cc"));
EXPECT_EQ("Cannot resolve relative paths", Error);
}
TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
std::string ErrorMessage;
CompileCommand NotFound = findCompileArgsInJsonDatabase(
@ -148,7 +225,7 @@ TEST(findCompileArgsInJsonDatabase, ReadsDirectoryWithSpaces) {
}
TEST(findCompileArgsInJsonDatabase, FindsEntry) {
StringRef Directory("directory");
StringRef Directory("/directory");
StringRef FileName("file");
StringRef Command("command");
std::string JsonDatabase = "[";
@ -162,19 +239,19 @@ TEST(findCompileArgsInJsonDatabase, FindsEntry) {
JsonDatabase += "]";
std::string ErrorMessage;
CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
"file4", JsonDatabase, ErrorMessage);
EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage;
"/directory4/file4", JsonDatabase, ErrorMessage);
EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage;
ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage;
EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage;
}
static std::vector<std::string> unescapeJsonCommandLine(StringRef Command) {
std::string JsonDatabase =
("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" +
("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" +
Command + "\"}]").str();
std::string ErrorMessage;
CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
"test", JsonDatabase, ErrorMessage);
"/root/test", JsonDatabase, ErrorMessage);
EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage;
return FoundCommand.CommandLine;
}