[clangd] Add support for different file URI schemas.

Summary: I will replace the existing URI struct in Protocol.h with the new URI and rename FileURI to URI in a followup patch.

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: jkorous-apple, klimek, mgorny, ilya-biryukov, cfe-commits

Differential Revision: https://reviews.llvm.org/D41946

llvm-svn: 323101
This commit is contained in:
Eric Liu 2018-01-22 11:48:20 +00:00
parent 6a92e99b64
commit f5b8c82198
5 changed files with 522 additions and 0 deletions

View File

@ -21,6 +21,7 @@ add_clang_library(clangDaemon
ProtocolHandlers.cpp
SourceCode.cpp
Trace.cpp
URI.cpp
XRefs.cpp
index/FileIndex.cpp
index/Index.cpp

View File

@ -0,0 +1,199 @@
//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "URI.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include <iomanip>
#include <sstream>
// Instantiate the registry that URI schemes are added to (URISchemeRegistry is
// declared in URI.h as llvm::Registry<URIScheme>).
LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
namespace clang {
namespace clangd {
namespace {
/// Builds an llvm::Error holding an llvm::StringError with text \p Message and
/// llvm::inconvertibleErrorCode() as its error code.
inline llvm::Error make_string_error(const llvm::Twine &Message) {
  auto Code = llvm::inconvertibleErrorCode();
  return llvm::make_error<llvm::StringError>(Message, Code);
}
/// \brief The "file" scheme: the URI body is an absolute file system path
/// written with forward slashes and a leading '/'. The authority and the hint
/// path are ignored.
class FileSystemScheme : public URIScheme {
public:
  static const char *Scheme;

  /// Converts a URI body into a native path. Bodies that do not begin with
  /// '/' are rejected with an error.
  llvm::Expected<std::string>
  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
                  llvm::StringRef /*HintPath*/) const override {
    if (Body.empty() || Body.front() != '/')
      return make_string_error("File scheme: expect body to be an absolute "
                               "path starting with '/': " +
                               Body);
    // Bodies shaped like "/X:..." carry a Windows drive; the leading slash is
    // not part of the path and is dropped. Body[0] is known to be '/' here.
    const bool LooksLikeDrive = Body.size() > 2 && Body[2] == ':';
    if (LooksLikeDrive)
      Body = Body.substr(1);
    llvm::SmallVector<char, 16> Native(Body.begin(), Body.end());
    llvm::sys::path::native(Native);
    return std::string(Native.data(), Native.size());
  }

  /// Builds a "file" URI (empty authority) whose body is \p AbsolutePath with
  /// forward slashes; paths shaped like "X:..." get a '/' prepended.
  llvm::Expected<FileURI>
  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
    const bool LooksLikeDrive =
        AbsolutePath.size() > 1 && AbsolutePath[1] == ':';
    std::string Body = LooksLikeDrive ? "/" : "";
    Body += llvm::sys::path::convert_to_slash(AbsolutePath);
    return FileURI::create(Scheme, /*Authority=*/"", Body);
  }
};
// Name of the built-in scheme; also used as the registry entry name below.
const char *FileSystemScheme::Scheme = "file";
// Adds FileSystemScheme to the URISchemeRegistry under the name "file", which
// is the name findSchemeByName() compares against.
static URISchemeRegistry::Add<FileSystemScheme>
X(FileSystemScheme::Scheme,
"URI scheme for absolute paths in the file system.");
/// Walks the URISchemeRegistry and instantiates the first entry whose name
/// equals \p Scheme. Returns an error if no entry matches.
llvm::Expected<std::unique_ptr<URIScheme>>
findSchemeByName(llvm::StringRef Scheme) {
  for (const auto &Entry : URISchemeRegistry::entries())
    if (Entry.getName() == Scheme)
      return Entry.instantiate();
  return make_string_error("Can't find scheme: " + Scheme);
}
/// Returns true if \p C has to be percent-encoded when a URI component is
/// written out.
///
/// The RFC 3986 "unreserved" set (ALPHA / DIGIT / '-' / '.' / '_' / '~') is
/// kept verbatim, and so is '/', which only acts as a delimiter while parsing.
/// Every other byte is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved alphanumerics. The ranges are spelled out rather than calling
  // std::isalnum so the answer never depends on the current locale.
  if ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
      (C >= '0' && C <= '9'))
    return false;
  switch (C) {
  case '-':
  case '_':
  case '.':
  case '~':
  case '/': // '/' is only reserved when parsing.
    return false;
  default:
    return true;
  }
}
/// Percent-encodes \p Content byte by byte.
/// - Bytes for which shouldEscape() returns false are copied unchanged.
/// - Every other byte is written as '%' followed by its two-digit hex value.
std::string percentEncode(llvm::StringRef Content) {
  std::string Encoded;
  llvm::raw_string_ostream Out(Encoded);
  for (unsigned char Byte : Content) {
    if (!shouldEscape(Byte)) {
      Out << Byte;
      continue;
    }
    Out << '%' << llvm::format_hex_no_prefix(Byte, 2);
  }
  // str() flushes the stream into Encoded before handing it back.
  return Out.str();
}
/// Decodes a percent-encoded string.
///
/// Each "%XY" sequence, where X and Y are hex digits, is replaced by the byte
/// with that value. A '%' that is not followed by two hex digits (including
/// one too close to the end of the input) is copied through unchanged, as is
/// every other character.
std::string percentDecode(llvm::StringRef Content) {
  std::string Result;
  const size_t Size = Content.size();
  // Indices are used instead of iterators so that looking two characters
  // ahead never forms a pointer beyond one-past-the-end of the buffer.
  for (size_t I = 0; I < Size; ++I) {
    const char C = Content[I];
    const bool IsEscape = C == '%' && I + 2 < Size &&
                          llvm::isHexDigit(Content[I + 1]) &&
                          llvm::isHexDigit(Content[I + 2]);
    if (!IsEscape) {
      Result.push_back(C);
      continue;
    }
    Result.push_back(llvm::hexFromNibbles(Content[I + 1], Content[I + 2]));
    I += 2; // Skip the two hex digits just consumed.
  }
  return Result;
}
} // namespace
/// Assembles a FileURI from already-decoded components.
/// Fails when \p Scheme is empty, or when \p Authority is non-empty and
/// \p Body does not begin with '/'.
llvm::Expected<FileURI> FileURI::create(llvm::StringRef Scheme,
                                        llvm::StringRef Authority,
                                        llvm::StringRef Body) {
  if (Scheme.empty())
    return make_string_error("Scheme must be specified in a URI.");

  const bool HasAuthority = !Authority.empty();
  if (HasAuthority && !Body.startswith("/"))
    return make_string_error(
        "URI body must start with '/' when authority is present.");

  FileURI Result;
  Result.Scheme = Scheme;
  Result.Authority = Authority;
  Result.Body = Body;
  return Result;
}
/// Serializes the URI as "<scheme>:[//<authority>]<body>" with every
/// component percent-encoded.
std::string FileURI::toString() const {
  std::string Text = percentEncode(Scheme);
  Text += ":";
  if (Authority.empty() && Body.empty())
    return Text;

  // The "//" marker (plus authority) is written when there is an authority,
  // or when the authority is empty but the body starts with '/'. A body that
  // does not start with '/' is appended directly after the scheme.
  const bool WriteAuthority =
      !Authority.empty() || llvm::StringRef(Body).startswith("/");
  if (WriteAuthority) {
    Text += "//";
    Text += percentEncode(Authority);
  }
  Text += percentEncode(Body);
  return Text;
}
/// Splits \p OrigUri into scheme, optional authority and body, and
/// percent-decodes each part. Fails when there is no ':' or when nothing
/// precedes the first ':'.
llvm::Expected<FileURI> FileURI::parse(llvm::StringRef OrigUri) {
  const size_t Colon = OrigUri.find(':');
  if (Colon == llvm::StringRef::npos || Colon == 0)
    return make_string_error("Scheme must be provided in URI: " + OrigUri);

  FileURI Parsed;
  Parsed.Scheme = percentDecode(OrigUri.substr(0, Colon));

  llvm::StringRef Rest = OrigUri.substr(Colon + 1);
  if (Rest.consume_front("//")) {
    // The authority runs up to the next '/'. When there is none, substr()
    // clamps: the authority takes everything and the body ends up empty.
    const size_t Slash = Rest.find('/');
    Parsed.Authority = percentDecode(Rest.substr(0, Slash));
    Rest = Rest.substr(Slash);
  }
  Parsed.Body = percentDecode(Rest);
  return Parsed;
}
/// Converts \p AbsolutePath into a URI using the registered scheme named
/// \p Scheme. Fails when the path is not absolute or the scheme is unknown;
/// otherwise the result is whatever the scheme's uriFromAbsolutePath() yields.
llvm::Expected<FileURI> FileURI::create(llvm::StringRef AbsolutePath,
                                        llvm::StringRef Scheme) {
  const bool IsAbsolute = llvm::sys::path::is_absolute(AbsolutePath);
  if (!IsAbsolute)
    return make_string_error("Not a valid absolute path: " + AbsolutePath);

  auto Handler = findSchemeByName(Scheme);
  if (Handler)
    return (*Handler)->uriFromAbsolutePath(AbsolutePath);
  return Handler.takeError();
}
/// Looks up the scheme named by \p Uri and asks it to turn the URI's
/// authority and body into an absolute path, passing \p HintPath through.
/// Fails when the scheme is not registered or the scheme reports an error.
llvm::Expected<std::string> FileURI::resolve(const FileURI &Uri,
                                             llvm::StringRef HintPath) {
  auto Handler = findSchemeByName(Uri.Scheme);
  if (Handler)
    return (*Handler)->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
  return Handler.takeError();
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,101 @@
//===--- URI.h - File URIs with schemes --------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Registry.h"
namespace clang {
namespace clangd {
/// A URI describes the location of a source file.
/// In the simplest case, this is a "file" URI that directly encodes the
/// absolute path to a file. More abstract cases are possible: a shared index
/// service might expose repo:// URIs that are relative to the source control
/// root.
///
/// Clangd handles URIs of the form <scheme>:[//<authority>]<body>. It doesn't
/// further split the authority or body into constituent parts (e.g. query
/// strings are included in the body).
///
/// The default constructor is private, so instances are obtained through
/// create() or parse().
class FileURI {
public:
/// Returns decoded scheme e.g. "https"
llvm::StringRef scheme() const { return Scheme; }
/// Returns decoded authority e.g. "reviews.llvm.org"
llvm::StringRef authority() const { return Authority; }
/// Returns decoded body e.g. "/D41946"
llvm::StringRef body() const { return Body; }
/// Returns a string URI with all components percent-encoded.
std::string toString() const;
/// Create a FileURI from unescaped scheme+authority+body.
/// Fails if \p Scheme is empty, or if \p Authority is non-empty and \p Body
/// does not start with '/'.
static llvm::Expected<FileURI> create(llvm::StringRef Scheme,
llvm::StringRef Authority,
llvm::StringRef Body);
/// Creates a FileURI for a file in the given scheme. \p Scheme must be
/// registered and \p AbsolutePath must be an absolute path; otherwise an
/// error is returned.
static llvm::Expected<FileURI> create(llvm::StringRef AbsolutePath,
llvm::StringRef Scheme = "file");
/// Parse a URI string "<scheme>:[//<authority>]<body>". Percent-encoded
/// characters in the URI will be decoded. Fails if \p Uri contains no ':' or
/// if the scheme part before the first ':' is empty.
static llvm::Expected<FileURI> parse(llvm::StringRef Uri);
/// Resolves the absolute path of \p U. If there is no matching scheme, or the
/// URI is invalid in the scheme, this returns an error.
///
/// \p HintPath A related path, such as the current file or working directory,
/// which can help disambiguate when the same file exists in many workspaces.
static llvm::Expected<std::string> resolve(const FileURI &U,
llvm::StringRef HintPath = "");
/// Two URIs are equal when their decoded scheme, authority and body are all
/// equal.
friend bool operator==(const FileURI &LHS, const FileURI &RHS) {
return std::tie(LHS.Scheme, LHS.Authority, LHS.Body) ==
std::tie(RHS.Scheme, RHS.Authority, RHS.Body);
}
private:
// Only create() and parse() construct instances.
FileURI() = default;
// All three components are stored in decoded (unescaped) form.
std::string Scheme;
std::string Authority;
std::string Body;
};
/// URIScheme is an extension point for teaching clangd to recognize a custom
/// URI scheme. This is expected to be implemented and exposed via the
/// URISchemeRegistry.
class URIScheme {
public:
virtual ~URIScheme() = default;
/// Returns the absolute path of the file corresponding to the URI
/// authority+body in the file system. See FileURI::resolve for semantics of
/// \p HintPath.
virtual llvm::Expected<std::string>
getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
llvm::StringRef HintPath) const = 0;
/// Returns the URI in this scheme that corresponds to \p AbsolutePath, or an
/// error. FileURI::create only calls this with a path it has already checked
/// to be absolute.
virtual llvm::Expected<FileURI>
uriFromAbsolutePath(llvm::StringRef AbsolutePath) const = 0;
};
/// Registry through which URIScheme implementations are exposed to clangd.
/// A "file" scheme, whose URI paths are always absolute file system paths, is
/// supported by default.
using URISchemeRegistry = llvm::Registry<URIScheme>;
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H

View File

@ -18,6 +18,7 @@ add_extra_unittest(ClangdTests
FuzzyMatchTests.cpp
IndexTests.cpp
JSONExprTests.cpp
URITests.cpp
TestFS.cpp
TraceTests.cpp
SourceCodeTests.cpp

View File

@ -0,0 +1,220 @@
//===-- URITests.cpp ---------------------------------*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "TestFS.h"
#include "URI.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
namespace clangd {
namespace {
using ::testing::AllOf;
// gmock matchers comparing one decoded component of a FileURI with an
// expected value; combined with AllOf() in the Parse test.
MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
MATCHER_P(Authority, A, "") { return arg.authority() == A; }
MATCHER_P(Body, B, "") { return arg.body() == B; }
// A scheme for tests. Every file it handles is assumed to live below a
// "/test-root/" directory, and the URI body is the path relative to that
// directory. So the URI of "/some-dir/test-root/x/y/z" is "test:x/y/z".
class TestScheme : public URIScheme {
public:
  static const char *Scheme;
  static const char *TestRoot;

  // Re-attaches \p Body to everything in \p HintPath up to and including
  // TestRoot. \p HintPath must contain TestRoot.
  llvm::Expected<std::string>
  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
                  llvm::StringRef HintPath) const override {
    const llvm::StringRef Root(TestRoot);
    const size_t RootPos = HintPath.find(Root);
    assert(RootPos != llvm::StringRef::npos);
    const llvm::StringRef Prefix = HintPath.substr(0, RootPos + Root.size());
    return (Prefix + Body).str();
  }

  // The URI body is whatever follows TestRoot in \p AbsolutePath, which must
  // contain TestRoot.
  llvm::Expected<FileURI>
  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
    const llvm::StringRef Root(TestRoot);
    const size_t RootPos = AbsolutePath.find(Root);
    assert(RootPos != llvm::StringRef::npos);
    const llvm::StringRef Relative =
        AbsolutePath.substr(RootPos + Root.size());
    return FileURI::create(Scheme, /*Authority=*/"", Relative);
  }
};
// Scheme name and the directory marker TestScheme searches for in paths.
const char *TestScheme::Scheme = "test";
const char *TestScheme::TestRoot = "/test-root/";
// Adds TestScheme to the URISchemeRegistry under the name "test".
static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
// Creates a URI for \p AbsolutePath in \p Scheme and returns its string form;
// a creation error is treated as unreachable.
std::string createOrDie(llvm::StringRef AbsolutePath,
                        llvm::StringRef Scheme = "file") {
  auto Created = FileURI::create(AbsolutePath, Scheme);
  if (Created)
    return Created->toString();
  llvm_unreachable(llvm::toString(Created.takeError()).c_str());
}
// Creates a URI from raw components and returns its string form; a creation
// error is treated as unreachable.
std::string createOrDie(llvm::StringRef Scheme, llvm::StringRef Authority,
                        llvm::StringRef Body) {
  auto Created = FileURI::create(Scheme, Authority, Body);
  if (Created)
    return Created->toString();
  llvm_unreachable(llvm::toString(Created.takeError()).c_str());
}
// Parses \p Uri and returns the resulting FileURI; a parse error is treated
// as unreachable.
FileURI parseOrDie(llvm::StringRef Uri) {
  auto Parsed = FileURI::parse(Uri);
  if (Parsed)
    return *Parsed;
  llvm_unreachable(llvm::toString(Parsed.takeError()).c_str());
}
// Letters, '/' and '~' pass through; '!' and ';' come out as lowercase
// two-digit hex escapes.
TEST(PercentEncodingTest, Encode) {
EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a/b/c"), "x:a/b/c");
EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a!b;c~"), "x:a%21b%3bc~");
}
// Escapes are decoded in every component. The URI is split on the first
// literal ':' before decoding, so an escaped "%3a" can appear inside the
// scheme. A truncated escape ("%3") is kept verbatim.
TEST(PercentEncodingTest, Decode) {
EXPECT_EQ(parseOrDie("x:a/b/c").body(), "a/b/c");
EXPECT_EQ(parseOrDie("%3a://%3a/%3").scheme(), ":");
EXPECT_EQ(parseOrDie("%3a://%3a/%3").authority(), ":");
EXPECT_EQ(parseOrDie("%3a://%3a/%3").body(), "/%3");
EXPECT_EQ(parseOrDie("x:a%21b%3ac~").body(), "a!b:c~");
}
// Resolves \p U to an absolute path, optionally guided by \p HintPath; a
// resolution error is treated as unreachable.
std::string resolveOrDie(const FileURI &U, llvm::StringRef HintPath = "") {
  auto Resolved = FileURI::resolve(U, HintPath);
  if (Resolved)
    return *Resolved;
  llvm_unreachable(llvm::toString(Resolved.takeError()).c_str());
}
// Creating "file" URIs from absolute paths. Everything outside the unescaped
// character set is percent-encoded in the string form.
TEST(URITest, Create) {
#ifdef LLVM_ON_WIN32
  // The drive path becomes the body "/c:/x/y/z"; ':' is not exempt from
  // escaping, so the string form carries "%3a" rather than a literal ':'.
  EXPECT_THAT(createOrDie("c:\\x\\y\\z"), "file:///c%3a/x/y/z");
#else
  EXPECT_THAT(createOrDie("/x/y/z"), "file:///x/y/z");
  EXPECT_THAT(createOrDie("/(x)/y/\\ z"), "file:///%28x%29/y/%5c%20z");
#endif
}
// Both create() overloads must report an error for invalid input.
TEST(URITest, FailedCreate) {
// Returns true when U holds an error; the error is consumed so that it does
// not go unchecked.
auto Fail = [](llvm::Expected<FileURI> U) {
if (!U) {
llvm::consumeError(U.takeError());
return true;
}
return false;
};
// Create from scheme+authority+body:
//
// Scheme must be provided.
EXPECT_TRUE(Fail(FileURI::create("", "auth", "/a")));
// Body must start with '/' if authority is present.
EXPECT_TRUE(Fail(FileURI::create("scheme", "auth", "x/y/z")));
// Create from scheme registry:
//
// Scheme "no" is not registered.
EXPECT_TRUE(Fail(FileURI::create("/x/y/z", "no")));
// Path has to be absolute.
EXPECT_TRUE(Fail(FileURI::create("x/y/z")));
}
// Splitting of URI strings into decoded scheme, authority and body.
TEST(URITest, Parse) {
EXPECT_THAT(parseOrDie("file://auth/x/y/z"),
AllOf(Scheme("file"), Authority("auth"), Body("/x/y/z")));
// Escapes are decoded in both authority and body.
EXPECT_THAT(parseOrDie("file://au%3dth/%28x%29/y/%5c%20z"),
AllOf(Scheme("file"), Authority("au=th"), Body("/(x)/y/\\ z")));
// "///" means an empty authority.
EXPECT_THAT(parseOrDie("file:///%28x%29/y/%5c%20z"),
AllOf(Scheme("file"), Authority(""), Body("/(x)/y/\\ z")));
EXPECT_THAT(parseOrDie("file:///x/y/z"),
AllOf(Scheme("file"), Authority(""), Body("/x/y/z")));
// A scheme alone is accepted.
EXPECT_THAT(parseOrDie("file:"),
AllOf(Scheme("file"), Authority(""), Body("")));
// An incomplete escape at the end is kept as is.
EXPECT_THAT(parseOrDie("file:///x/y/z%2"),
AllOf(Scheme("file"), Authority(""), Body("/x/y/z%2")));
// Authority without any body, with a bare "/" body, and with a path.
EXPECT_THAT(parseOrDie("http://llvm.org"),
AllOf(Scheme("http"), Authority("llvm.org"), Body("")));
EXPECT_THAT(parseOrDie("http://llvm.org/"),
AllOf(Scheme("http"), Authority("llvm.org"), Body("/")));
EXPECT_THAT(parseOrDie("http://llvm.org/D"),
AllOf(Scheme("http"), Authority("llvm.org"), Body("/D")));
// No "//" after the scheme: everything after ':' is the body.
EXPECT_THAT(parseOrDie("http:/"),
AllOf(Scheme("http"), Authority(""), Body("/")));
// Only the first ':' separates the scheme; later ones belong to the body.
EXPECT_THAT(parseOrDie("urn:isbn:0451450523"),
AllOf(Scheme("urn"), Authority(""), Body("isbn:0451450523")));
EXPECT_THAT(
parseOrDie("file:///c:/windows/system32/"),
AllOf(Scheme("file"), Authority(""), Body("/c:/windows/system32/")));
}
// Inputs that parse() must reject.
TEST(URITest, ParseFailed) {
// Returns true when parsing U yields an error; the error is consumed so that
// it does not go unchecked.
auto FailedParse = [](llvm::StringRef U) {
auto URI = FileURI::parse(U);
if (!URI) {
llvm::consumeError(URI.takeError());
return true;
}
return false;
};
// Expect ':' in URI.
EXPECT_TRUE(FailedParse("file//x/y/z"));
// Empty.
EXPECT_TRUE(FailedParse(""));
// Nothing before the ':' (empty scheme).
EXPECT_TRUE(FailedParse(":/a/b/c"));
}
// Resolving URIs back to file system paths through their scheme.
TEST(URITest, Resolve) {
#ifdef LLVM_ON_WIN32
EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:\\x\\y\\z");
#else
EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c");
// The "file" scheme ignores the authority.
EXPECT_EQ(resolveOrDie(parseOrDie("file://auth/a/b/c")), "/a/b/c");
// The "test" scheme rebuilds the path from the hint path's test root.
EXPECT_EQ(resolveOrDie(parseOrDie("test:a/b/c"), "/dir/test-root/x/y/z"),
"/dir/test-root/a/b/c");
EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth/%28x%29/y/%20z")),
"/(x)/y/ z");
// A "/c:" prefix is treated as a drive letter: the leading '/' is dropped.
EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:/x/y/z");
#endif
}
// Round trip on the host platform: a path turned into a "file" URI must
// resolve back to the same path.
TEST(URITest, Platform) {
  auto Path = getVirtualTestFilePath("x");
  auto U = FileURI::create(Path, "file");
  // ASSERT rather than EXPECT: *U below must not be evaluated when creation
  // failed. The streamed message only runs on failure and consumes the error.
  ASSERT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
  EXPECT_THAT(resolveOrDie(*U), Path.str());
}
// URIs that parse successfully but cannot be resolved to a path.
TEST(URITest, ResolveFailed) {
// Returns true when resolving Uri yields an error; the error is consumed so
// that it does not go unchecked.
auto FailedResolve = [](llvm::StringRef Uri) {
auto Path = FileURI::resolve(parseOrDie(Uri));
if (!Path) {
llvm::consumeError(Path.takeError());
return true;
}
return false;
};
// Invalid scheme.
EXPECT_TRUE(FailedResolve("no:/a/b/c"));
// File path needs to be absolute.
EXPECT_TRUE(FailedResolve("file:a/b/c"));
}
} // namespace
} // namespace clangd
} // namespace clang