[clangd] Resolve driver symlinks, and look up unknown relative drivers in PATH.

Summary:
This fixes a reported bug: if clang and libc++ are installed under
/usr/lib/llvm-11/...  but there'- a symlink /usr/bin/clang++-11, then a
compile_commands.json with "/usr/bin/clang++-11 -stdlib=libc++" would previously
look for libc++ under /usr/include instead of /usr/lib/llvm-11/include.
The PATH change makes this work if the compiler is just "clang++-11" too.

As this is now doing IO potentially on every getCompileCommand(), we cache
the results for each distinct driver.

While here:
- Added a Memoize helper for this as multithreaded caching is a bit noisy.
- Used this helper to simplify QueryDriverDatabase and reduce blocking there.
  (This makes use of the fact that llvm::Regex is now threadsafe)

Reviewers: kadircet

Subscribers: jyknight, ormris, ilya-biryukov, MaskRay, jkorous, arphaman, jfb, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D75414
This commit is contained in:
Sam McCall 2020-05-28 14:24:30 +02:00
parent af7587d755
commit 806342b8ef
7 changed files with 242 additions and 37 deletions

View File

@ -514,7 +514,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
if (ClangdServerOpts.ResourceDir)
Mangler.ResourceDir = *ClangdServerOpts.ResourceDir;
CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags,
tooling::ArgumentsAdjuster(Mangler));
tooling::ArgumentsAdjuster(std::move(Mangler)));
{
// Switch caller's context with LSPServer's background context. Since we
// rather want to propagate information from LSPServer's context into the

View File

@ -119,6 +119,48 @@ std::string detectStandardResourceDir() {
return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy);
}
// The path passed to argv[0] is important:
// - its parent directory is Driver::Dir, used for library discovery
// - its basename affects CLI parsing (clang-cl) and other settings
// Where possible it should be an absolute path with sensible directory, but
// with the original basename.
static std::string resolveDriver(llvm::StringRef Driver, bool FollowSymlink,
llvm::Optional<std::string> ClangPath) {
auto SiblingOf = [&](llvm::StringRef AbsPath) {
llvm::SmallString<128> Result = llvm::sys::path::parent_path(AbsPath);
llvm::sys::path::append(Result, llvm::sys::path::filename(Driver));
return Result.str().str();
};
// First, eliminate relative paths.
std::string Storage;
if (!llvm::sys::path::is_absolute(Driver)) {
// If the driver is a generic like "g++" with no path, add clang dir.
if (ClangPath &&
(Driver == "clang" || Driver == "clang++" || Driver == "gcc" ||
Driver == "g++" || Driver == "cc" || Driver == "c++")) {
return SiblingOf(*ClangPath);
}
// Otherwise try to look it up on PATH. This won't change basename.
auto Absolute = llvm::sys::findProgramByName(Driver);
if (Absolute && llvm::sys::path::is_absolute(*Absolute))
Driver = Storage = std::move(*Absolute);
else if (ClangPath) // If we don't find it, use clang dir again.
return SiblingOf(*ClangPath);
else // Nothing to do: can't find the command and no detected dir.
return Driver.str();
}
// Now we have an absolute path, but it may be a symlink.
assert(llvm::sys::path::is_absolute(Driver));
if (FollowSymlink) {
llvm::SmallString<256> Resolved;
if (!llvm::sys::fs::real_path(Driver, Resolved))
return SiblingOf(Resolved);
}
return Driver.str();
}
} // namespace
CommandMangler CommandMangler::detect() {
@ -162,25 +204,22 @@ void CommandMangler::adjust(std::vector<std::string> &Cmd) const {
Cmd.push_back(*Sysroot);
}
// If the driver is a generic name like "g++" with no path, add a clang path.
// This makes it easier for us to find the standard libraries on mac.
if (ClangPath && llvm::sys::path::is_absolute(*ClangPath) && !Cmd.empty()) {
std::string &Driver = Cmd.front();
if (Driver == "clang" || Driver == "clang++" || Driver == "gcc" ||
Driver == "g++" || Driver == "cc" || Driver == "c++") {
llvm::SmallString<128> QualifiedDriver =
llvm::sys::path::parent_path(*ClangPath);
llvm::sys::path::append(QualifiedDriver, Driver);
Driver = std::string(QualifiedDriver.str());
}
if (!Cmd.empty()) {
bool FollowSymlink = !Has("-no-canonical-prefixes");
Cmd.front() =
(FollowSymlink ? ResolvedDrivers : ResolvedDriversNoFollow)
.get(Cmd.front(), [&, this] {
return resolveDriver(Cmd.front(), FollowSymlink, ClangPath);
});
}
}
CommandMangler::operator clang::tooling::ArgumentsAdjuster() {
return [Mangler{*this}](const std::vector<std::string> &Args,
llvm::StringRef File) {
CommandMangler::operator clang::tooling::ArgumentsAdjuster() && {
// ArgumentsAdjuster is a std::function and so must be copyable.
return [Mangler = std::make_shared<CommandMangler>(std::move(*this))](
const std::vector<std::string> &Args, llvm::StringRef File) {
auto Result = Args;
Mangler.adjust(Result);
Mangler->adjust(Result);
return Result;
};
}

View File

@ -8,8 +8,10 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H
#include "support/Threading.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringMap.h"
#include <string>
#include <vector>
@ -40,10 +42,12 @@ struct CommandMangler {
static CommandMangler detect();
void adjust(std::vector<std::string> &Cmd) const;
explicit operator clang::tooling::ArgumentsAdjuster();
explicit operator clang::tooling::ArgumentsAdjuster() &&;
private:
CommandMangler() = default;
Memoize<llvm::StringMap<std::string>> ResolvedDrivers;
Memoize<llvm::StringMap<std::string>> ResolvedDriversNoFollow;
};
} // namespace clangd

View File

@ -88,7 +88,7 @@ std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
std::vector<std::string>
extractSystemIncludes(PathRef Driver, llvm::StringRef Lang,
llvm::ArrayRef<std::string> CommandLine,
llvm::Regex &QueryDriverRegex) {
const llvm::Regex &QueryDriverRegex) {
trace::Span Tracer("Extract system includes");
SPAN_ATTACH(Tracer, "driver", Driver);
SPAN_ATTACH(Tracer, "lang", Lang);
@ -267,19 +267,12 @@ public:
llvm::SmallString<128> Driver(Cmd->CommandLine.front());
llvm::sys::fs::make_absolute(Cmd->Directory, Driver);
auto Key = std::make_pair(Driver.str().str(), Lang.str());
std::vector<std::string> SystemIncludes;
{
std::lock_guard<std::mutex> Lock(Mu);
auto It = DriverToIncludesCache.find(Key);
if (It != DriverToIncludesCache.end())
SystemIncludes = It->second;
else
DriverToIncludesCache[Key] = SystemIncludes = extractSystemIncludes(
Key.first, Key.second, Cmd->CommandLine, QueryDriverRegex);
}
std::vector<std::string> SystemIncludes =
QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] {
return extractSystemIncludes(Driver, Lang, Cmd->CommandLine,
QueryDriverRegex);
});
return addSystemIncludes(*Cmd, SystemIncludes);
}
@ -289,12 +282,9 @@ public:
}
private:
mutable std::mutex Mu;
// Caches includes extracted from a driver.
mutable std::map<std::pair<std::string, std::string>,
std::vector<std::string>>
DriverToIncludesCache;
mutable llvm::Regex QueryDriverRegex;
// Caches includes extracted from a driver. Key is driver:lang.
Memoize<llvm::StringMap<std::vector<std::string>>> QueriedDrivers;
llvm::Regex QueryDriverRegex;
std::unique_ptr<GlobalCompilationDatabase> Base;
CommandChanged::Subscription BaseChanged;

View File

@ -131,6 +131,44 @@ std::future<T> runAsync(llvm::unique_function<T()> Action) {
std::move(Action), Context::current().clone());
}
/// Memoize is a cache to store and reuse computation results based on a key.
///
/// Memoize<DenseMap<int, bool>> PrimeCache;
/// for (int I : RepetitiveNumbers)
/// if (PrimeCache.get(I, [&] { return expensiveIsPrime(I); }))
/// llvm::errs() << "Prime: " << I << "\n";
///
/// The computation will only be run once for each key.
/// This class is threadsafe. Concurrent calls for the same key may run the
/// computation multiple times, but each call will return the same result.
template <typename Container> class Memoize {
mutable Container Cache;
std::unique_ptr<std::mutex> Mu;
public:
Memoize() : Mu(std::make_unique<std::mutex>()) {}
template <typename T, typename Func>
typename Container::mapped_type get(T &&Key, Func Compute) const {
{
std::lock_guard<std::mutex> Lock(*Mu);
auto It = Cache.find(Key);
if (It != Cache.end())
return It->second;
}
// Don't hold the mutex while computing.
auto V = Compute();
{
std::lock_guard<std::mutex> Lock(*Mu);
auto R = Cache.try_emplace(std::forward<T>(Key), V);
// Insert into cache may fail if we raced with another thread.
if (!R.second)
return R.first->second; // Canonical value, from other thread.
}
return V;
}
};
} // namespace clangd
} // namespace clang
#endif

View File

@ -9,7 +9,11 @@
#include "CompileCommands.h"
#include "TestFS.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@ -103,13 +107,82 @@ TEST(CommandMangler, ClangPath) {
Cmd = {"unknown-binary", "foo.cc"};
Mangler.adjust(Cmd);
EXPECT_EQ("unknown-binary", Cmd.front());
EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front());
Cmd = {testPath("path/clang++"), "foo.cc"};
Mangler.adjust(Cmd);
EXPECT_EQ(testPath("path/clang++"), Cmd.front());
Cmd = {"foo/unknown-binary", "foo.cc"};
Mangler.adjust(Cmd);
EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front());
}
// Only run the PATH/symlink resolving test on unix, we need to fiddle
// with permissions and environment variables...
#ifdef LLVM_ON_UNIX
MATCHER(Ok, "") {
if (arg) {
*result_listener << arg.message();
return false;
}
return true;
}
TEST(CommandMangler, ClangPathResolve) {
// Set up filesystem:
// /temp/
// bin/
// foo -> temp/lib/bar
// lib/
// bar
llvm::SmallString<256> TempDir;
ASSERT_THAT(llvm::sys::fs::createUniqueDirectory("ClangPathResolve", TempDir),
Ok());
auto CleanDir = llvm::make_scope_exit(
[&] { llvm::sys::fs::remove_directories(TempDir); });
ASSERT_THAT(llvm::sys::fs::create_directory(TempDir + "/bin"), Ok());
ASSERT_THAT(llvm::sys::fs::create_directory(TempDir + "/lib"), Ok());
int FD;
ASSERT_THAT(llvm::sys::fs::openFileForWrite(TempDir + "/lib/bar", FD), Ok());
ASSERT_THAT(llvm::sys::Process::SafelyCloseFileDescriptor(FD), Ok());
::chmod((TempDir + "/lib/bar").str().c_str(), 0755); // executable
ASSERT_THAT(
llvm::sys::fs::create_link(TempDir + "/lib/bar", TempDir + "/bin/foo"),
Ok());
// Test the case where the driver is an absolute path to a symlink.
auto Mangler = CommandMangler::forTests();
Mangler.ClangPath = testPath("fake/clang");
std::vector<std::string> Cmd = {(TempDir + "/bin/foo").str(), "foo.cc"};
Mangler.adjust(Cmd);
// Directory based on resolved symlink, basename preserved.
EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front());
// Set PATH to point to temp/bin so we can find 'foo' on it.
ASSERT_TRUE(::getenv("PATH"));
auto RestorePath =
llvm::make_scope_exit([OldPath = std::string(::getenv("PATH"))] {
::setenv("PATH", OldPath.c_str(), 1);
});
::setenv("PATH", (TempDir + "/bin").str().c_str(), /*overwrite=*/1);
// Test the case where the driver is a $PATH-relative path to a symlink.
Mangler = CommandMangler::forTests();
Mangler.ClangPath = testPath("fake/clang");
// Driver found on PATH.
Cmd = {"foo", "foo.cc"};
Mangler.adjust(Cmd);
// Found the symlink and resolved the path as above.
EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front());
// Symlink not resolved with -no-canonical-prefixes.
Cmd = {"foo", "-no-canonical-prefixes", "foo.cc"};
Mangler.adjust(Cmd);
EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.front());
}
#endif
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "support/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <mutex>
@ -60,5 +62,64 @@ TEST_F(ThreadingTest, TaskRunner) {
std::lock_guard<std::mutex> Lock(Mutex);
ASSERT_EQ(Counter, TasksCnt * IncrementsPerTask);
}
TEST_F(ThreadingTest, Memoize) {
const unsigned NumThreads = 5;
const unsigned NumKeys = 100;
const unsigned NumIterations = 100;
Memoize<llvm::DenseMap<int, int>> Cache;
std::atomic<unsigned> ComputeCount(0);
std::atomic<int> ComputeResult[NumKeys];
std::fill(std::begin(ComputeResult), std::end(ComputeResult), -1);
AsyncTaskRunner Tasks;
for (unsigned I = 0; I < NumThreads; ++I)
Tasks.runAsync("worker" + std::to_string(I), [&] {
for (unsigned J = 0; J < NumIterations; J++)
for (unsigned K = 0; K < NumKeys; K++) {
int Result = Cache.get(K, [&] { return ++ComputeCount; });
EXPECT_THAT(ComputeResult[K].exchange(Result),
testing::AnyOf(-1, Result))
<< "Got inconsistent results from memoize";
}
});
Tasks.wait();
EXPECT_GE(ComputeCount, NumKeys) << "Computed each key once";
EXPECT_LE(ComputeCount, NumThreads * NumKeys)
<< "Worst case, computed each key in every thread";
for (int Result : ComputeResult)
EXPECT_GT(Result, 0) << "All results in expected domain";
}
TEST_F(ThreadingTest, MemoizeDeterministic) {
Memoize<llvm::DenseMap<int, char>> Cache;
// Spawn two parallel computations, A and B.
// Force concurrency: neither can finish until both have started.
// Verify that cache returns consistent results.
AsyncTaskRunner Tasks;
std::atomic<char> ValueA(0), ValueB(0);
Notification ReleaseA, ReleaseB;
Tasks.runAsync("A", [&] {
ValueA = Cache.get(0, [&] {
ReleaseB.notify();
ReleaseA.wait();
return 'A';
});
});
Tasks.runAsync("A", [&] {
ValueB = Cache.get(0, [&] {
ReleaseA.notify();
ReleaseB.wait();
return 'B';
});
});
Tasks.wait();
ASSERT_EQ(ValueA, ValueB);
ASSERT_THAT(ValueA.load(), testing::AnyOf('A', 'B'));
}
} // namespace clangd
} // namespace clang