From 7d9e1e125930404a7ebce2e8d584b34a9065d530 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 11 Aug 2015 15:51:40 +0000 Subject: [PATCH] [Windows] Use llvm-symbolizer before using dbghelp Summary: llvm-symbolizer understands both PDBs and DWARF, so it's a better bet if it's available. It prints out the function parameter types and column numbers, so I needed to churn the expected test output a bit. This makes most of the llvm-symbolizer subprocessing code target-independent. Pipes on all platforms use fd_t, and we can use the portable ReadFromFile / WriteToFile wrappers in symbolizer_sanitizer.cc. Only the pipe creation and process spawning is Windows-specific. Please check that the libcdep layering is still correct. I don't know how to reproduce the build configuration that relies on that. Reviewers: samsonov Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11791 llvm-svn: 244616 --- .../lib/sanitizer_common/CMakeLists.txt | 2 - .../sanitizer_symbolizer_internal.h | 57 +++- .../sanitizer_symbolizer_libcdep.cc | 241 ++++++++++++++++ .../sanitizer_symbolizer_mac.cc | 23 +- .../sanitizer_symbolizer_posix_libcdep.cc | 267 +++++++++--------- .../sanitizer_symbolizer_process_libcdep.cc | 231 --------------- .../sanitizer_symbolizer_win.cc | 134 ++++++++- .../sanitizer_symbolizer_win.h | 31 -- compiler-rt/test/asan/CMakeLists.txt | 17 +- .../test/asan/TestCases/Windows/fuse-lld.cc | 23 ++ .../test/asan/TestCases/Windows/null_deref.cc | 2 +- .../Windows/report_after_syminitialize.cc | 6 +- 12 files changed, 604 insertions(+), 430 deletions(-) delete mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_process_libcdep.cc delete mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.h create mode 100644 compiler-rt/test/asan/TestCases/Windows/fuse-lld.cc diff --git a/compiler-rt/lib/sanitizer_common/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/CMakeLists.txt index 0c83d71153ea..737c48e4a24d 100644 --- a/compiler-rt/lib/sanitizer_common/CMakeLists.txt +++ b/compiler-rt/lib/sanitizer_common/CMakeLists.txt @@ -49,7 +49,6 @@ set(SANITIZER_LIBCDEP_SOURCES sanitizer_stoptheworld_linux_libcdep.cc sanitizer_symbolizer_libcdep.cc sanitizer_symbolizer_posix_libcdep.cc - sanitizer_symbolizer_process_libcdep.cc sanitizer_unwind_linux_libcdep.cc) # Explicitly list all sanitizer_common headers. Not all of these are @@ -103,7 +102,6 @@ set(SANITIZER_HEADERS sanitizer_symbolizer_internal.h sanitizer_symbolizer_libbacktrace.h sanitizer_symbolizer_mac.h - sanitizer_symbolizer_win.h sanitizer_syscall_generic.inc sanitizer_syscall_linux_x86_64.inc sanitizer_thread_registry.h) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h index 350e2a785dde..ac9a5e4a7664 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h @@ -74,6 +74,20 @@ class SymbolizerProcess { explicit SymbolizerProcess(const char *path, bool use_forkpty = false); const char *SendCommand(const char *command); + protected: + virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { + UNIMPLEMENTED(); + } + + /// The maximum number of arguments required to invoke a tool process. + enum { kArgVMax = 6 }; + + /// Fill in an argv array to invoke the child process. + virtual void GetArgV(const char *path_to_binary, + const char *(&argv)[kArgVMax]) const { + UNIMPLEMENTED(); + } + private: bool Restart(); const char *SendCommandImpl(const char *command); @@ -81,14 +95,6 @@ class SymbolizerProcess { bool WriteToSymbolizer(const char *buffer, uptr length); bool StartSymbolizerSubprocess(); - virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { - UNIMPLEMENTED(); - } - - virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const { - UNIMPLEMENTED(); - } - const char *path_; fd_t input_fd_; fd_t output_fd_; @@ -104,6 +110,41 @@ class SymbolizerProcess { bool use_forkpty_; }; +class LLVMSymbolizerProcess; + +// This tool invokes llvm-symbolizer in a subprocess. It should be as portable +// as the llvm-symbolizer tool is. +class LLVMSymbolizer : public SymbolizerTool { + public: + explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator); + + bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; + + bool SymbolizeData(uptr addr, DataInfo *info) override; + + private: + const char *SendCommand(bool is_data, const char *module_name, + uptr module_offset); + + LLVMSymbolizerProcess *symbolizer_process_; + static const uptr kBufferSize = 16 * 1024; + char buffer_[kBufferSize]; +}; + +// Parses one or more two-line strings in the following format: +// +// :[:] +// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of +// them use the same output format. Returns true if any useful debug +// information was found. +void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res); + +// Parses a two-line string in the following format: +// +// +// Used by LLVMSymbolizer and InternalSymbolizer. +void ParseSymbolizeDataOutput(const char *str, DataInfo *info); + } // namespace __sanitizer #endif // SANITIZER_SYMBOLIZER_INTERNAL_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc index 160f55d422ca..8c3ad81f952a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc @@ -184,4 +184,245 @@ Symbolizer *Symbolizer::GetOrInit() { return symbolizer_; } +// For now we assume the following protocol: +// For each request of the form +// +// passed to STDIN, external symbolizer prints to STDOUT response: +// +// :: +// +// :: +// ... +// +class LLVMSymbolizerProcess : public SymbolizerProcess { + public: + explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} + + private: + bool ReachedEndOfOutput(const char *buffer, uptr length) const override { + // Empty line marks the end of llvm-symbolizer output. + return length >= 2 && buffer[length - 1] == '\n' && + buffer[length - 2] == '\n'; + } + + void GetArgV(const char *path_to_binary, + const char *(&argv)[kArgVMax]) const override { +#if defined(__x86_64h__) + const char* const kSymbolizerArch = "--default-arch=x86_64h"; +#elif defined(__x86_64__) + const char* const kSymbolizerArch = "--default-arch=x86_64"; +#elif defined(__i386__) + const char* const kSymbolizerArch = "--default-arch=i386"; +#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__) + const char* const kSymbolizerArch = "--default-arch=powerpc64"; +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) + const char* const kSymbolizerArch = "--default-arch=powerpc64le"; +#else + const char* const kSymbolizerArch = "--default-arch=unknown"; +#endif + + const char *const inline_flag = common_flags()->symbolize_inline_frames + ? "--inlining=true" + : "--inlining=false"; + int i = 0; + argv[i++] = path_to_binary; + argv[i++] = inline_flag; + argv[i++] = kSymbolizerArch; + argv[i++] = nullptr; + } +}; + +LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) + : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} + +// Parse a :[:] buffer. The file path may contain colons on +// Windows, so extract tokens from the right hand side first. The column info is +// also optional. +static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { + char *file_line_info = 0; + str = ExtractToken(str, "\n", &file_line_info); + CHECK(file_line_info); + // Parse the last :, which must be there. + char *last_colon = internal_strrchr(file_line_info, ':'); + CHECK(last_colon); + int line_or_column = internal_atoll(last_colon + 1); + // Truncate the string at the last colon and find the next-to-last colon. + *last_colon = '\0'; + last_colon = internal_strrchr(file_line_info, ':'); + if (last_colon && IsDigit(last_colon[1])) { + // If the second-to-last colon is followed by a digit, it must be the line + // number, and the previous parsed number was a column. + info->line = internal_atoll(last_colon + 1); + info->column = line_or_column; + *last_colon = '\0'; + } else { + // Otherwise, we have line info but no column info. + info->line = line_or_column; + info->column = 0; + } + ExtractToken(file_line_info, "", &info->file); + InternalFree(file_line_info); + return str; +} + +// Parses one or more two-line strings in the following format: +// +// :[:] +// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of +// them use the same output format. +void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { + bool top_frame = true; + SymbolizedStack *last = res; + while (true) { + char *function_name = 0; + str = ExtractToken(str, "\n", &function_name); + CHECK(function_name); + if (function_name[0] == '\0') { + // There are no more frames. + InternalFree(function_name); + break; + } + SymbolizedStack *cur; + if (top_frame) { + cur = res; + top_frame = false; + } else { + cur = SymbolizedStack::New(res->info.address); + cur->info.FillModuleInfo(res->info.module, res->info.module_offset); + last->next = cur; + last = cur; + } + + AddressInfo *info = &cur->info; + info->function = function_name; + str = ParseFileLineInfo(info, str); + + // Functions and filenames can be "??", in which case we write 0 + // to address info to mark that names are unknown. + if (0 == internal_strcmp(info->function, "??")) { + InternalFree(info->function); + info->function = 0; + } + if (0 == internal_strcmp(info->file, "??")) { + InternalFree(info->file); + info->file = 0; + } + } +} + +// Parses a two-line string in the following format: +// +// +// Used by LLVMSymbolizer and InternalSymbolizer. +void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { + str = ExtractToken(str, "\n", &info->name); + str = ExtractUptr(str, " ", &info->start); + str = ExtractUptr(str, "\n", &info->size); +} + +bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { + if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module, + stack->info.module_offset)) { + ParseSymbolizePCOutput(buf, stack); + return true; + } + return false; +} + +bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { + if (const char *buf = + SendCommand(/*is_data*/ true, info->module, info->module_offset)) { + ParseSymbolizeDataOutput(buf, info); + info->start += (addr - info->module_offset); // Add the base address. + return true; + } + return false; +} + +const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name, + uptr module_offset) { + CHECK(module_name); + internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", + is_data ? "DATA " : "", module_name, module_offset); + return symbolizer_process_->SendCommand(buffer_); +} + +SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) + : path_(path), + input_fd_(kInvalidFd), + output_fd_(kInvalidFd), + times_restarted_(0), + failed_to_start_(false), + reported_invalid_path_(false), + use_forkpty_(use_forkpty) { + CHECK(path_); + CHECK_NE(path_[0], '\0'); +} + +const char *SymbolizerProcess::SendCommand(const char *command) { + for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { + // Start or restart symbolizer if we failed to send command to it. + if (const char *res = SendCommandImpl(command)) + return res; + Restart(); + } + if (!failed_to_start_) { + Report("WARNING: Failed to use and restart external symbolizer!\n"); + failed_to_start_ = true; + } + return 0; +} + +const char *SymbolizerProcess::SendCommandImpl(const char *command) { + if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) + return 0; + if (!WriteToSymbolizer(command, internal_strlen(command))) + return 0; + if (!ReadFromSymbolizer(buffer_, kBufferSize)) + return 0; + return buffer_; +} + +bool SymbolizerProcess::Restart() { + if (input_fd_ != kInvalidFd) + CloseFile(input_fd_); + if (output_fd_ != kInvalidFd) + CloseFile(output_fd_); + return StartSymbolizerSubprocess(); +} + +bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { + if (max_length == 0) + return true; + uptr read_len = 0; + while (true) { + uptr just_read = 0; + bool success = ReadFromFile(input_fd_, buffer + read_len, + max_length - read_len - 1, &just_read); + // We can't read 0 bytes, as we don't expect external symbolizer to close + // its stdout. + if (!success || just_read == 0) { + Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); + return false; + } + read_len += just_read; + if (ReachedEndOfOutput(buffer, read_len)) + break; + } + buffer[read_len] = '\0'; + return true; +} + +bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { + if (length == 0) + return true; + uptr write_len = 0; + bool success = WriteToFile(output_fd_, buffer, length, &write_len); + if (!success || write_len != length) { + Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); + return false; + } + return true; +} + } // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cc index 9a64192b0353..d1307368cf94 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cc @@ -44,28 +44,33 @@ bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { class AtosSymbolizerProcess : public SymbolizerProcess { public: explicit AtosSymbolizerProcess(const char *path, pid_t parent_pid) - : SymbolizerProcess(path, /*use_forkpty*/ true), - parent_pid_(parent_pid) {} + : SymbolizerProcess(path, /*use_forkpty*/ true) { + // Put the string command line argument in the object so that it outlives + // the call to GetArgV. + internal_snprintf(pid_str_, sizeof(pid_str_), "%d", parent_pid_); + } private: bool ReachedEndOfOutput(const char *buffer, uptr length) const override { return (length >= 1 && buffer[length - 1] == '\n'); } - void ExecuteWithDefaultArgs(const char *path_to_binary) const override { - char pid_str[16]; - internal_snprintf(pid_str, sizeof(pid_str), "%d", parent_pid_); + void GetArgV(const char *path_to_binary, + const char *(&argv)[kArgVMax]) const override { + int i = 0; + argv[i++] = path_to_binary; + argv[i++] = "-p"; + argv[i++] = &pid_str_[0]; if (GetMacosVersion() == MACOS_VERSION_MAVERICKS) { // On Mavericks atos prints a deprecation warning which we suppress by // passing -d. The warning isn't present on other OSX versions, even the // newer ones. - execl(path_to_binary, path_to_binary, "-p", pid_str, "-d", (char *)0); - } else { - execl(path_to_binary, path_to_binary, "-p", pid_str, (char *)0); + argv[i++] = "-d"; } + argv[i++] = nullptr; } - pid_t parent_pid_; + char pid_str_[16]; }; static const char *kAtosErrorMessages[] = { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc index cb8455a21ae2..da27a9ad93d5 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc @@ -25,8 +25,15 @@ #include "sanitizer_symbolizer_libbacktrace.h" #include "sanitizer_symbolizer_mac.h" +#include +#include +#include #include +#if SANITIZER_MAC +#include // for forkpty() +#endif // SANITIZER_MAC + // C++ demangling function, as required by Itanium C++ ABI. This is weak, // because we do not require a C++ ABI library to be linked to a program // using sanitizers; if it's not present, we'll just use the mangled name. @@ -53,149 +60,130 @@ const char *DemangleCXXABI(const char *name) { return name; } -// Parses one or more two-line strings in the following format: -// -// :[:] -// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of -// them use the same output format. -static void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { - bool top_frame = true; - SymbolizedStack *last = res; - while (true) { - char *function_name = 0; - str = ExtractToken(str, "\n", &function_name); - CHECK(function_name); - if (function_name[0] == '\0') { - // There are no more frames. - InternalFree(function_name); - break; - } - SymbolizedStack *cur; - if (top_frame) { - cur = res; - top_frame = false; - } else { - cur = SymbolizedStack::New(res->info.address); - cur->info.FillModuleInfo(res->info.module, res->info.module_offset); - last->next = cur; - last = cur; - } - - AddressInfo *info = &cur->info; - info->function = function_name; - // Parse :: buffer. - char *file_line_info = 0; - str = ExtractToken(str, "\n", &file_line_info); - CHECK(file_line_info); - const char *line_info = ExtractToken(file_line_info, ":", &info->file); - line_info = ExtractInt(line_info, ":", &info->line); - line_info = ExtractInt(line_info, "", &info->column); - InternalFree(file_line_info); - - // Functions and filenames can be "??", in which case we write 0 - // to address info to mark that names are unknown. - if (0 == internal_strcmp(info->function, "??")) { - InternalFree(info->function); - info->function = 0; - } - if (0 == internal_strcmp(info->file, "??")) { - InternalFree(info->file); - info->file = 0; - } - } -} - -// Parses a two-line string in the following format: -// -// -// Used by LLVMSymbolizer and InternalSymbolizer. -static void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { - str = ExtractToken(str, "\n", &info->name); - str = ExtractUptr(str, " ", &info->start); - str = ExtractUptr(str, "\n", &info->size); -} - -// For now we assume the following protocol: -// For each request of the form -// -// passed to STDIN, external symbolizer prints to STDOUT response: -// -// :: -// -// :: -// ... -// -class LLVMSymbolizerProcess : public SymbolizerProcess { - public: - explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} - - private: - bool ReachedEndOfOutput(const char *buffer, uptr length) const override { - // Empty line marks the end of llvm-symbolizer output. - return length >= 2 && buffer[length - 1] == '\n' && - buffer[length - 2] == '\n'; - } - - void ExecuteWithDefaultArgs(const char *path_to_binary) const override { -#if defined(__x86_64h__) - const char* const kSymbolizerArch = "--default-arch=x86_64h"; -#elif defined(__x86_64__) - const char* const kSymbolizerArch = "--default-arch=x86_64"; -#elif defined(__i386__) - const char* const kSymbolizerArch = "--default-arch=i386"; -#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__) - const char* const kSymbolizerArch = "--default-arch=powerpc64"; -#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) - const char* const kSymbolizerArch = "--default-arch=powerpc64le"; -#else - const char* const kSymbolizerArch = "--default-arch=unknown"; -#endif - - const char *const inline_flag = common_flags()->symbolize_inline_frames - ? "--inlining=true" - : "--inlining=false"; - execl(path_to_binary, path_to_binary, inline_flag, kSymbolizerArch, - (char *)0); - } -}; - -class LLVMSymbolizer : public SymbolizerTool { - public: - explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) - : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} - - bool SymbolizePC(uptr addr, SymbolizedStack *stack) override { - if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module, - stack->info.module_offset)) { - ParseSymbolizePCOutput(buf, stack); - return true; +bool SymbolizerProcess::StartSymbolizerSubprocess() { + if (!FileExists(path_)) { + if (!reported_invalid_path_) { + Report("WARNING: invalid path to external symbolizer!\n"); + reported_invalid_path_ = true; } return false; } - bool SymbolizeData(uptr addr, DataInfo *info) override { - if (const char *buf = - SendCommand(/*is_data*/ true, info->module, info->module_offset)) { - ParseSymbolizeDataOutput(buf, info); - info->start += (addr - info->module_offset); // Add the base address. - return true; + int pid; + if (use_forkpty_) { +#if SANITIZER_MAC + fd_t fd = kInvalidFd; + // Use forkpty to disable buffering in the new terminal. + pid = forkpty(&fd, 0, 0, 0); + if (pid == -1) { + // forkpty() failed. + Report("WARNING: failed to fork external symbolizer (errno: %d)\n", + errno); + return false; + } else if (pid == 0) { + // Child subprocess. + const char *argv[kArgVMax]; + GetArgV(path_, argv); + execv(path_, const_cast(&argv[0])); + internal__exit(1); } + + // Continue execution in parent process. + input_fd_ = output_fd_ = fd; + + // Disable echo in the new terminal, disable CR. + struct termios termflags; + tcgetattr(fd, &termflags); + termflags.c_oflag &= ~ONLCR; + termflags.c_lflag &= ~ECHO; + tcsetattr(fd, TCSANOW, &termflags); +#else // SANITIZER_MAC + UNIMPLEMENTED(); +#endif // SANITIZER_MAC + } else { + int *infd = NULL; + int *outfd = NULL; + // The client program may close its stdin and/or stdout and/or stderr + // thus allowing socketpair to reuse file descriptors 0, 1 or 2. + // In this case the communication between the forked processes may be + // broken if either the parent or the child tries to close or duplicate + // these descriptors. The loop below produces two pairs of file + // descriptors, each greater than 2 (stderr). + int sock_pair[5][2]; + for (int i = 0; i < 5; i++) { + if (pipe(sock_pair[i]) == -1) { + for (int j = 0; j < i; j++) { + internal_close(sock_pair[j][0]); + internal_close(sock_pair[j][1]); + } + Report("WARNING: Can't create a socket pair to start " + "external symbolizer (errno: %d)\n", errno); + return false; + } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) { + if (infd == NULL) { + infd = sock_pair[i]; + } else { + outfd = sock_pair[i]; + for (int j = 0; j < i; j++) { + if (sock_pair[j] == infd) continue; + internal_close(sock_pair[j][0]); + internal_close(sock_pair[j][1]); + } + break; + } + } + } + CHECK(infd); + CHECK(outfd); + + // Real fork() may call user callbacks registered with pthread_atfork(). + pid = internal_fork(); + if (pid == -1) { + // Fork() failed. + internal_close(infd[0]); + internal_close(infd[1]); + internal_close(outfd[0]); + internal_close(outfd[1]); + Report("WARNING: failed to fork external symbolizer " + " (errno: %d)\n", errno); + return false; + } else if (pid == 0) { + // Child subprocess. + internal_close(STDOUT_FILENO); + internal_close(STDIN_FILENO); + internal_dup2(outfd[0], STDIN_FILENO); + internal_dup2(infd[1], STDOUT_FILENO); + internal_close(outfd[0]); + internal_close(outfd[1]); + internal_close(infd[0]); + internal_close(infd[1]); + for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) + internal_close(fd); + const char *argv[kArgVMax]; + GetArgV(path_, argv); + execv(path_, const_cast(&argv[0])); + internal__exit(1); + } + + // Continue execution in parent process. + internal_close(outfd[0]); + internal_close(infd[1]); + input_fd_ = infd[0]; + output_fd_ = outfd[1]; + } + + // Check that symbolizer subprocess started successfully. + int pid_status; + SleepForMillis(kSymbolizerStartupTimeMillis); + int exited_pid = waitpid(pid, &pid_status, WNOHANG); + if (exited_pid != 0) { + // Either waitpid failed, or child has already exited. + Report("WARNING: external symbolizer didn't start up correctly!\n"); return false; } - private: - const char *SendCommand(bool is_data, const char *module_name, - uptr module_offset) { - CHECK(module_name); - internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", - is_data ? "DATA " : "", module_name, module_offset); - return symbolizer_process_->SendCommand(buffer_); - } - - LLVMSymbolizerProcess *symbolizer_process_; - static const uptr kBufferSize = 16 * 1024; - char buffer_[kBufferSize]; -}; + return true; +} class Addr2LineProcess : public SymbolizerProcess { public: @@ -217,8 +205,13 @@ class Addr2LineProcess : public SymbolizerProcess { return false; } - void ExecuteWithDefaultArgs(const char *path_to_binary) const override { - execl(path_to_binary, path_to_binary, "-Cfe", module_name_, (char *)0); + void GetArgV(const char *path_to_binary, + const char *(&argv)[kArgVMax]) const override { + int i = 0; + argv[i++] = path_to_binary; + argv[i++] = "-Cfe"; + argv[i++] = module_name_; + argv[i++] = nullptr; } const char *module_name_; // Owned, leaked. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_process_libcdep.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_process_libcdep.cc deleted file mode 100644 index 87ac491412b5..000000000000 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_process_libcdep.cc +++ /dev/null @@ -1,231 +0,0 @@ -//===-- sanitizer_symbolizer_process_libcdep.cc ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of SymbolizerProcess used by external symbolizers. -// -//===----------------------------------------------------------------------===// - -#include "sanitizer_platform.h" -#if SANITIZER_POSIX -#include "sanitizer_posix.h" -#include "sanitizer_symbolizer_internal.h" - -#include -#include -#include -#include - -#if SANITIZER_MAC -#include // for forkpty() -#endif // SANITIZER_MAC - -namespace __sanitizer { - -SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) - : path_(path), - input_fd_(kInvalidFd), - output_fd_(kInvalidFd), - times_restarted_(0), - failed_to_start_(false), - reported_invalid_path_(false), - use_forkpty_(use_forkpty) { - CHECK(path_); - CHECK_NE(path_[0], '\0'); -} - -const char *SymbolizerProcess::SendCommand(const char *command) { - for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { - // Start or restart symbolizer if we failed to send command to it. - if (const char *res = SendCommandImpl(command)) - return res; - Restart(); - } - if (!failed_to_start_) { - Report("WARNING: Failed to use and restart external symbolizer!\n"); - failed_to_start_ = true; - } - return 0; -} - -bool SymbolizerProcess::Restart() { - if (input_fd_ != kInvalidFd) - CloseFile(input_fd_); - if (output_fd_ != kInvalidFd) - CloseFile(output_fd_); - return StartSymbolizerSubprocess(); -} - -const char *SymbolizerProcess::SendCommandImpl(const char *command) { - if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) - return 0; - if (!WriteToSymbolizer(command, internal_strlen(command))) - return 0; - if (!ReadFromSymbolizer(buffer_, kBufferSize)) - return 0; - return buffer_; -} - -bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { - if (max_length == 0) - return true; - uptr read_len = 0; - while (true) { - uptr just_read = 0; - bool success = ReadFromFile(input_fd_, buffer + read_len, - max_length - read_len - 1, &just_read); - // We can't read 0 bytes, as we don't expect external symbolizer to close - // its stdout. - if (!success || just_read == 0) { - Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); - return false; - } - read_len += just_read; - if (ReachedEndOfOutput(buffer, read_len)) - break; - } - buffer[read_len] = '\0'; - return true; -} - -bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { - if (length == 0) - return true; - uptr write_len = 0; - bool success = WriteToFile(output_fd_, buffer, length, &write_len); - if (!success || write_len != length) { - Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); - return false; - } - return true; -} - -bool SymbolizerProcess::StartSymbolizerSubprocess() { - if (!FileExists(path_)) { - if (!reported_invalid_path_) { - Report("WARNING: invalid path to external symbolizer!\n"); - reported_invalid_path_ = true; - } - return false; - } - - int pid; - if (use_forkpty_) { -#if SANITIZER_MAC - fd_t fd = kInvalidFd; - // Use forkpty to disable buffering in the new terminal. - pid = forkpty(&fd, 0, 0, 0); - if (pid == -1) { - // forkpty() failed. - Report("WARNING: failed to fork external symbolizer (errno: %d)\n", - errno); - return false; - } else if (pid == 0) { - // Child subprocess. - ExecuteWithDefaultArgs(path_); - internal__exit(1); - } - - // Continue execution in parent process. - input_fd_ = output_fd_ = fd; - - // Disable echo in the new terminal, disable CR. - struct termios termflags; - tcgetattr(fd, &termflags); - termflags.c_oflag &= ~ONLCR; - termflags.c_lflag &= ~ECHO; - tcsetattr(fd, TCSANOW, &termflags); -#else // SANITIZER_MAC - UNIMPLEMENTED(); -#endif // SANITIZER_MAC - } else { - int *infd = NULL; - int *outfd = NULL; - // The client program may close its stdin and/or stdout and/or stderr - // thus allowing socketpair to reuse file descriptors 0, 1 or 2. - // In this case the communication between the forked processes may be - // broken if either the parent or the child tries to close or duplicate - // these descriptors. The loop below produces two pairs of file - // descriptors, each greater than 2 (stderr). - int sock_pair[5][2]; - for (int i = 0; i < 5; i++) { - if (pipe(sock_pair[i]) == -1) { - for (int j = 0; j < i; j++) { - internal_close(sock_pair[j][0]); - internal_close(sock_pair[j][1]); - } - Report("WARNING: Can't create a socket pair to start " - "external symbolizer (errno: %d)\n", errno); - return false; - } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) { - if (infd == NULL) { - infd = sock_pair[i]; - } else { - outfd = sock_pair[i]; - for (int j = 0; j < i; j++) { - if (sock_pair[j] == infd) continue; - internal_close(sock_pair[j][0]); - internal_close(sock_pair[j][1]); - } - break; - } - } - } - CHECK(infd); - CHECK(outfd); - - // Real fork() may call user callbacks registered with pthread_atfork(). - pid = internal_fork(); - if (pid == -1) { - // Fork() failed. - internal_close(infd[0]); - internal_close(infd[1]); - internal_close(outfd[0]); - internal_close(outfd[1]); - Report("WARNING: failed to fork external symbolizer " - " (errno: %d)\n", errno); - return false; - } else if (pid == 0) { - // Child subprocess. - internal_close(STDOUT_FILENO); - internal_close(STDIN_FILENO); - internal_dup2(outfd[0], STDIN_FILENO); - internal_dup2(infd[1], STDOUT_FILENO); - internal_close(outfd[0]); - internal_close(outfd[1]); - internal_close(infd[0]); - internal_close(infd[1]); - for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) - internal_close(fd); - ExecuteWithDefaultArgs(path_); - internal__exit(1); - } - - // Continue execution in parent process. - internal_close(outfd[0]); - internal_close(infd[1]); - input_fd_ = infd[0]; - output_fd_ = outfd[1]; - } - - // Check that symbolizer subprocess started successfully. - int pid_status; - SleepForMillis(kSymbolizerStartupTimeMillis); - int exited_pid = waitpid(pid, &pid_status, WNOHANG); - if (exited_pid != 0) { - // Either waitpid failed, or child has already exited. - Report("WARNING: external symbolizer didn't start up correctly!\n"); - return false; - } - - return true; -} - -} // namespace __sanitizer - -#endif // SANITIZER_POSIX diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cc index 31f374687e96..6df9b7e4cc53 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cc @@ -18,13 +18,21 @@ #include #pragma comment(lib, "dbghelp.lib") -#include "sanitizer_symbolizer_win.h" #include "sanitizer_symbolizer_internal.h" namespace __sanitizer { namespace { +class WinSymbolizerTool : public SymbolizerTool { + public: + bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; + bool SymbolizeData(uptr addr, DataInfo *info) override { + return false; + } + const char *Demangle(const char *name) override; +}; + bool is_dbghelp_initialized = false; bool TrySymInitialize() { @@ -115,7 +123,9 @@ bool WinSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *frame) { frame->info.file = internal_strdup(line_info.FileName); frame->info.line = line_info.LineNumber; } - return true; + // Only consider this a successful symbolization attempt if we got file info. + // Otherwise, try llvm-symbolizer. + return got_fileline; } const char *WinSymbolizerTool::Demangle(const char *name) { @@ -137,10 +147,128 @@ void Symbolizer::PlatformPrepareForSandboxing() { // Do nothing. } +namespace { +struct ScopedHandle { + ScopedHandle() : h_(nullptr) {} + explicit ScopedHandle(HANDLE h) : h_(h) {} + ~ScopedHandle() { + if (h_) + ::CloseHandle(h_); + } + HANDLE get() { return h_; } + HANDLE *receive() { return &h_; } + HANDLE release() { + HANDLE h = h_; + h_ = nullptr; + return h; + } + HANDLE h_; +}; +} // namespace + +bool SymbolizerProcess::StartSymbolizerSubprocess() { + // Create inherited pipes for stdin and stdout. + ScopedHandle stdin_read, stdin_write; + ScopedHandle stdout_read, stdout_write; + SECURITY_ATTRIBUTES attrs; + attrs.nLength = sizeof(SECURITY_ATTRIBUTES); + attrs.bInheritHandle = TRUE; + attrs.lpSecurityDescriptor = nullptr; + if (!::CreatePipe(stdin_read.receive(), stdin_write.receive(), &attrs, 0) || + !::CreatePipe(stdout_read.receive(), stdout_write.receive(), &attrs, 0)) + return false; + + // Don't inherit the writing end of stdin or the reading end of stdout. + if (!SetHandleInformation(stdin_write.get(), HANDLE_FLAG_INHERIT, 0) || + !SetHandleInformation(stdout_read.get(), HANDLE_FLAG_INHERIT, 0)) + return false; + + // Compute the command line. Wrap double quotes around everything. + const char *argv[kArgVMax]; + GetArgV(path_, argv); + InternalScopedString command_line(kMaxPathLength * 3); + for (int i = 0; argv[i]; i++) { + const char *arg = argv[i]; + int arglen = internal_strlen(arg); + // Check that tool command lines are simple and that complete escaping is + // unnecessary. + CHECK(!internal_strchr(arg, '"') && "quotes in args unsupported"); + CHECK(!internal_strstr(arg, "\\\\") && + "double backslashes in args unsupported"); + CHECK(arglen > 0 && arg[arglen - 1] != '\\' && + "args ending in backslash and empty args unsupported"); + command_line.append("\"%s\" ", arg); + } + VReport(3, "Launching symbolizer command: %s\n", command_line.data()); + + // Launch llvm-symbolizer with stdin and stdout redirected. + STARTUPINFOA si; + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags |= STARTF_USESTDHANDLES; + si.hStdInput = stdin_read.get(); + si.hStdOutput = stdout_write.get(); + PROCESS_INFORMATION pi; + memset(&pi, 0, sizeof(pi)); + if (!CreateProcessA(path_, // Executable + command_line.data(), // Command line + nullptr, // Process handle not inheritable + nullptr, // Thread handle not inheritable + TRUE, // Set handle inheritance to TRUE + 0, // Creation flags + nullptr, // Use parent's environment block + nullptr, // Use parent's starting directory + &si, &pi)) { + VReport(2, "WARNING: %s failed to create process for %s (error code: %d)\n", + SanitizerToolName, path_, GetLastError()); + return false; + } + + // Process creation succeeded, so transfer handle ownership into the fields. + input_fd_ = stdout_read.release(); + output_fd_ = stdin_write.release(); + + // The llvm-symbolizer process is responsible for quitting itself when the + // stdin pipe is closed, so we don't need these handles. Close them to prevent + // leaks. If we ever want to try to kill the symbolizer process from the + // parent, we'll want to hang on to these handles. + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + return true; +} + +static void ChooseSymbolizerTools(IntrusiveList *list, + LowLevelAllocator *allocator) { + if (!common_flags()->symbolize) { + VReport(2, "Symbolizer is disabled.\n"); + return; + } + + // Add llvm-symbolizer in case the binary has dwarf. + const char *user_path = common_flags()->external_symbolizer_path; + const char *path = + user_path ? user_path : FindPathToBinary("llvm-symbolizer.exe"); + if (path) { + VReport(2, "Using llvm-symbolizer at %spath: %s\n", + user_path ? "user-specified " : "", path); + list->push_back(new(*allocator) LLVMSymbolizer(path, allocator)); + } else { + if (user_path && user_path[0] == '\0') { + VReport(2, "External symbolizer is explicitly disabled.\n"); + } else { + VReport(2, "External symbolizer is not present.\n"); + } + } + + // Add the dbghelp based symbolizer. + list->push_back(new(*allocator) WinSymbolizerTool()); +} + Symbolizer *Symbolizer::PlatformInit() { IntrusiveList list; list.clear(); - list.push_back(new(symbolizer_allocator_) WinSymbolizerTool()); + ChooseSymbolizerTools(&list, &symbolizer_allocator_); + return new(symbolizer_allocator_) Symbolizer(list); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.h deleted file mode 100644 index 72ac5e5ee104..000000000000 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- sanitizer_symbolizer_win.h ------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Header file for the Windows symbolizer tool. -// -//===----------------------------------------------------------------------===// -#ifndef SANITIZER_SYMBOLIZER_WIN_H -#define SANITIZER_SYMBOLIZER_WIN_H - -#include "sanitizer_symbolizer_internal.h" - -namespace __sanitizer { - -class WinSymbolizerTool : public SymbolizerTool { - public: - bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; - bool SymbolizeData(uptr addr, DataInfo *info) override { - return false; - } - const char *Demangle(const char *name) override; -}; - -} // namespace __sanitizer - -#endif // SANITIZER_SYMBOLIZER_WIN_H diff --git a/compiler-rt/test/asan/CMakeLists.txt b/compiler-rt/test/asan/CMakeLists.txt index aff54db1e77a..03da4af6c1cc 100644 --- a/compiler-rt/test/asan/CMakeLists.txt +++ b/compiler-rt/test/asan/CMakeLists.txt @@ -13,6 +13,17 @@ macro(get_bits_for_arch arch bits) endif() endmacro() +set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) +if(NOT COMPILER_RT_STANDALONE_BUILD) + list(APPEND ASAN_TEST_DEPS asan) + if(WIN32 AND COMPILER_RT_HAS_LLD_SOURCES) + list(APPEND ASAN_TEST_DEPS + lld + ) + endif() +endif() +set(ASAN_DYNAMIC_TEST_DEPS ${ASAN_TEST_DEPS}) + foreach(arch ${ASAN_SUPPORTED_ARCH}) if(ANDROID) set(ASAN_TEST_TARGET_ARCH ${arch}-android) @@ -55,12 +66,6 @@ foreach(arch ${ASAN_SUPPORTED_ARCH}) endif() endforeach() -set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) -if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND ASAN_TEST_DEPS asan) -endif() -set(ASAN_DYNAMIC_TEST_DEPS ${ASAN_TEST_DEPS}) - # Add unit tests. if(COMPILER_RT_INCLUDE_TESTS) set(ASAN_TEST_DYNAMIC False) diff --git a/compiler-rt/test/asan/TestCases/Windows/fuse-lld.cc b/compiler-rt/test/asan/TestCases/Windows/fuse-lld.cc new file mode 100644 index 000000000000..b4b3d3907ffd --- /dev/null +++ b/compiler-rt/test/asan/TestCases/Windows/fuse-lld.cc @@ -0,0 +1,23 @@ +// If we have LLD, see that things more or less work. +// +// REQUIRES: lld +// +// FIXME: Use -fuse-ld=lld after the old COFF linker is removed. +// FIXME: Test will fail until we add flags for requesting dwarf or cv. +// RUNX: %clangxx_asan -O2 %s -o %t.exe -fuse-ld=lld -Wl,-debug +// RUN: %clangxx_asan -c -O2 %s -o %t.o -gdwarf +// RUN: lld-link2 %t.o -out:%t.exe -debug -defaultlib:libcmt %asan_lib %asan_cxx_lib +// RUN: not %run %t.exe 2>&1 | FileCheck %s + +#include + +int main() { + char *x = (char*)malloc(10 * sizeof(char)); + free(x); + return x[5]; + // CHECK: heap-use-after-free + // CHECK: free + // CHECK: main{{.*}}fuse-lld.cc:[[@LINE-4]]:3 + // CHECK: malloc + // CHECK: main{{.*}}fuse-lld.cc:[[@LINE-7]]:20 +} diff --git a/compiler-rt/test/asan/TestCases/Windows/null_deref.cc b/compiler-rt/test/asan/TestCases/Windows/null_deref.cc index 202000f59db7..9515602ce89c 100644 --- a/compiler-rt/test/asan/TestCases/Windows/null_deref.cc +++ b/compiler-rt/test/asan/TestCases/Windows/null_deref.cc @@ -10,6 +10,6 @@ static void NullDeref(int *ptr) { } int main() { NullDeref((int*)0); - // CHECK: {{ #1 0x.* in main.*null_deref.cc:}}[[@LINE-1]] + // CHECK: {{ #1 0x.* in main.*null_deref.cc:}}[[@LINE-1]]:3 // CHECK: AddressSanitizer can not provide additional info. } diff --git a/compiler-rt/test/asan/TestCases/Windows/report_after_syminitialize.cc b/compiler-rt/test/asan/TestCases/Windows/report_after_syminitialize.cc index faf5e35db5f5..3062318e0db1 100644 --- a/compiler-rt/test/asan/TestCases/Windows/report_after_syminitialize.cc +++ b/compiler-rt/test/asan/TestCases/Windows/report_after_syminitialize.cc @@ -1,4 +1,5 @@ -// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O0 %s -o %t +// RUN: env ASAN_OPTIONS=external_symbolizer_path=asdf not %run %t 2>&1 | FileCheck %s #include #include @@ -13,7 +14,8 @@ int main() { *(volatile int*)0 = 42; // CHECK: ERROR: AddressSanitizer: access-violation on unknown address + // CHECK-NEXT: {{WARNING: Failed to use and restart external symbolizer}} // CHECK-NEXT: {{WARNING: .*DbgHelp}} - // CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-3]] + // CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-4]] // CHECK: AddressSanitizer can not provide additional info. }