[Windows] Use llvm-symbolizer before using dbghelp

Summary:
llvm-symbolizer understands both PDBs and DWARF, so it's a better bet if
it's available. It prints out the function parameter types and column
numbers, so I needed to churn the expected test output a bit.

This makes most of the llvm-symbolizer subprocessing code
target-independent. Pipes on all platforms use fd_t, and we can use the
portable ReadFromFile / WriteToFile wrappers in
sanitizer_symbolizer_libcdep.cc. Only the pipe creation and process spawning
are platform-specific.

Please check that the libcdep layering is still correct. I don't know
how to reproduce the build configuration that relies on that.

Reviewers: samsonov

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11791

llvm-svn: 244616
This commit is contained in:
Reid Kleckner 2015-08-11 15:51:40 +00:00
parent 863bfdbfb4
commit 7d9e1e1259
12 changed files with 604 additions and 430 deletions

View File

@ -49,7 +49,6 @@ set(SANITIZER_LIBCDEP_SOURCES
sanitizer_stoptheworld_linux_libcdep.cc
sanitizer_symbolizer_libcdep.cc
sanitizer_symbolizer_posix_libcdep.cc
sanitizer_symbolizer_process_libcdep.cc
sanitizer_unwind_linux_libcdep.cc)
# Explicitly list all sanitizer_common headers. Not all of these are
@ -103,7 +102,6 @@ set(SANITIZER_HEADERS
sanitizer_symbolizer_internal.h
sanitizer_symbolizer_libbacktrace.h
sanitizer_symbolizer_mac.h
sanitizer_symbolizer_win.h
sanitizer_syscall_generic.inc
sanitizer_syscall_linux_x86_64.inc
sanitizer_thread_registry.h)

View File

@ -74,6 +74,20 @@ class SymbolizerProcess {
explicit SymbolizerProcess(const char *path, bool use_forkpty = false);
const char *SendCommand(const char *command);
protected:
virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
UNIMPLEMENTED();
}
/// The maximum number of arguments required to invoke a tool process.
enum { kArgVMax = 6 };
/// Fill in an argv array to invoke the child process.
virtual void GetArgV(const char *path_to_binary,
const char *(&argv)[kArgVMax]) const {
UNIMPLEMENTED();
}
private:
bool Restart();
const char *SendCommandImpl(const char *command);
@ -81,14 +95,6 @@ class SymbolizerProcess {
bool WriteToSymbolizer(const char *buffer, uptr length);
bool StartSymbolizerSubprocess();
virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
UNIMPLEMENTED();
}
virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const {
UNIMPLEMENTED();
}
const char *path_;
fd_t input_fd_;
fd_t output_fd_;
@ -104,6 +110,41 @@ class SymbolizerProcess {
bool use_forkpty_;
};
// Forward declaration; LLVMSymbolizer below only holds a pointer to it.
class LLVMSymbolizerProcess;
// This tool invokes llvm-symbolizer in a subprocess. It should be as portable
// as the llvm-symbolizer tool is.
class LLVMSymbolizer : public SymbolizerTool {
public:
// |path| is the llvm-symbolizer binary to run; |allocator| provides the
// storage for the helper process object.
explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);
// Symbolizes the code location described by stack->info (module and module
// offset). Returns false when no response could be obtained from the tool.
bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
// Symbolizes the global described by info (module and module offset).
// Returns false when no response could be obtained from the tool.
bool SymbolizeData(uptr addr, DataInfo *info) override;
private:
// Formats a single request into buffer_ and forwards it to the subprocess.
// |is_data| selects a data ("DATA ") request instead of a code request.
const char *SendCommand(bool is_data, const char *module_name,
uptr module_offset);
LLVMSymbolizerProcess *symbolizer_process_;
// Scratch space for the formatted request line.
static const uptr kBufferSize = 16 * 1024;
char buffer_[kBufferSize];
};
// Parses one or more two-line strings in the following format:
// <function_name>
// <file_name>:<line_number>[:<column_number>]
// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
// them use the same output format. The first parsed frame is stored in |res|;
// any further frames are appended to the list through the |next| links.
// The function does not return a value.
void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
// Parses a two-line string in the following format:
// <symbol_name>
// <start_address> <size>
// Used by LLVMSymbolizer and InternalSymbolizer.
// The parsed values are stored in info->name, info->start and info->size.
void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
} // namespace __sanitizer
#endif // SANITIZER_SYMBOLIZER_INTERNAL_H

View File

@ -184,4 +184,245 @@ Symbolizer *Symbolizer::GetOrInit() {
return symbolizer_;
}
// Protocol assumed (for now) when talking to the external symbolizer.
// Every request written to its STDIN has the form
//   <module_name> <module_offset>
// and the symbolizer replies on STDOUT with
//   <function_name>
//   <file_name>:<line_number>:<column_number>
//   <function_name>
//   <file_name>:<line_number>:<column_number>
//   ...
//   <empty line>
class LLVMSymbolizerProcess : public SymbolizerProcess {
 public:
  explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}

 private:
  // A reply is complete once it is terminated by an empty line, i.e. the
  // buffer ends in two consecutive newlines.
  bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
    if (length < 2)
      return false;
    return buffer[length - 2] == '\n' && buffer[length - 1] == '\n';
  }

  // Fills |argv| with:
  //   <binary> --inlining=<true|false> --default-arch=<arch> <null>
  void GetArgV(const char *path_to_binary,
               const char *(&argv)[kArgVMax]) const override {
    // Architecture hint, chosen at compile time from the target macros.
    const char *const arch_flag =
#if defined(__x86_64h__)
        "--default-arch=x86_64h";
#elif defined(__x86_64__)
        "--default-arch=x86_64";
#elif defined(__i386__)
        "--default-arch=i386";
#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
        "--default-arch=powerpc64";
#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
        "--default-arch=powerpc64le";
#else
        "--default-arch=unknown";
#endif

    // Inlined-frame reporting follows the runtime flag.
    const char *inlining_flag = "--inlining=false";
    if (common_flags()->symbolize_inline_frames)
      inlining_flag = "--inlining=true";

    argv[0] = path_to_binary;
    argv[1] = inlining_flag;
    argv[2] = arch_flag;
    argv[3] = nullptr;
  }
};
// Builds the tool; the subprocess wrapper for |path| is placement-allocated
// from |allocator|.
LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
    : symbolizer_process_(new (*allocator) LLVMSymbolizerProcess(path)) {
}
// Parse a <file>[:<line>[:<column>]] buffer. The file path may contain colons
// on Windows, so extract tokens from the right hand side first. Both the line
// and the column are optional: a trailing ":<digits>" group is only treated as
// a number when the colon is immediately followed by a digit, so a bare
// "C:\path\file.c" keeps its drive colon and a line without any colon no
// longer aborts the process.
//
// Consumes one '\n'-terminated line from |str|, stores the results in
// info->file, info->line and info->column (missing numbers become 0) and
// returns the position following the consumed line.
static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
  char *file_line_info = nullptr;
  str = ExtractToken(str, "\n", &file_line_info);
  CHECK(file_line_info);

  info->line = 0;
  info->column = 0;

  // Look at the right-most ":<int>" group, if there is one.
  char *last_colon = internal_strrchr(file_line_info, ':');
  if (last_colon && IsDigit(last_colon[1])) {
    const int line_or_column = internal_atoll(last_colon + 1);
    // Truncate the string at the last colon and find the next-to-last colon.
    *last_colon = '\0';
    char *prev_colon = internal_strrchr(file_line_info, ':');
    if (prev_colon && IsDigit(prev_colon[1])) {
      // If the second-to-last colon is followed by a digit, it must be the
      // line number, and the previously parsed number was a column.
      info->line = internal_atoll(prev_colon + 1);
      info->column = line_or_column;
      *prev_colon = '\0';
    } else {
      // Otherwise, we have line info but no column info.
      info->line = line_or_column;
    }
  }

  // Whatever is left in the buffer is the file name.
  ExtractToken(file_line_info, "", &info->file);
  InternalFree(file_line_info);
  return str;
}
// Parses one or more two-line strings in the following format:
// <function_name>
// <file_name>:<line_number>[:<column_number>]
// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
// them use the same output format.
void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
bool top_frame = true;
SymbolizedStack *last = res;
while (true) {
char *function_name = 0;
str = ExtractToken(str, "\n", &function_name);
CHECK(function_name);
if (function_name[0] == '\0') {
// There are no more frames.
InternalFree(function_name);
break;
}
SymbolizedStack *cur;
if (top_frame) {
cur = res;
top_frame = false;
} else {
cur = SymbolizedStack::New(res->info.address);
cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
last->next = cur;
last = cur;
}
AddressInfo *info = &cur->info;
info->function = function_name;
str = ParseFileLineInfo(info, str);
// Functions and filenames can be "??", in which case we write 0
// to address info to mark that names are unknown.
if (0 == internal_strcmp(info->function, "??")) {
InternalFree(info->function);
info->function = 0;
}
if (0 == internal_strcmp(info->file, "??")) {
InternalFree(info->file);
info->file = 0;
}
}
}
// Reads a two-line reply of the form
//   <symbol_name>
//   <start_address> <size>
// into info->name, info->start and info->size.
// Used by LLVMSymbolizer and InternalSymbolizer.
void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
  const char *cursor = ExtractToken(str, "\n", &info->name);
  cursor = ExtractUptr(cursor, " ", &info->start);
  ExtractUptr(cursor, "\n", &info->size);
}
// Asks the subprocess about the code location in stack->info and, when a
// reply arrives, parses it into |stack|. Returns whether a reply was received.
bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
  const char *reply = SendCommand(/*is_data*/ false, stack->info.module,
                                  stack->info.module_offset);
  if (!reply)
    return false;
  ParseSymbolizePCOutput(reply, stack);
  return true;
}
// Asks the subprocess about the global in |info| and, when a reply arrives,
// parses it into |info|. Returns whether a reply was received.
bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
  const char *reply =
      SendCommand(/*is_data*/ true, info->module, info->module_offset);
  if (!reply)
    return false;
  ParseSymbolizeDataOutput(reply, info);
  // The reported start is module-relative; add the base address.
  info->start += (addr - info->module_offset);
  return true;
}
const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name,
uptr module_offset) {
CHECK(module_name);
internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
is_data ? "DATA " : "", module_name, module_offset);
return symbolizer_process_->SendCommand(buffer_);
}
// Records the path of the external tool and starts with both descriptors
// marked invalid; the constructor body itself only validates |path|.
SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
: path_(path),
input_fd_(kInvalidFd),
output_fd_(kInvalidFd),
times_restarted_(0),
failed_to_start_(false),
reported_invalid_path_(false),
use_forkpty_(use_forkpty) {
// A null or empty path is rejected outright.
CHECK(path_);
CHECK_NE(path_[0], '\0');
}
// Sends |command| to the subprocess, (re)starting it after every failed
// attempt until the restart budget is used up. Returns the reply, or 0 once
// no further restarts are allowed; the failure warning is printed only once.
const char *SymbolizerProcess::SendCommand(const char *command) {
  while (times_restarted_ < kMaxTimesRestarted) {
    const char *reply = SendCommandImpl(command);
    if (reply)
      return reply;
    // Start or restart symbolizer if we failed to send command to it.
    Restart();
    times_restarted_++;
  }
  if (failed_to_start_)
    return 0;
  Report("WARNING: Failed to use and restart external symbolizer!\n");
  failed_to_start_ = true;
  return 0;
}
// One attempt at a request/response round trip. Returns the reply stored in
// buffer_, or 0 when the descriptors are not set up or the write or read fails.
const char *SymbolizerProcess::SendCommandImpl(const char *command) {
  const bool connected =
      input_fd_ != kInvalidFd && output_fd_ != kInvalidFd;
  if (!connected)
    return 0;
  const bool ok = WriteToSymbolizer(command, internal_strlen(command)) &&
                  ReadFromSymbolizer(buffer_, kBufferSize);
  if (!ok)
    return 0;
  return buffer_;
}
// Closes the descriptors of the previous subprocess (if any) and starts a new
// one. Returns the result of StartSymbolizerSubprocess().
//
// Both descriptors are reset to kInvalidFd before the new start attempt so
// that a failed start does not leave already-closed descriptors behind for
// the next SendCommandImpl()/Restart() to use. The two descriptors may be the
// same one (the forkpty path assigns a single fd to both), in which case it is
// closed only once.
bool SymbolizerProcess::Restart() {
  if (input_fd_ != kInvalidFd)
    CloseFile(input_fd_);
  if (output_fd_ != kInvalidFd && output_fd_ != input_fd_)
    CloseFile(output_fd_);
  input_fd_ = kInvalidFd;
  output_fd_ = kInvalidFd;
  return StartSymbolizerSubprocess();
}
bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
if (max_length == 0)
return true;
uptr read_len = 0;
while (true) {
uptr just_read = 0;
bool success = ReadFromFile(input_fd_, buffer + read_len,
max_length - read_len - 1, &just_read);
// We can't read 0 bytes, as we don't expect external symbolizer to close
// its stdout.
if (!success || just_read == 0) {
Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
return false;
}
read_len += just_read;
if (ReachedEndOfOutput(buffer, read_len))
break;
}
buffer[read_len] = '\0';
return true;
}
bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
if (length == 0)
return true;
uptr write_len = 0;
bool success = WriteToFile(output_fd_, buffer, length, &write_len);
if (!success || write_len != length) {
Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
return false;
}
return true;
}
} // namespace __sanitizer

View File

@ -44,28 +44,33 @@ bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
class AtosSymbolizerProcess : public SymbolizerProcess {
public:
explicit AtosSymbolizerProcess(const char *path, pid_t parent_pid)
: SymbolizerProcess(path, /*use_forkpty*/ true),
parent_pid_(parent_pid) {}
: SymbolizerProcess(path, /*use_forkpty*/ true) {
// Put the string command line argument in the object so that it outlives
// the call to GetArgV.
internal_snprintf(pid_str_, sizeof(pid_str_), "%d", parent_pid_);
}
private:
bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
return (length >= 1 && buffer[length - 1] == '\n');
}
void ExecuteWithDefaultArgs(const char *path_to_binary) const override {
char pid_str[16];
internal_snprintf(pid_str, sizeof(pid_str), "%d", parent_pid_);
void GetArgV(const char *path_to_binary,
const char *(&argv)[kArgVMax]) const override {
int i = 0;
argv[i++] = path_to_binary;
argv[i++] = "-p";
argv[i++] = &pid_str_[0];
if (GetMacosVersion() == MACOS_VERSION_MAVERICKS) {
// On Mavericks atos prints a deprecation warning which we suppress by
// passing -d. The warning isn't present on other OSX versions, even the
// newer ones.
execl(path_to_binary, path_to_binary, "-p", pid_str, "-d", (char *)0);
} else {
execl(path_to_binary, path_to_binary, "-p", pid_str, (char *)0);
argv[i++] = "-d";
}
argv[i++] = nullptr;
}
pid_t parent_pid_;
char pid_str_[16];
};
static const char *kAtosErrorMessages[] = {

View File

@ -25,8 +25,15 @@
#include "sanitizer_symbolizer_libbacktrace.h"
#include "sanitizer_symbolizer_mac.h"
#include <errno.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#if SANITIZER_MAC
#include <util.h> // for forkpty()
#endif // SANITIZER_MAC
// C++ demangling function, as required by Itanium C++ ABI. This is weak,
// because we do not require a C++ ABI library to be linked to a program
// using sanitizers; if it's not present, we'll just use the mangled name.
@ -53,149 +60,130 @@ const char *DemangleCXXABI(const char *name) {
return name;
}
// Parses one or more two-line strings in the following format:
// <function_name>
// <file_name>:<line_number>[:<column_number>]
// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
// them use the same output format.
static void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
bool top_frame = true;
SymbolizedStack *last = res;
while (true) {
char *function_name = 0;
str = ExtractToken(str, "\n", &function_name);
CHECK(function_name);
if (function_name[0] == '\0') {
// There are no more frames.
InternalFree(function_name);
break;
}
SymbolizedStack *cur;
if (top_frame) {
cur = res;
top_frame = false;
} else {
cur = SymbolizedStack::New(res->info.address);
cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
last->next = cur;
last = cur;
}
AddressInfo *info = &cur->info;
info->function = function_name;
// Parse <file>:<line>:<column> buffer.
char *file_line_info = 0;
str = ExtractToken(str, "\n", &file_line_info);
CHECK(file_line_info);
const char *line_info = ExtractToken(file_line_info, ":", &info->file);
line_info = ExtractInt(line_info, ":", &info->line);
line_info = ExtractInt(line_info, "", &info->column);
InternalFree(file_line_info);
// Functions and filenames can be "??", in which case we write 0
// to address info to mark that names are unknown.
if (0 == internal_strcmp(info->function, "??")) {
InternalFree(info->function);
info->function = 0;
}
if (0 == internal_strcmp(info->file, "??")) {
InternalFree(info->file);
info->file = 0;
}
}
}
// Reads a two-line reply of the form
//   <symbol_name>
//   <start_address> <size>
// into info->name, info->start and info->size.
// Used by LLVMSymbolizer and InternalSymbolizer.
static void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
  const char *cursor = ExtractToken(str, "\n", &info->name);
  cursor = ExtractUptr(cursor, " ", &info->start);
  ExtractUptr(cursor, "\n", &info->size);
}
// Protocol assumed (for now) when talking to the external symbolizer.
// Every request written to its STDIN has the form
//   <module_name> <module_offset>
// and the symbolizer replies on STDOUT with
//   <function_name>
//   <file_name>:<line_number>:<column_number>
//   <function_name>
//   <file_name>:<line_number>:<column_number>
//   ...
//   <empty line>
class LLVMSymbolizerProcess : public SymbolizerProcess {
 public:
  explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}

 private:
  // A reply is complete once it is terminated by an empty line, i.e. the
  // buffer ends in two consecutive newlines.
  bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
    if (length < 2)
      return false;
    return buffer[length - 2] == '\n' && buffer[length - 1] == '\n';
  }

  // Replaces the current process image with:
  //   <binary> --inlining=<true|false> --default-arch=<arch>
  void ExecuteWithDefaultArgs(const char *path_to_binary) const override {
    // Architecture hint, chosen at compile time from the target macros.
    const char *const arch_flag =
#if defined(__x86_64h__)
        "--default-arch=x86_64h";
#elif defined(__x86_64__)
        "--default-arch=x86_64";
#elif defined(__i386__)
        "--default-arch=i386";
#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
        "--default-arch=powerpc64";
#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
        "--default-arch=powerpc64le";
#else
        "--default-arch=unknown";
#endif

    // Inlined-frame reporting follows the runtime flag.
    const char *inlining_flag = "--inlining=false";
    if (common_flags()->symbolize_inline_frames)
      inlining_flag = "--inlining=true";

    execl(path_to_binary, path_to_binary, inlining_flag, arch_flag,
          (char *)0);
  }
};
class LLVMSymbolizer : public SymbolizerTool {
public:
explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
: symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
bool SymbolizePC(uptr addr, SymbolizedStack *stack) override {
if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module,
stack->info.module_offset)) {
ParseSymbolizePCOutput(buf, stack);
return true;
bool SymbolizerProcess::StartSymbolizerSubprocess() {
if (!FileExists(path_)) {
if (!reported_invalid_path_) {
Report("WARNING: invalid path to external symbolizer!\n");
reported_invalid_path_ = true;
}
return false;
}
bool SymbolizeData(uptr addr, DataInfo *info) override {
if (const char *buf =
SendCommand(/*is_data*/ true, info->module, info->module_offset)) {
ParseSymbolizeDataOutput(buf, info);
info->start += (addr - info->module_offset); // Add the base address.
return true;
int pid;
if (use_forkpty_) {
#if SANITIZER_MAC
fd_t fd = kInvalidFd;
// Use forkpty to disable buffering in the new terminal.
pid = forkpty(&fd, 0, 0, 0);
if (pid == -1) {
// forkpty() failed.
Report("WARNING: failed to fork external symbolizer (errno: %d)\n",
errno);
return false;
} else if (pid == 0) {
// Child subprocess.
const char *argv[kArgVMax];
GetArgV(path_, argv);
execv(path_, const_cast<char **>(&argv[0]));
internal__exit(1);
}
// Continue execution in parent process.
input_fd_ = output_fd_ = fd;
// Disable echo in the new terminal, disable CR.
struct termios termflags;
tcgetattr(fd, &termflags);
termflags.c_oflag &= ~ONLCR;
termflags.c_lflag &= ~ECHO;
tcsetattr(fd, TCSANOW, &termflags);
#else // SANITIZER_MAC
UNIMPLEMENTED();
#endif // SANITIZER_MAC
} else {
int *infd = NULL;
int *outfd = NULL;
// The client program may close its stdin and/or stdout and/or stderr
// thus allowing socketpair to reuse file descriptors 0, 1 or 2.
// In this case the communication between the forked processes may be
// broken if either the parent or the child tries to close or duplicate
// these descriptors. The loop below produces two pairs of file
// descriptors, each greater than 2 (stderr).
int sock_pair[5][2];
for (int i = 0; i < 5; i++) {
if (pipe(sock_pair[i]) == -1) {
for (int j = 0; j < i; j++) {
internal_close(sock_pair[j][0]);
internal_close(sock_pair[j][1]);
}
Report("WARNING: Can't create a socket pair to start "
"external symbolizer (errno: %d)\n", errno);
return false;
} else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
if (infd == NULL) {
infd = sock_pair[i];
} else {
outfd = sock_pair[i];
for (int j = 0; j < i; j++) {
if (sock_pair[j] == infd) continue;
internal_close(sock_pair[j][0]);
internal_close(sock_pair[j][1]);
}
break;
}
}
}
CHECK(infd);
CHECK(outfd);
// Real fork() may call user callbacks registered with pthread_atfork().
pid = internal_fork();
if (pid == -1) {
// Fork() failed.
internal_close(infd[0]);
internal_close(infd[1]);
internal_close(outfd[0]);
internal_close(outfd[1]);
Report("WARNING: failed to fork external symbolizer "
" (errno: %d)\n", errno);
return false;
} else if (pid == 0) {
// Child subprocess.
internal_close(STDOUT_FILENO);
internal_close(STDIN_FILENO);
internal_dup2(outfd[0], STDIN_FILENO);
internal_dup2(infd[1], STDOUT_FILENO);
internal_close(outfd[0]);
internal_close(outfd[1]);
internal_close(infd[0]);
internal_close(infd[1]);
for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--)
internal_close(fd);
const char *argv[kArgVMax];
GetArgV(path_, argv);
execv(path_, const_cast<char **>(&argv[0]));
internal__exit(1);
}
// Continue execution in parent process.
internal_close(outfd[0]);
internal_close(infd[1]);
input_fd_ = infd[0];
output_fd_ = outfd[1];
}
// Check that symbolizer subprocess started successfully.
int pid_status;
SleepForMillis(kSymbolizerStartupTimeMillis);
int exited_pid = waitpid(pid, &pid_status, WNOHANG);
if (exited_pid != 0) {
// Either waitpid failed, or child has already exited.
Report("WARNING: external symbolizer didn't start up correctly!\n");
return false;
}
private:
const char *SendCommand(bool is_data, const char *module_name,
uptr module_offset) {
CHECK(module_name);
internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
is_data ? "DATA " : "", module_name, module_offset);
return symbolizer_process_->SendCommand(buffer_);
}
LLVMSymbolizerProcess *symbolizer_process_;
static const uptr kBufferSize = 16 * 1024;
char buffer_[kBufferSize];
};
return true;
}
class Addr2LineProcess : public SymbolizerProcess {
public:
@ -217,8 +205,13 @@ class Addr2LineProcess : public SymbolizerProcess {
return false;
}
void ExecuteWithDefaultArgs(const char *path_to_binary) const override {
execl(path_to_binary, path_to_binary, "-Cfe", module_name_, (char *)0);
void GetArgV(const char *path_to_binary,
const char *(&argv)[kArgVMax]) const override {
int i = 0;
argv[i++] = path_to_binary;
argv[i++] = "-Cfe";
argv[i++] = module_name_;
argv[i++] = nullptr;
}
const char *module_name_; // Owned, leaked.

View File

@ -1,231 +0,0 @@
//===-- sanitizer_symbolizer_process_libcdep.cc ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation of SymbolizerProcess used by external symbolizers.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_platform.h"
#if SANITIZER_POSIX
#include "sanitizer_posix.h"
#include "sanitizer_symbolizer_internal.h"
#include <errno.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#if SANITIZER_MAC
#include <util.h> // for forkpty()
#endif // SANITIZER_MAC
namespace __sanitizer {
// Records the path of the external tool and starts with both descriptors
// marked invalid; the constructor body itself only validates |path|.
SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
: path_(path),
input_fd_(kInvalidFd),
output_fd_(kInvalidFd),
times_restarted_(0),
failed_to_start_(false),
reported_invalid_path_(false),
use_forkpty_(use_forkpty) {
// A null or empty path is rejected outright.
CHECK(path_);
CHECK_NE(path_[0], '\0');
}
// Sends |command| to the subprocess, (re)starting it after every failed
// attempt until the restart budget is used up. Returns the reply, or 0 once
// no further restarts are allowed; the failure warning is printed only once.
const char *SymbolizerProcess::SendCommand(const char *command) {
  while (times_restarted_ < kMaxTimesRestarted) {
    const char *reply = SendCommandImpl(command);
    if (reply)
      return reply;
    // Start or restart symbolizer if we failed to send command to it.
    Restart();
    times_restarted_++;
  }
  if (failed_to_start_)
    return 0;
  Report("WARNING: Failed to use and restart external symbolizer!\n");
  failed_to_start_ = true;
  return 0;
}
// Closes the descriptors of the previous subprocess (if any) and starts a new
// one. Returns the result of StartSymbolizerSubprocess().
//
// Both descriptors are reset to kInvalidFd before the new start attempt so
// that a failed start does not leave already-closed descriptors behind for
// the next SendCommandImpl()/Restart() to use. The two descriptors may be the
// same one (the forkpty path assigns a single fd to both), in which case it is
// closed only once.
bool SymbolizerProcess::Restart() {
  if (input_fd_ != kInvalidFd)
    CloseFile(input_fd_);
  if (output_fd_ != kInvalidFd && output_fd_ != input_fd_)
    CloseFile(output_fd_);
  input_fd_ = kInvalidFd;
  output_fd_ = kInvalidFd;
  return StartSymbolizerSubprocess();
}
// One attempt at a request/response round trip. Returns the reply stored in
// buffer_, or 0 when the descriptors are not set up or the write or read fails.
const char *SymbolizerProcess::SendCommandImpl(const char *command) {
  const bool connected =
      input_fd_ != kInvalidFd && output_fd_ != kInvalidFd;
  if (!connected)
    return 0;
  const bool ok = WriteToSymbolizer(command, internal_strlen(command)) &&
                  ReadFromSymbolizer(buffer_, kBufferSize);
  if (!ok)
    return 0;
  return buffer_;
}
bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
if (max_length == 0)
return true;
uptr read_len = 0;
while (true) {
uptr just_read = 0;
bool success = ReadFromFile(input_fd_, buffer + read_len,
max_length - read_len - 1, &just_read);
// We can't read 0 bytes, as we don't expect external symbolizer to close
// its stdout.
if (!success || just_read == 0) {
Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
return false;
}
read_len += just_read;
if (ReachedEndOfOutput(buffer, read_len))
break;
}
buffer[read_len] = '\0';
return true;
}
bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
if (length == 0)
return true;
uptr write_len = 0;
bool success = WriteToFile(output_fd_, buffer, length, &write_len);
if (!success || write_len != length) {
Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
return false;
}
return true;
}
// Spawns the external symbolizer found at path_ and wires up input_fd_ /
// output_fd_ for talking to it. Two strategies exist: forkpty() (only
// implemented when SANITIZER_MAC is set) or a pair of pipes plus
// internal_fork(). Returns false if the binary is missing, the pipes or the
// fork fail, or the child is no longer running after the startup delay.
bool SymbolizerProcess::StartSymbolizerSubprocess() {
// The "invalid path" warning is printed only once per object.
if (!FileExists(path_)) {
if (!reported_invalid_path_) {
Report("WARNING: invalid path to external symbolizer!\n");
reported_invalid_path_ = true;
}
return false;
}
int pid;
if (use_forkpty_) {
#if SANITIZER_MAC
fd_t fd = kInvalidFd;
// Use forkpty to disable buffering in the new terminal.
pid = forkpty(&fd, 0, 0, 0);
if (pid == -1) {
// forkpty() failed.
Report("WARNING: failed to fork external symbolizer (errno: %d)\n",
errno);
return false;
} else if (pid == 0) {
// Child subprocess.
// Only reached past this call if the exec did not replace the process.
ExecuteWithDefaultArgs(path_);
internal__exit(1);
}
// Continue execution in parent process.
// The single pty descriptor is used for both directions.
input_fd_ = output_fd_ = fd;
// Disable echo in the new terminal, disable CR.
struct termios termflags;
tcgetattr(fd, &termflags);
termflags.c_oflag &= ~ONLCR;
termflags.c_lflag &= ~ECHO;
tcsetattr(fd, TCSANOW, &termflags);
#else // SANITIZER_MAC
UNIMPLEMENTED();
#endif // SANITIZER_MAC
} else {
// infd: pipe the parent reads the child's stdout from.
// outfd: pipe the parent writes the child's stdin to.
int *infd = NULL;
int *outfd = NULL;
// The client program may close its stdin and/or stdout and/or stderr
// thus allowing pipe() to reuse file descriptors 0, 1 or 2.
// In this case the communication between the forked processes may be
// broken if either the parent or the child tries to close or duplicate
// these descriptors. The loop below produces two pairs of file
// descriptors, each greater than 2 (stderr).
int sock_pair[5][2];
for (int i = 0; i < 5; i++) {
if (pipe(sock_pair[i]) == -1) {
// Give back every pair created so far before bailing out.
for (int j = 0; j < i; j++) {
internal_close(sock_pair[j][0]);
internal_close(sock_pair[j][1]);
}
Report("WARNING: Can't create a socket pair to start "
"external symbolizer (errno: %d)\n", errno);
return false;
} else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
if (infd == NULL) {
infd = sock_pair[i];
} else {
outfd = sock_pair[i];
// Both pairs found: close all earlier pairs except the one kept as infd.
for (int j = 0; j < i; j++) {
if (sock_pair[j] == infd) continue;
internal_close(sock_pair[j][0]);
internal_close(sock_pair[j][1]);
}
break;
}
}
}
CHECK(infd);
CHECK(outfd);
// Real fork() may call user callbacks registered with pthread_atfork().
pid = internal_fork();
if (pid == -1) {
// Fork() failed.
internal_close(infd[0]);
internal_close(infd[1]);
internal_close(outfd[0]);
internal_close(outfd[1]);
Report("WARNING: failed to fork external symbolizer "
" (errno: %d)\n", errno);
return false;
} else if (pid == 0) {
// Child subprocess.
// Rebind stdin/stdout to the pipe ends, then drop every other descriptor
// above stderr before running the tool.
internal_close(STDOUT_FILENO);
internal_close(STDIN_FILENO);
internal_dup2(outfd[0], STDIN_FILENO);
internal_dup2(infd[1], STDOUT_FILENO);
internal_close(outfd[0]);
internal_close(outfd[1]);
internal_close(infd[0]);
internal_close(infd[1]);
for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--)
internal_close(fd);
// Only reached past this call if the exec did not replace the process.
ExecuteWithDefaultArgs(path_);
internal__exit(1);
}
// Continue execution in parent process.
// Close the child's ends; keep the read end of infd and the write end of
// outfd.
internal_close(outfd[0]);
internal_close(infd[1]);
input_fd_ = infd[0];
output_fd_ = outfd[1];
}
// Check that symbolizer subprocess started successfully.
int pid_status;
SleepForMillis(kSymbolizerStartupTimeMillis);
// WNOHANG: a return value of 0 means the child has not changed state.
int exited_pid = waitpid(pid, &pid_status, WNOHANG);
if (exited_pid != 0) {
// Either waitpid failed, or child has already exited.
Report("WARNING: external symbolizer didn't start up correctly!\n");
return false;
}
return true;
}
} // namespace __sanitizer
#endif // SANITIZER_POSIX

View File

@ -18,13 +18,21 @@
#include <dbghelp.h>
#pragma comment(lib, "dbghelp.lib")
#include "sanitizer_symbolizer_win.h"
#include "sanitizer_symbolizer_internal.h"
namespace __sanitizer {
namespace {
// The dbghelp based symbolizer tool for Windows.
class WinSymbolizerTool : public SymbolizerTool {
public:
bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
// Data symbolization is not implemented by this tool; it always reports
// failure.
bool SymbolizeData(uptr addr, DataInfo *info) override {
return false;
}
const char *Demangle(const char *name) override;
};
bool is_dbghelp_initialized = false;
bool TrySymInitialize() {
@ -115,7 +123,9 @@ bool WinSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *frame) {
frame->info.file = internal_strdup(line_info.FileName);
frame->info.line = line_info.LineNumber;
}
return true;
// Only consider this a successful symbolization attempt if we got file info.
// Otherwise, try llvm-symbolizer.
return got_fileline;
}
const char *WinSymbolizerTool::Demangle(const char *name) {
@ -137,10 +147,128 @@ void Symbolizer::PlatformPrepareForSandboxing() {
// Do nothing.
}
namespace {
// Owns a Win32 HANDLE and closes it on destruction, unless ownership has been
// given up with release(). A null handle means "nothing owned".
struct ScopedHandle {
  ScopedHandle() : h_(nullptr) {}
  explicit ScopedHandle(HANDLE h) : h_(h) {}
  ~ScopedHandle() {
    if (h_)
      ::CloseHandle(h_);
  }

  // Copying would make two objects close the same handle.
  ScopedHandle(const ScopedHandle &) = delete;
  ScopedHandle &operator=(const ScopedHandle &) = delete;

  // Returns the handle without giving up ownership.
  HANDLE get() { return h_; }
  // Returns the address of the stored handle, for APIs that hand back a
  // handle through an out-parameter.
  HANDLE *receive() { return &h_; }
  // Gives up ownership: the caller becomes responsible for closing the
  // returned handle.
  HANDLE release() {
    HANDLE h = h_;
    h_ = nullptr;
    return h;
  }

  HANDLE h_;
};
} // namespace
// Windows implementation: launches the tool at path_ with its stdin and stdout
// redirected to two anonymous pipes. On success input_fd_ receives the read
// end of the child's stdout pipe and output_fd_ the write end of the child's
// stdin pipe. Returns false if creating the pipes, adjusting handle
// inheritance or creating the process fails; the ScopedHandle objects close
// whatever was created so far.
bool SymbolizerProcess::StartSymbolizerSubprocess() {
// Create inherited pipes for stdin and stdout.
ScopedHandle stdin_read, stdin_write;
ScopedHandle stdout_read, stdout_write;
SECURITY_ATTRIBUTES attrs;
attrs.nLength = sizeof(SECURITY_ATTRIBUTES);
attrs.bInheritHandle = TRUE;
attrs.lpSecurityDescriptor = nullptr;
if (!::CreatePipe(stdin_read.receive(), stdin_write.receive(), &attrs, 0) ||
!::CreatePipe(stdout_read.receive(), stdout_write.receive(), &attrs, 0))
return false;
// Don't inherit the writing end of stdin or the reading end of stdout.
if (!SetHandleInformation(stdin_write.get(), HANDLE_FLAG_INHERIT, 0) ||
!SetHandleInformation(stdout_read.get(), HANDLE_FLAG_INHERIT, 0))
return false;
// Compute the command line. Wrap double quotes around everything.
const char *argv[kArgVMax];
GetArgV(path_, argv);
InternalScopedString command_line(kMaxPathLength * 3);
// argv is null-terminated by GetArgV.
for (int i = 0; argv[i]; i++) {
const char *arg = argv[i];
int arglen = internal_strlen(arg);
// Check that tool command lines are simple and that complete escaping is
// unnecessary.
CHECK(!internal_strchr(arg, '"') && "quotes in args unsupported");
CHECK(!internal_strstr(arg, "\\\\") &&
"double backslashes in args unsupported");
CHECK(arglen > 0 && arg[arglen - 1] != '\\' &&
"args ending in backslash and empty args unsupported");
command_line.append("\"%s\" ", arg);
}
VReport(3, "Launching symbolizer command: %s\n", command_line.data());
// Launch llvm-symbolizer with stdin and stdout redirected.
STARTUPINFOA si;
memset(&si, 0, sizeof(si));
si.cb = sizeof(si);
si.dwFlags |= STARTF_USESTDHANDLES;
// The child reads its stdin from one pipe and writes its stdout to the other.
si.hStdInput = stdin_read.get();
si.hStdOutput = stdout_write.get();
PROCESS_INFORMATION pi;
memset(&pi, 0, sizeof(pi));
if (!CreateProcessA(path_, // Executable
command_line.data(), // Command line
nullptr, // Process handle not inheritable
nullptr, // Thread handle not inheritable
TRUE, // Set handle inheritance to TRUE
0, // Creation flags
nullptr, // Use parent's environment block
nullptr, // Use parent's starting directory
&si, &pi)) {
VReport(2, "WARNING: %s failed to create process for %s (error code: %d)\n",
SanitizerToolName, path_, GetLastError());
return false;
}
// Process creation succeeded, so transfer handle ownership into the fields.
// The two pipe ends given to the child stay in their ScopedHandle objects and
// are closed in this process when the function returns.
input_fd_ = stdout_read.release();
output_fd_ = stdin_write.release();
// The llvm-symbolizer process is responsible for quitting itself when the
// stdin pipe is closed, so we don't need these handles. Close them to prevent
// leaks. If we ever want to try to kill the symbolizer process from the
// parent, we'll want to hang on to these handles.
CloseHandle(pi.hProcess);
CloseHandle(pi.hThread);
return true;
}
// Fills |list| with the symbolizer tools to try, in order: llvm-symbolizer
// (when available and not disabled) followed by the dbghelp based tool.
// Nothing is added when symbolization is turned off by the `symbolize` flag.
//
// An empty `external_symbolizer_path` means "explicitly disabled" and is
// checked first: handing the empty string to LLVMSymbolizer would trip the
// non-empty path CHECK in the SymbolizerProcess constructor.
static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
                                  LowLevelAllocator *allocator) {
  if (!common_flags()->symbolize) {
    VReport(2, "Symbolizer is disabled.\n");
    return;
  }

  // Add llvm-symbolizer in case the binary has dwarf.
  const char *user_path = common_flags()->external_symbolizer_path;
  if (user_path && user_path[0] == '\0') {
    VReport(2, "External symbolizer is explicitly disabled.\n");
  } else {
    // A user-specified path wins; otherwise look the binary up.
    const char *path =
        user_path ? user_path : FindPathToBinary("llvm-symbolizer.exe");
    if (path) {
      VReport(2, "Using llvm-symbolizer at %spath: %s\n",
              user_path ? "user-specified " : "", path);
      list->push_back(new(*allocator) LLVMSymbolizer(path, allocator));
    } else {
      VReport(2, "External symbolizer is not present.\n");
    }
  }

  // Add the dbghelp based symbolizer.
  list->push_back(new(*allocator) WinSymbolizerTool());
}
/// Creates the Symbolizer for this platform from the tool list assembled by
/// ChooseSymbolizerTools.
Symbolizer *Symbolizer::PlatformInit() {
  IntrusiveList<SymbolizerTool> list;
  list.clear();
  // ChooseSymbolizerTools alone decides which tools are used and in what
  // order. It already appends WinSymbolizerTool itself, so pushing one here
  // too would register the dbghelp tool twice and ahead of llvm-symbolizer.
  ChooseSymbolizerTools(&list, &symbolizer_allocator_);
  return new(symbolizer_allocator_) Symbolizer(list);
}

View File

@ -1,31 +0,0 @@
//===-- sanitizer_symbolizer_win.h ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Header file for the Windows symbolizer tool.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_SYMBOLIZER_WIN_H
#define SANITIZER_SYMBOLIZER_WIN_H
#include "sanitizer_symbolizer_internal.h"
namespace __sanitizer {
// SymbolizerTool implementation for Windows.
class WinSymbolizerTool : public SymbolizerTool {
 public:
  // Declared here, defined out of line.
  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;

  // Always returns false; `info` is never written.
  bool SymbolizeData(uptr addr, DataInfo *info) override { return false; }

  // Declared here, defined out of line.
  const char *Demangle(const char *name) override;
};
} // namespace __sanitizer
#endif // SANITIZER_SYMBOLIZER_WIN_H

View File

@ -13,6 +13,17 @@ macro(get_bits_for_arch arch bits)
endif()
endmacro()
# Targets the ASan tests depend on, seeded from the sanitizer_common lit deps.
set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
if(NOT COMPILER_RT_STANDALONE_BUILD)
  list(APPEND ASAN_TEST_DEPS asan)
  # On Windows, also depend on lld when its sources are available.
  if(WIN32 AND COMPILER_RT_HAS_LLD_SOURCES)
    list(APPEND ASAN_TEST_DEPS lld)
  endif()
endif()
# ASAN_DYNAMIC_TEST_DEPS starts out as a copy of ASAN_TEST_DEPS.
set(ASAN_DYNAMIC_TEST_DEPS ${ASAN_TEST_DEPS})
foreach(arch ${ASAN_SUPPORTED_ARCH})
if(ANDROID)
set(ASAN_TEST_TARGET_ARCH ${arch}-android)
@ -55,12 +66,6 @@ foreach(arch ${ASAN_SUPPORTED_ARCH})
endif()
endforeach()
# NOTE(review): this assigns ASAN_TEST_DEPS from scratch, so anything appended
# to it earlier in this file is discarded here -- confirm that is intended.
set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
if(NOT COMPILER_RT_STANDALONE_BUILD)
list(APPEND ASAN_TEST_DEPS asan)
endif()
# ASAN_DYNAMIC_TEST_DEPS starts out as a copy of ASAN_TEST_DEPS.
set(ASAN_DYNAMIC_TEST_DEPS ${ASAN_TEST_DEPS})
# Add unit tests.
if(COMPILER_RT_INCLUDE_TESTS)
set(ASAN_TEST_DYNAMIC False)

View File

@ -0,0 +1,23 @@
// If we have LLD, see that things more or less work.
//
// REQUIRES: lld
//
// FIXME: Use -fuse-ld=lld after the old COFF linker is removed.
// FIXME: Test will fail until we add flags for requesting dwarf or cv.
// RUNX: %clangxx_asan -O2 %s -o %t.exe -fuse-ld=lld -Wl,-debug
// RUN: %clangxx_asan -c -O2 %s -o %t.o -gdwarf
// RUN: lld-link2 %t.o -out:%t.exe -debug -defaultlib:libcmt %asan_lib %asan_cxx_lib
// RUN: not %run %t.exe 2>&1 | FileCheck %s
#include <stdlib.h>
// Allocates a 10-byte buffer, frees it, and then reads x[5] from the freed
// memory. The expectations inside the function name the free() and malloc()
// call sites through @LINE-relative offsets and fixed column numbers, so do
// not insert, remove or re-indent lines inside this function.
int main() {
char *x = (char*)malloc(10 * sizeof(char));
free(x);
return x[5];
// CHECK: heap-use-after-free
// CHECK: free
// CHECK: main{{.*}}fuse-lld.cc:[[@LINE-4]]:3
// CHECK: malloc
// CHECK: main{{.*}}fuse-lld.cc:[[@LINE-7]]:20
}

View File

@ -10,6 +10,6 @@ static void NullDeref(int *ptr) {
}
// Calls NullDeref with a null pointer. Frame #1 of the report is expected to
// point at that call, column included. The expectation is line-relative, so
// it must stay on the line directly below the call.
int main() {
  NullDeref((int*)0);
  // CHECK: {{ #1 0x.* in main.*null_deref.cc:}}[[@LINE-1]]:3
  // CHECK: AddressSanitizer can not provide additional info.
}

View File

@ -1,4 +1,5 @@
// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
// RUN: %clangxx_asan -O0 %s -o %t
// RUN: env ASAN_OPTIONS=external_symbolizer_path=asdf not %run %t 2>&1 | FileCheck %s
#include <windows.h>
#include <dbghelp.h>
@ -13,7 +14,8 @@ int main() {
*(volatile int*)0 = 42;
// CHECK: ERROR: AddressSanitizer: access-violation on unknown address
// CHECK-NEXT: {{WARNING: Failed to use and restart external symbolizer}}
// CHECK-NEXT: {{WARNING: .*DbgHelp}}
// CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-3]]
// CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-4]]
// CHECK: AddressSanitizer can not provide additional info.
}