forked from OSchip/llvm-project
412 lines
13 KiB
C++
412 lines
13 KiB
C++
//===-- sanitizer_symbolizer.cc -------------------------------------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is shared between AddressSanitizer and ThreadSanitizer
|
|
// run-time libraries. See sanitizer_symbolizer.h for details.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "sanitizer_common.h"
|
|
#include "sanitizer_placement_new.h"
|
|
#include "sanitizer_procmaps.h"
|
|
#include "sanitizer_symbolizer.h"
|
|
|
|
namespace __sanitizer {
|
|
|
|
void AddressInfo::Clear() {
|
|
InternalFree(module);
|
|
InternalFree(function);
|
|
InternalFree(file);
|
|
internal_memset(this, 0, sizeof(AddressInfo));
|
|
}
|
|
|
|
LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
|
|
full_name_ = internal_strdup(module_name);
|
|
base_address_ = base_address;
|
|
n_ranges_ = 0;
|
|
}
|
|
|
|
void LoadedModule::addAddressRange(uptr beg, uptr end) {
|
|
CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
|
|
ranges_[n_ranges_].beg = beg;
|
|
ranges_[n_ranges_].end = end;
|
|
n_ranges_++;
|
|
}
|
|
|
|
bool LoadedModule::containsAddress(uptr address) const {
|
|
for (uptr i = 0; i < n_ranges_; i++) {
|
|
if (ranges_[i].beg <= address && address < ranges_[i].end)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Extracts the prefix of "str" that consists of any characters not
|
|
// present in "delims" string, and copies this prefix to "result", allocating
|
|
// space for it.
|
|
// Returns a pointer to "str" after skipping extracted prefix and first
|
|
// delimiter char.
|
|
static const char *ExtractToken(const char *str, const char *delims,
|
|
char **result) {
|
|
uptr prefix_len = internal_strcspn(str, delims);
|
|
*result = (char*)InternalAlloc(prefix_len + 1);
|
|
internal_memcpy(*result, str, prefix_len);
|
|
(*result)[prefix_len] = '\0';
|
|
const char *prefix_end = str + prefix_len;
|
|
if (*prefix_end != '\0') prefix_end++;
|
|
return prefix_end;
|
|
}
|
|
|
|
// Same as ExtractToken, but converts extracted token to integer.
|
|
static const char *ExtractInt(const char *str, const char *delims,
|
|
int *result) {
|
|
char *buff;
|
|
const char *ret = ExtractToken(str, delims, &buff);
|
|
if (buff != 0) {
|
|
*result = (int)internal_atoll(buff);
|
|
}
|
|
InternalFree(buff);
|
|
return ret;
|
|
}
|
|
|
|
static const char *ExtractUptr(const char *str, const char *delims,
|
|
uptr *result) {
|
|
char *buff;
|
|
const char *ret = ExtractToken(str, delims, &buff);
|
|
if (buff != 0) {
|
|
*result = (uptr)internal_atoll(buff);
|
|
}
|
|
InternalFree(buff);
|
|
return ret;
|
|
}
|
|
|
|
// ExternalSymbolizer encapsulates communication between the tool and
|
|
// external symbolizer program, running in a different subprocess,
|
|
// For now we assume the following protocol:
|
|
// For each request of the form
|
|
// <module_name> <module_offset>
|
|
// passed to STDIN, external symbolizer prints to STDOUT response:
|
|
// <function_name>
|
|
// <file_name>:<line_number>:<column_number>
|
|
// <function_name>
|
|
// <file_name>:<line_number>:<column_number>
|
|
// ...
|
|
// <empty line>
|
|
class ExternalSymbolizer {
|
|
public:
|
|
ExternalSymbolizer(const char *path, int input_fd, int output_fd)
|
|
: path_(path),
|
|
input_fd_(input_fd),
|
|
output_fd_(output_fd),
|
|
times_restarted_(0) {
|
|
CHECK(path_);
|
|
CHECK_NE(input_fd_, kInvalidFd);
|
|
CHECK_NE(output_fd_, kInvalidFd);
|
|
}
|
|
|
|
char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
|
|
CHECK(module_name);
|
|
internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
|
|
is_data ? "DATA " : "", module_name, module_offset);
|
|
if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
|
|
return 0;
|
|
if (!readFromSymbolizer(buffer_, kBufferSize))
|
|
return 0;
|
|
return buffer_;
|
|
}
|
|
|
|
bool Restart() {
|
|
if (times_restarted_ >= kMaxTimesRestarted) return false;
|
|
times_restarted_++;
|
|
internal_close(input_fd_);
|
|
internal_close(output_fd_);
|
|
return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
|
|
}
|
|
|
|
private:
|
|
bool readFromSymbolizer(char *buffer, uptr max_length) {
|
|
if (max_length == 0)
|
|
return true;
|
|
uptr read_len = 0;
|
|
while (true) {
|
|
uptr just_read = internal_read(input_fd_, buffer + read_len,
|
|
max_length - read_len);
|
|
// We can't read 0 bytes, as we don't expect external symbolizer to close
|
|
// its stdout.
|
|
if (just_read == 0 || just_read == (uptr)-1) {
|
|
Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
|
|
return false;
|
|
}
|
|
read_len += just_read;
|
|
// Empty line marks the end of symbolizer output.
|
|
if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
|
|
buffer[read_len - 2] == '\n') {
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool writeToSymbolizer(const char *buffer, uptr length) {
|
|
if (length == 0)
|
|
return true;
|
|
uptr write_len = internal_write(output_fd_, buffer, length);
|
|
if (write_len == 0 || write_len == (uptr)-1) {
|
|
Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
const char *path_;
|
|
int input_fd_;
|
|
int output_fd_;
|
|
|
|
static const uptr kBufferSize = 16 * 1024;
|
|
char buffer_[kBufferSize];
|
|
|
|
static const uptr kMaxTimesRestarted = 5;
|
|
uptr times_restarted_;
|
|
};
|
|
|
|
static LowLevelAllocator symbolizer_allocator; // Linker initialized.
|
|
|
|
#if SANITIZER_SUPPORTS_WEAK_HOOKS
|
|
extern "C" {
|
|
SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
|
|
bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
|
|
char *Buffer, int MaxLength);
|
|
SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
|
|
bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
|
|
char *Buffer, int MaxLength);
|
|
} // extern "C"
|
|
|
|
class InternalSymbolizer {
|
|
public:
|
|
typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
|
|
static InternalSymbolizer *get() {
|
|
if (__sanitizer_symbolize_code != 0 &&
|
|
__sanitizer_symbolize_data != 0) {
|
|
void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
|
|
return new(mem) InternalSymbolizer();
|
|
}
|
|
return 0;
|
|
}
|
|
char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
|
|
SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
|
|
: __sanitizer_symbolize_code;
|
|
if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
|
|
return buffer_;
|
|
return 0;
|
|
}
|
|
|
|
private:
|
|
InternalSymbolizer() { }
|
|
|
|
static const int kBufferSize = 16 * 1024;
|
|
char buffer_[kBufferSize];
|
|
};
|
|
#else // SANITIZER_SUPPORTS_WEAK_HOOKS
|
|
|
|
class InternalSymbolizer {
|
|
public:
|
|
static InternalSymbolizer *get() { return 0; }
|
|
char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
#endif // SANITIZER_SUPPORTS_WEAK_HOOKS
|
|
|
|
class Symbolizer {
|
|
public:
|
|
uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
|
|
if (max_frames == 0)
|
|
return 0;
|
|
LoadedModule *module = FindModuleForAddress(addr);
|
|
if (module == 0)
|
|
return 0;
|
|
const char *module_name = module->full_name();
|
|
uptr module_offset = addr - module->base_address();
|
|
const char *str = SendCommand(false, module_name, module_offset);
|
|
if (str == 0) {
|
|
// External symbolizer was not initialized or failed. Fill only data
|
|
// about module name and offset.
|
|
AddressInfo *info = &frames[0];
|
|
info->Clear();
|
|
info->FillAddressAndModuleInfo(addr, module_name, module_offset);
|
|
return 1;
|
|
}
|
|
uptr frame_id = 0;
|
|
for (frame_id = 0; frame_id < max_frames; frame_id++) {
|
|
AddressInfo *info = &frames[frame_id];
|
|
char *function_name = 0;
|
|
str = ExtractToken(str, "\n", &function_name);
|
|
CHECK(function_name);
|
|
if (function_name[0] == '\0') {
|
|
// There are no more frames.
|
|
break;
|
|
}
|
|
info->Clear();
|
|
info->FillAddressAndModuleInfo(addr, module_name, module_offset);
|
|
info->function = function_name;
|
|
// Parse <file>:<line>:<column> buffer.
|
|
char *file_line_info = 0;
|
|
str = ExtractToken(str, "\n", &file_line_info);
|
|
CHECK(file_line_info);
|
|
const char *line_info = ExtractToken(file_line_info, ":", &info->file);
|
|
line_info = ExtractInt(line_info, ":", &info->line);
|
|
line_info = ExtractInt(line_info, "", &info->column);
|
|
InternalFree(file_line_info);
|
|
|
|
// Functions and filenames can be "??", in which case we write 0
|
|
// to address info to mark that names are unknown.
|
|
if (0 == internal_strcmp(info->function, "??")) {
|
|
InternalFree(info->function);
|
|
info->function = 0;
|
|
}
|
|
if (0 == internal_strcmp(info->file, "??")) {
|
|
InternalFree(info->file);
|
|
info->file = 0;
|
|
}
|
|
}
|
|
if (frame_id == 0) {
|
|
// Make sure we return at least one frame.
|
|
AddressInfo *info = &frames[0];
|
|
info->Clear();
|
|
info->FillAddressAndModuleInfo(addr, module_name, module_offset);
|
|
frame_id = 1;
|
|
}
|
|
return frame_id;
|
|
}
|
|
|
|
bool SymbolizeData(uptr addr, DataInfo *info) {
|
|
LoadedModule *module = FindModuleForAddress(addr);
|
|
if (module == 0)
|
|
return false;
|
|
const char *module_name = module->full_name();
|
|
uptr module_offset = addr - module->base_address();
|
|
internal_memset(info, 0, sizeof(*info));
|
|
info->address = addr;
|
|
info->module = internal_strdup(module_name);
|
|
info->module_offset = module_offset;
|
|
const char *str = SendCommand(true, module_name, module_offset);
|
|
if (str == 0)
|
|
return true;
|
|
str = ExtractToken(str, "\n", &info->name);
|
|
str = ExtractUptr(str, " ", &info->start);
|
|
str = ExtractUptr(str, "\n", &info->size);
|
|
info->start += module->base_address();
|
|
return true;
|
|
}
|
|
|
|
bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
|
|
int input_fd, output_fd;
|
|
if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
|
|
return false;
|
|
void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
|
|
external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
|
|
input_fd, output_fd);
|
|
return true;
|
|
}
|
|
|
|
bool IsSymbolizerAvailable() {
|
|
if (internal_symbolizer_ == 0)
|
|
internal_symbolizer_ = InternalSymbolizer::get();
|
|
return internal_symbolizer_ || external_symbolizer_;
|
|
}
|
|
|
|
private:
|
|
char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
|
|
// First, try to use internal symbolizer.
|
|
if (!IsSymbolizerAvailable()) {
|
|
return 0;
|
|
}
|
|
if (internal_symbolizer_) {
|
|
return internal_symbolizer_->SendCommand(is_data, module_name,
|
|
module_offset);
|
|
}
|
|
// Otherwise, fall back to external symbolizer.
|
|
if (external_symbolizer_ == 0) {
|
|
ReportExternalSymbolizerError(
|
|
"WARNING: Trying to symbolize code, but external "
|
|
"symbolizer is not initialized!\n");
|
|
return 0;
|
|
}
|
|
for (;;) {
|
|
char *reply = external_symbolizer_->SendCommand(is_data, module_name,
|
|
module_offset);
|
|
if (reply)
|
|
return reply;
|
|
// Try to restart symbolizer subprocess. If we don't succeed, forget
|
|
// about it and don't try to use it later.
|
|
if (!external_symbolizer_->Restart()) {
|
|
ReportExternalSymbolizerError(
|
|
"WARNING: Failed to use and restart external symbolizer!\n");
|
|
external_symbolizer_ = 0;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
LoadedModule *FindModuleForAddress(uptr address) {
|
|
if (modules_ == 0) {
|
|
modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
|
|
kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
|
|
CHECK(modules_);
|
|
n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
|
|
// FIXME: Return this check when GetListOfModules is implemented on Mac.
|
|
// CHECK_GT(n_modules_, 0);
|
|
CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
|
|
}
|
|
for (uptr i = 0; i < n_modules_; i++) {
|
|
if (modules_[i].containsAddress(address)) {
|
|
return &modules_[i];
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
void ReportExternalSymbolizerError(const char *msg) {
|
|
// Don't use atomics here for now, as SymbolizeCode can't be called
|
|
// from multiple threads anyway.
|
|
static bool reported;
|
|
if (!reported) {
|
|
Report(msg);
|
|
reported = true;
|
|
}
|
|
}
|
|
|
|
// 16K loaded modules should be enough for everyone.
|
|
static const uptr kMaxNumberOfModuleContexts = 1 << 14;
|
|
LoadedModule *modules_; // Array of module descriptions is leaked.
|
|
uptr n_modules_;
|
|
|
|
ExternalSymbolizer *external_symbolizer_; // Leaked.
|
|
InternalSymbolizer *internal_symbolizer_; // Leaked.
|
|
};
|
|
|
|
static Symbolizer symbolizer; // Linker initialized.
|
|
|
|
uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
|
|
return symbolizer.SymbolizeCode(address, frames, max_frames);
|
|
}
|
|
|
|
bool SymbolizeData(uptr address, DataInfo *info) {
|
|
return symbolizer.SymbolizeData(address, info);
|
|
}
|
|
|
|
bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
|
|
return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
|
|
}
|
|
|
|
bool IsSymbolizerAvailable() {
|
|
return symbolizer.IsSymbolizerAvailable();
|
|
}
|
|
|
|
} // namespace __sanitizer
|