[sanitizer] Use architecture/slice information when symbolizing fat Mach-O files on Darwin

This patch starts passing architecture information about a module to llvm-symbolizer and into text reports. This fixes the longstanding x86_64/x86_64h mismatch issue on Darwin.

Differential Revision: https://reviews.llvm.org/D27390

llvm-svn: 291287
This commit is contained in:
Kuba Mracek 2017-01-06 21:45:05 +00:00
parent 541f9e2830
commit b38f1ca2d5
12 changed files with 184 additions and 36 deletions

View File

@ -93,7 +93,7 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
vs_style, strip_path_prefix);
} else if (info.module) {
RenderModuleLocation(buffer, info.module, info.module_offset,
strip_path_prefix);
info.module_arch, strip_path_prefix);
} else {
buffer->append("(<unknown module>)");
}
@ -103,8 +103,9 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
if (info.address & kExternalPCBit)
{} // There PCs are not meaningful.
else if (info.module)
buffer->append("(%s+%p)", StripModuleName(info.module),
(void *)info.module_offset);
// Always strip the module name for %M.
RenderModuleLocation(buffer, StripModuleName(info.module),
info.module_offset, info.module_arch, "");
else
buffer->append("(%p)", (void *)info.address);
break;
@ -165,9 +166,13 @@ void RenderSourceLocation(InternalScopedString *buffer, const char *file,
}
void RenderModuleLocation(InternalScopedString *buffer, const char *module,
uptr offset, const char *strip_path_prefix) {
buffer->append("(%s+0x%zx)", StripPathPrefix(module, strip_path_prefix),
offset);
uptr offset, ModuleArch arch,
const char *strip_path_prefix) {
buffer->append("(%s", StripPathPrefix(module, strip_path_prefix));
if (arch != kModuleArchUnknown) {
buffer->append(":%s", ModuleArchToString(arch));
}
buffer->append("+0x%zx)", offset);
}
} // namespace __sanitizer

View File

@ -57,7 +57,8 @@ void RenderSourceLocation(InternalScopedString *buffer, const char *file,
const char *strip_path_prefix);
void RenderModuleLocation(InternalScopedString *buffer, const char *module,
uptr offset, const char *strip_path_prefix);
uptr offset, ModuleArch arch,
const char *strip_path_prefix);
// Same as RenderFrame, but for data section (global variables).
// Accepts %s, %l from above.

View File

@ -33,9 +33,11 @@ void AddressInfo::Clear() {
function_offset = kUnknown;
}
void AddressInfo::FillModuleInfo(const char *mod_name, uptr mod_offset) {
void AddressInfo::FillModuleInfo(const char *mod_name, uptr mod_offset,
ModuleArch mod_arch) {
module = internal_strdup(mod_name);
module_offset = mod_offset;
module_arch = mod_arch;
}
SymbolizedStack::SymbolizedStack() : next(nullptr), info() {}

View File

@ -31,6 +31,7 @@ struct AddressInfo {
char *module;
uptr module_offset;
ModuleArch module_arch;
static const uptr kUnknown = ~(uptr)0;
char *function;
@ -43,7 +44,7 @@ struct AddressInfo {
AddressInfo();
// Deletes all strings and resets all fields.
void Clear();
void FillModuleInfo(const char *mod_name, uptr mod_offset);
void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
};
// Linked list of symbolized frames (each frame is described by AddressInfo).
@ -65,6 +66,8 @@ struct DataInfo {
// (de)allocated using sanitizer internal allocator.
char *module;
uptr module_offset;
ModuleArch module_arch;
char *file;
uptr line;
char *name;
@ -143,7 +146,8 @@ class Symbolizer final {
static Symbolizer *PlatformInit();
bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
uptr *module_offset);
uptr *module_offset,
ModuleArch *module_arch);
ListOfModules modules_;
// If stale, need to reload the modules before looking up addresses.
bool modules_fresh_;

View File

@ -124,8 +124,8 @@ class LLVMSymbolizer : public SymbolizerTool {
bool SymbolizeData(uptr addr, DataInfo *info) override;
private:
const char *SendCommand(bool is_data, const char *module_name,
uptr module_offset);
const char *FormatAndSendCommand(bool is_data, const char *module_name,
uptr module_offset, ModuleArch arch);
LLVMSymbolizerProcess *symbolizer_process_;
static const uptr kBufferSize = 16 * 1024;

View File

@ -64,11 +64,13 @@ SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
BlockingMutexLock l(&mu_);
const char *module_name;
uptr module_offset;
ModuleArch arch;
SymbolizedStack *res = SymbolizedStack::New(addr);
if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
&arch))
return res;
// Always fill data about module name and offset.
res->info.FillModuleInfo(module_name, module_offset);
res->info.FillModuleInfo(module_name, module_offset, arch);
for (auto &tool : tools_) {
SymbolizerScope sym_scope(this);
if (tool.SymbolizePC(addr, res)) {
@ -82,11 +84,14 @@ bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
BlockingMutexLock l(&mu_);
const char *module_name;
uptr module_offset;
if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
ModuleArch arch;
if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
&arch))
return false;
info->Clear();
info->module = internal_strdup(module_name);
info->module_offset = module_offset;
info->module_arch = arch;
for (auto &tool : tools_) {
SymbolizerScope sym_scope(this);
if (tool.SymbolizeData(addr, info)) {
@ -100,8 +105,9 @@ bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
uptr *module_address) {
BlockingMutexLock l(&mu_);
const char *internal_module_name = nullptr;
ModuleArch arch;
if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
module_address))
module_address, &arch))
return false;
if (module_name)
@ -134,12 +140,14 @@ void Symbolizer::PrepareForSandboxing() {
bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
const char **module_name,
uptr *module_offset) {
uptr *module_offset,
ModuleArch *module_arch) {
const LoadedModule *module = FindModuleForAddress(address);
if (module == nullptr)
return false;
*module_name = module->full_name();
*module_offset = address - module->base_address();
*module_arch = module->arch();
return true;
}
@ -197,6 +205,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess {
buffer[length - 2] == '\n';
}
// When adding a new architecture, don't forget to also update
// script/asan_symbolize.py and sanitizer_common.h.
void GetArgV(const char *path_to_binary,
const char *(&argv)[kArgVMax]) const override {
#if defined(__x86_64h__)
@ -284,7 +294,8 @@ void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
top_frame = false;
} else {
cur = SymbolizedStack::New(res->info.address);
cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
res->info.module_arch);
last->next = cur;
last = cur;
}
@ -317,8 +328,10 @@ void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
}
bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module,
stack->info.module_offset)) {
AddressInfo *info = &stack->info;
const char *buf = FormatAndSendCommand(
/*is_data*/ false, info->module, info->module_offset, info->module_arch);
if (buf) {
ParseSymbolizePCOutput(buf, stack);
return true;
}
@ -326,8 +339,9 @@ bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
}
bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
if (const char *buf =
SendCommand(/*is_data*/ true, info->module, info->module_offset)) {
const char *buf = FormatAndSendCommand(
/*is_data*/ true, info->module, info->module_offset, info->module_arch);
if (buf) {
ParseSymbolizeDataOutput(buf, info);
info->start += (addr - info->module_offset); // Add the base address.
return true;
@ -335,11 +349,19 @@ bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
return false;
}
const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name,
uptr module_offset) {
const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data,
const char *module_name,
uptr module_offset,
ModuleArch arch) {
CHECK(module_name);
internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
is_data ? "DATA " : "", module_name, module_offset);
const char *is_data_str = is_data ? "DATA " : "";
if (arch == kModuleArchUnknown) {
internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str,
module_name, module_offset);
} else {
internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n", is_data_str,
module_name, ModuleArchToString(arch), module_offset);
}
return symbolizer_process_->SendCommand(buffer_);
}

View File

@ -52,13 +52,18 @@ TEST(SanitizerStacktracePrinter, RenderSourceLocation) {
TEST(SanitizerStacktracePrinter, RenderModuleLocation) {
InternalScopedString str(128);
RenderModuleLocation(&str, "/dir/exe", 0x123, "");
RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchUnknown, "");
EXPECT_STREQ("(/dir/exe+0x123)", str.data());
// Check that we strip file prefix if necessary.
str.clear();
RenderModuleLocation(&str, "/dir/exe", 0x123, "/dir/");
RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchUnknown, "/dir/");
EXPECT_STREQ("(exe+0x123)", str.data());
// Check that we render the arch.
str.clear();
RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchX86_64H, "/dir/");
EXPECT_STREQ("(exe:x86_64h+0x123)", str.data());
}
TEST(SanitizerStacktracePrinter, RenderFrame) {

View File

@ -157,7 +157,7 @@ static void RenderLocation(InternalScopedString *Buffer, Location Loc) {
common_flags()->strip_path_prefix);
else if (Info.module)
RenderModuleLocation(Buffer, Info.module, Info.module_offset,
common_flags()->strip_path_prefix);
Info.module_arch, common_flags()->strip_path_prefix);
else
Buffer->append("%p", Info.address);
return;

View File

@ -0,0 +1,99 @@
// RUN: %clangxx_asan -arch x86_64 -arch x86_64h -g -O0 %s -c -o %t.o.fat
// RUN: %clangxx_asan -arch x86_64 -arch x86_64h -g %t.o.fat -o %t.fat
// RUN: lipo %t.fat -thin x86_64 -output %t.thin.x86_64 && lipo %t.thin.x86_64 -info | FileCheck %s --check-prefix=CHECK-LIPO-THIN-X86_64
// RUN: lipo %t.fat -thin x86_64h -output %t.thin.x86_64h && lipo %t.thin.x86_64h -info | FileCheck %s --check-prefix=CHECK-LIPO-THIN-X86_64H
// RUN: lipo %t.fat -extract x86_64 -output %t.fat.x86_64 && lipo %t.fat.x86_64 -info | FileCheck %s --check-prefix=CHECK-LIPO-FAT-X86_64
// RUN: lipo %t.fat -extract x86_64h -output %t.fat.x86_64h && lipo %t.fat.x86_64h -info | FileCheck %s --check-prefix=CHECK-LIPO-FAT-X86_64H
// CHECK-LIPO-THIN-X86_64: Non-fat file: {{.*}} is architecture: x86_64
// CHECK-LIPO-THIN-X86_64H: Non-fat file: {{.*}} is architecture: x86_64h
// CHECK-LIPO-FAT-X86_64: Architectures in the fat file: {{.*}} are: x86_64
// CHECK-LIPO-FAT-X86_64H: Architectures in the fat file: {{.*}} are: x86_64h
// RUN: dsymutil %t.thin.x86_64
// RUN: dsymutil %t.thin.x86_64h
// RUN: dsymutil %t.fat.x86_64
// RUN: dsymutil %t.fat.x86_64h
// Check LLVM symbolizer
// RUN: %env_asan_opts=external_symbolizer_path=$(which llvm-symbolizer) not %run %t.thin.x86_64 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which llvm-symbolizer) not %run %t.thin.x86_64h 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which llvm-symbolizer) not %run %t.fat.x86_64 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which llvm-symbolizer) not %run %t.fat.x86_64h 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// Check atos
// RUN: %env_asan_opts=external_symbolizer_path=$(which atos) not %run %t.thin.x86_64 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which atos) not %run %t.thin.x86_64h 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which atos) not %run %t.fat.x86_64 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path=$(which atos) not %run %t.fat.x86_64h 2>&1 | FileCheck %s --check-prefixes CHECK,CHECK-LI,CHECK-DATA
// Check dladdr
// RUN: %env_asan_opts=external_symbolizer_path= not %run %t.thin.x86_64 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOLI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path= not %run %t.thin.x86_64h 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOLI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path= not %run %t.fat.x86_64 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOLI,CHECK-DATA
// RUN: %env_asan_opts=external_symbolizer_path= not %run %t.fat.x86_64h 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOLI,CHECK-DATA
// Check asan_symbolize.py with llvm-symbolizer
// RUN: %env_asan_opts=symbolize=0 not %run %t.thin.x86_64 2>&1 | %asan_symbolize | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.thin.x86_64h 2>&1 | %asan_symbolize | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.fat.x86_64 2>&1 | %asan_symbolize | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.fat.x86_64h 2>&1 | %asan_symbolize | FileCheck %s --check-prefixes CHECK,CHECK-LI
// Check asan_symbolize.py with atos
// RUN: %env_asan_opts=symbolize=0 not %run %t.thin.x86_64 2>&1 | %asan_symbolize --force-system-symbolizer | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.thin.x86_64h 2>&1 | %asan_symbolize --force-system-symbolizer | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.fat.x86_64 2>&1 | %asan_symbolize --force-system-symbolizer | FileCheck %s --check-prefixes CHECK,CHECK-LI
// RUN: %env_asan_opts=symbolize=0 not %run %t.fat.x86_64h 2>&1 | %asan_symbolize --force-system-symbolizer | FileCheck %s --check-prefixes CHECK,CHECK-LI
// REQUIRES: x86-target-arch
// REQUIRES: x86_64h
#include <sanitizer/common_interface_defs.h>
#include <stdio.h>
#include <stdlib.h>
#if __x86_64h__
// Unused functions and globals, just to mess up the offsets in x86_64h.
void dummy(char *a, char *b) {
while (*a == *b) {
a[0] = b[0];
a[1] = b[1];
a[2] = b[2];
a[3] = b[3];
fprintf(stderr, "dummy\n");
}
fprintf(stderr, "dummy\n");
}
long dummy_global;
long dummy_global2[100];
#endif
extern "C"
long faulty_global = 10;
void check_data_symbolication() {
char data[100];
__sanitizer_symbolize_global(&faulty_global, "%g", data, sizeof(data));
fprintf(stderr, "symbolized global: %s\n", data);
// CHECK-DATA: symbolized global: faulty_global
}
extern "C"
void faulty_func(char *p) {
*p = 'x'; // BOOM
// CHECK: AddressSanitizer: global-buffer-overflow
// CHECK-LI: #0 0x{{.*}} in faulty_func{{.*}} {{.*}}haswell-symbolication.cc:[[@LINE-2]]
// CHECK-NOLI: #0 0x{{.*}} in faulty_func{{.*}} {{.*}}haswell-symbolication
// CHECK: is located 2 bytes to the right of global variable 'faulty_global'
// CHECK-NOT: LLVMSymbolizer: error reading file
}
int main() {
check_data_symbolication();
char *p = (char *)(void *)&faulty_global;
p += 10;
faulty_func(p);
return 0;
}

View File

@ -7,7 +7,6 @@
// RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %t-so.so
// RUN: %clangxx_asan -O0 %s %libdl -o %t
// RUN: %env_asan_opts=symbolize=0 not %run %t 2>&1 | %asan_symbolize | FileCheck %s
// UNSUPPORTED: x86_64h-darwin,x86_64-darwin
// REQUIRES: stable-runtime
#if !defined(SHARED_LIB)

View File

@ -141,6 +141,17 @@ if config.host_os == 'Darwin':
except:
pass
# Detect x86_64h
try:
output = subprocess.check_output(["sysctl", "hw.cpusubtype"])
output_re = re.match("^hw.cpusubtype: ([0-9]+)$", output)
if output_re:
cpu_subtype = int(output_re.group(1))
if cpu_subtype == 8: # x86_64h
config.available_features.add('x86_64h')
except:
pass
config.substitutions.append( ("%macos_min_target_10_11", "-mmacosx-version-min=10.11") )
else:
config.substitutions.append( ("%macos_min_target_10_11", "") )

View File

@ -44,10 +44,10 @@ int main() {
// CHECK: WARNING: ThreadSanitizer: data race
// CHECK-NEXT: Write of size 4 at {{.*}} by thread T1:
// CHECK-NEXT: #0 foo1{{.*}} {{.*}}simple_stack2.cc:7{{(:10)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #1 bar1{{.*}} {{.*}}simple_stack2.cc:14{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #2 Thread1{{.*}} {{.*}}simple_stack2.cc:32{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #0 foo1{{.*}} {{.*}}simple_stack2.cc:7{{(:10)?}} ({{.*}})
// CHECK-NEXT: #1 bar1{{.*}} {{.*}}simple_stack2.cc:14{{(:3)?}} ({{.*}})
// CHECK-NEXT: #2 Thread1{{.*}} {{.*}}simple_stack2.cc:32{{(:3)?}} ({{.*}})
// CHECK: Previous read of size 4 at {{.*}} by main thread:
// CHECK-NEXT: #0 foo2{{.*}} {{.*}}simple_stack2.cc:18{{(:22)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #1 bar2{{.*}} {{.*}}simple_stack2.cc:27{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #2 main{{.*}} {{.*}}simple_stack2.cc:40{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
// CHECK-NEXT: #0 foo2{{.*}} {{.*}}simple_stack2.cc:18{{(:22)?}} ({{.*}})
// CHECK-NEXT: #1 bar2{{.*}} {{.*}}simple_stack2.cc:27{{(:3)?}} ({{.*}})
// CHECK-NEXT: #2 main{{.*}} {{.*}}simple_stack2.cc:40{{(:3)?}} ({{.*}})