[asan] Teach asan_symbolize.py to use :arch with atos and llvm-symbolizer on Darwin

This patch teaches asan_symbolize.py to read an architecture suffix on module names (e.g. ":x86_64") and pass that option to atos and llvm-symbolizer.

Differential Revision: https://reviews.llvm.org/D27378

llvm-svn: 291280
This commit is contained in:
Kuba Mracek 2017-01-06 21:02:50 +00:00
parent 06200bd7bc
commit 218ecacbf1
1 changed files with 51 additions and 26 deletions

View File

@ -24,6 +24,7 @@ binary_name_filter = None
fix_filename_patterns = None fix_filename_patterns = None
logfile = sys.stdin logfile = sys.stdin
allow_system_symbolizer = True allow_system_symbolizer = True
force_system_symbolizer = False
# FIXME: merge the code that calls fix_filename(). # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name): def fix_filename(file_name):
@ -37,6 +38,10 @@ def fix_filename(file_name):
def sysroot_path_filter(binary_name): def sysroot_path_filter(binary_name):
return sysroot_path + binary_name return sysroot_path + binary_name
def is_valid_arch(s):
  """Return True if `s` is exactly one of the recognised architecture names.

  The comparison is a case-sensitive equality check against the fixed list
  below; anything else (including the empty string) yields False.
  """
  known_arches = (
      "i386", "x86_64", "x86_64h",
      "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64",
      "powerpc64", "powerpc64le",
      "s390x", "s390",
  )
  return s in known_arches
def guess_arch(addr): def guess_arch(addr):
# Guess which arch we're running. 10 = len('0x') + 8 hex digits. # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
if len(addr) > 10: if len(addr) > 10:
@ -206,10 +211,10 @@ class UnbufferedLineConverter(object):
class DarwinSymbolizer(Symbolizer): class DarwinSymbolizer(Symbolizer):
def __init__(self, addr, binary): def __init__(self, addr, binary, arch):
super(DarwinSymbolizer, self).__init__() super(DarwinSymbolizer, self).__init__()
self.binary = binary self.binary = binary
self.arch = guess_arch(addr) self.arch = arch
self.open_atos() self.open_atos()
def open_atos(self): def open_atos(self):
@ -268,9 +273,9 @@ def BreakpadSymbolizerFactory(binary):
return None return None
def SystemSymbolizerFactory(system, addr, binary): def SystemSymbolizerFactory(system, addr, binary, arch):
if system == 'Darwin': if system == 'Darwin':
return DarwinSymbolizer(addr, binary) return DarwinSymbolizer(addr, binary, arch)
elif system == 'Linux' or system == 'FreeBSD': elif system == 'Linux' or system == 'FreeBSD':
return Addr2LineSymbolizer(binary) return Addr2LineSymbolizer(binary)
@ -369,7 +374,7 @@ class SymbolizationLoop(object):
self.frame_no = 0 self.frame_no = 0
self.process_line = self.process_line_posix self.process_line = self.process_line_posix
def symbolize_address(self, addr, binary, offset): def symbolize_address(self, addr, binary, offset, arch):
# On non-Darwin (i.e. on platforms without .dSYM debug info) always use # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
# a single symbolizer binary. # a single symbolizer binary.
# On Darwin, if the dsym hint producer is present: # On Darwin, if the dsym hint producer is present:
@ -381,6 +386,8 @@ class SymbolizationLoop(object):
# if so, reuse |last_llvm_symbolizer| which has the full set of hints; # if so, reuse |last_llvm_symbolizer| which has the full set of hints;
# 3. otherwise create a new symbolizer and pass all currently known # 3. otherwise create a new symbolizer and pass all currently known
# .dSYM hints to it. # .dSYM hints to it.
result = None
if not force_system_symbolizer:
if not binary in self.llvm_symbolizers: if not binary in self.llvm_symbolizers:
use_new_symbolizer = True use_new_symbolizer = True
if self.system == 'Darwin' and self.dsym_hint_producer: if self.system == 'Darwin' and self.dsym_hint_producer:
@ -391,7 +398,7 @@ class SymbolizationLoop(object):
self.llvm_symbolizers[binary] = self.last_llvm_symbolizer self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
else: else:
self.last_llvm_symbolizer = LLVMSymbolizerFactory( self.last_llvm_symbolizer = LLVMSymbolizerFactory(
self.system, guess_arch(addr), self.dsym_hints) self.system, arch, self.dsym_hints)
self.llvm_symbolizers[binary] = self.last_llvm_symbolizer self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
# Use the chain of symbolizers: # Use the chain of symbolizers:
# Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
@ -400,12 +407,14 @@ class SymbolizationLoop(object):
symbolizers[binary] = ChainSymbolizer( symbolizers[binary] = ChainSymbolizer(
[BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]]) [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
result = symbolizers[binary].symbolize(addr, binary, offset) result = symbolizers[binary].symbolize(addr, binary, offset)
else:
symbolizers[binary] = ChainSymbolizer([])
if result is None: if result is None:
if not allow_system_symbolizer: if not allow_system_symbolizer:
raise Exception('Failed to launch or use llvm-symbolizer.') raise Exception('Failed to launch or use llvm-symbolizer.')
# Initialize system symbolizer only if other symbolizers failed. # Initialize system symbolizer only if other symbolizers failed.
symbolizers[binary].append_symbolizer( symbolizers[binary].append_symbolizer(
SystemSymbolizerFactory(self.system, addr, binary)) SystemSymbolizerFactory(self.system, addr, binary, arch))
result = symbolizers[binary].symbolize(addr, binary, offset) result = symbolizers[binary].symbolize(addr, binary, offset)
# The system symbolizer must produce some result. # The system symbolizer must produce some result.
assert result assert result
@ -441,16 +450,26 @@ class SymbolizationLoop(object):
if DEBUG: if DEBUG:
print line print line
_, frameno_str, addr, binary, offset = match.groups() _, frameno_str, addr, binary, offset = match.groups()
arch = ""
# Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
colon_pos = binary.rfind(":")
if colon_pos != -1:
maybe_arch = binary[colon_pos+1:]
if is_valid_arch(maybe_arch):
arch = maybe_arch
binary = binary[0:colon_pos]
if arch == "":
arch = guess_arch(addr)
if frameno_str == '0': if frameno_str == '0':
# Assume that frame #0 is the first frame of new stack trace. # Assume that frame #0 is the first frame of new stack trace.
self.frame_no = 0 self.frame_no = 0
original_binary = binary original_binary = binary
if self.binary_name_filter: if self.binary_name_filter:
binary = self.binary_name_filter(binary) binary = self.binary_name_filter(binary)
symbolized_line = self.symbolize_address(addr, binary, offset) symbolized_line = self.symbolize_address(addr, binary, offset, arch)
if not symbolized_line: if not symbolized_line:
if original_binary != binary: if original_binary != binary:
symbolized_line = self.symbolize_address(addr, binary, offset) symbolized_line = self.symbolize_address(addr, binary, offset, arch)
return self.get_symbolized_lines(symbolized_line) return self.get_symbolized_lines(symbolized_line)
@ -472,6 +491,8 @@ if __name__ == '__main__':
parser.add_argument('-l','--logfile', default=sys.stdin, parser.add_argument('-l','--logfile', default=sys.stdin,
type=argparse.FileType('r'), type=argparse.FileType('r'),
help='set log file name to parse, default is stdin') help='set log file name to parse, default is stdin')
parser.add_argument('--force-system-symbolizer', action='store_true',
help='don\'t use llvm-symbolizer')
args = parser.parse_args() args = parser.parse_args()
if args.path_to_cut: if args.path_to_cut:
fix_filename_patterns = args.path_to_cut fix_filename_patterns = args.path_to_cut
@ -486,5 +507,9 @@ if __name__ == '__main__':
logfile = args.logfile logfile = args.logfile
else: else:
logfile = sys.stdin logfile = sys.stdin
if args.force_system_symbolizer:
force_system_symbolizer = True
if force_system_symbolizer:
assert(allow_system_symbolizer)
loop = SymbolizationLoop(binary_name_filter) loop = SymbolizationLoop(binary_name_filter)
loop.process_logfile() loop.process_logfile()