[ASan] Allow the users of SymbolizationLoop to make use of the --dsym-hint option in llvm-symbolizer

Let the users of SymbolizationLoop define a function that produces the list of .dSYM hints (possible paths to the .dSYM bundle) for the given binary.
Because the hints can't be added to an existing llvm-symbolizer process, we spawn a new symbolizer process each time a new hint appears.
New hints can only appear for binaries that we haven't seen before.

llvm-svn: 222535
This commit is contained in:
Alexander Potapenko 2014-11-21 14:12:00 +00:00
parent bcb55eec3a
commit 7a0f81a9b7
1 changed files with 39 additions and 11 deletions

View File

@ -66,10 +66,12 @@ class Symbolizer(object):
class LLVMSymbolizer(Symbolizer):
def __init__(self, symbolizer_path, addr):
def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]):
super(LLVMSymbolizer, self).__init__()
self.symbolizer_path = symbolizer_path
self.default_arch = guess_arch(addr)
self.default_arch = default_arch
self.system = system
self.dsym_hints = dsym_hints
self.pipe = self.open_llvm_symbolizer()
def open_llvm_symbolizer(self):
@ -79,6 +81,9 @@ class LLVMSymbolizer(Symbolizer):
'--functions=short',
'--inlining=true',
'--default-arch=%s' % self.default_arch]
if self.system == 'Darwin':
for hint in self.dsym_hints:
cmd.append('--dsym-hint=%s' % hint)
if DEBUG:
print ' '.join(cmd)
try:
@ -94,7 +99,7 @@ class LLVMSymbolizer(Symbolizer):
return None
result = []
try:
symbolizer_input = '%s %s' % (binary, offset)
symbolizer_input = '"%s" %s' % (binary, offset)
if DEBUG:
print symbolizer_input
print >> self.pipe.stdin, symbolizer_input
@ -116,14 +121,14 @@ class LLVMSymbolizer(Symbolizer):
return result
def LLVMSymbolizerFactory(system, addr):
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]):
symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
if not symbolizer_path:
symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
if not symbolizer_path:
# Assume llvm-symbolizer is in PATH.
symbolizer_path = 'llvm-symbolizer'
return LLVMSymbolizer(symbolizer_path, addr)
return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
@ -335,26 +340,49 @@ class BreakpadSymbolizer(Symbolizer):
class SymbolizationLoop(object):
def __init__(self, binary_name_filter=None):
def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
# Used by clients who may want to supply a different binary name.
# E.g. in Chrome several binaries may share a single .dSYM.
self.binary_name_filter = binary_name_filter
self.dsym_hint_producer = dsym_hint_producer
self.system = os.uname()[0]
if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
raise Exception('Unknown system')
self.llvm_symbolizer = None
self.llvm_symbolizers = {}
self.last_llvm_symbolizer = None
self.dsym_hints = set([])
self.frame_no = 0
def symbolize_address(self, addr, binary, offset):
# Initialize llvm-symbolizer lazily.
if not self.llvm_symbolizer:
self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
# On non-Darwin (i.e. on platforms without .dSYM debug info) always use
# a single symbolizer binary.
# On Darwin, if the dsym hint producer is present:
# 1. check whether we've seen this binary already; if so,
# use |llvm_symbolizers[binary]|, which has already loaded the debug
# info for this binary (might not be the case for
# |last_llvm_symbolizer|);
# 2. otherwise check if we've seen all the hints for this binary already;
# if so, reuse |last_llvm_symbolizer| which has the full set of hints;
# 3. otherwise create a new symbolizer and pass all currently known
# .dSYM hints to it.
if not binary in self.llvm_symbolizers:
use_last_symbolizer = True
if self.system == 'Darwin' and self.dsym_hint_producer:
dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
use_last_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
self.dsym_hints |= dsym_hints_for_binary
if self.last_llvm_symbolizer and use_last_symbolizer:
self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
else:
self.last_llvm_symbolizer = LLVMSymbolizerFactory(
self.system, guess_arch(addr), self.dsym_hints)
self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
# Use the chain of symbolizers:
# Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
# (fall back to next symbolizer if the previous one fails).
if not binary in symbolizers:
symbolizers[binary] = ChainSymbolizer(
[BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
[BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
result = symbolizers[binary].symbolize(addr, binary, offset)
if result is None:
# Initialize system symbolizer only if other symbolizers failed.