llvm-project/lldb/examples/python/crashlog.py

624 lines
28 KiB
Python
Executable File

#!/usr/bin/python
#----------------------------------------------------------------------
# Be sure to add the python path that points to the LLDB shared library.
#
# To use this in the embedded python interpreter using "lldb":
#
# cd /path/containing/crashlog.py
# lldb
# (lldb) script import crashlog
# "crashlog" command installed, type "crashlog --help" for detailed help
# (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
#
# The benefit of running the crashlog command inside lldb in the
# embedded python interpreter is when the command completes, there
# will be a target with all of the files loaded at the locations
# described in the crash log. Only the files that have stack frames
# in the backtrace will be loaded unless the "--load-all" option
# has been specified. This allows users to explore the program in the
# state it was in right at crash time.
#
# On MacOSX csh, tcsh:
# ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#
# On MacOSX sh, bash:
# PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------
import lldb
import commands
import cmd
import glob
import optparse
import os
import plistlib
import pprint # pp = pprint.PrettyPrinter(indent=4); pp.pprint(command_args)
import re
import shlex
import string
import sys
import time
import uuid
from lldb.utils import symbolication
PARSE_MODE_NORMAL = 0
PARSE_MODE_THREAD = 1
PARSE_MODE_IMAGES = 2
PARSE_MODE_THREGS = 3
PARSE_MODE_SYSTEM = 4
class CrashLog(symbolication.Symbolicator):
"""Class that does parses darwin crash logs"""
thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
frame_regex = re.compile('^([0-9]+) +([^ ]+) *\t(0x[0-9a-fA-F]+) +(.*)')
image_regex_uuid = re.compile('(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^<]+)<([-0-9a-fA-F]+)> (.*)');
image_regex_no_uuid = re.compile('(0x[0-9a-fA-F]+)[- ]+(0x[0-9a-fA-F]+) +[+]?([^ ]+) +([^/]+)/(.*)');
empty_line_regex = re.compile('^$')
class Thread:
"""Class that represents a thread in a darwin crash log"""
def __init__(self, index):
self.index = index
self.frames = list()
self.registers = dict()
self.reason = None
self.queue = None
def dump(self, prefix):
print "%sThread[%u] %s" % (prefix, self.index, self.reason)
if self.frames:
print "%s Frames:" % (prefix)
for frame in self.frames:
frame.dump(prefix + ' ')
if self.registers:
print "%s Registers:" % (prefix)
for reg in self.registers.keys():
print "%s %-5s = %#16.16x" % (prefix, reg, self.registers[reg])
def did_crash(self):
return self.reason != None
def __str__(self):
s = "Thread[%u]" % self.index
if self.reason:
s += ' %s' % self.reason
return s
class Frame:
"""Class that represents a stack frame in a thread in a darwin crash log"""
def __init__(self, index, pc, description):
self.pc = pc
self.description = description
self.index = index
def __str__(self):
if self.description:
return "[%3u] 0x%16.16x %s" % (self.index, self.pc, self.description)
else:
return "[%3u] 0x%16.16x" % (self.index, self.pc)
def dump(self, prefix):
print "%s%s" % (prefix, str(self))
class DarwinImage(symbolication.Image):
"""Class that represents a binary images in a darwin crash log"""
dsymForUUIDBinary = os.path.expanduser('~rc/bin/dsymForUUID')
if not os.path.exists(dsymForUUIDBinary):
dsymForUUIDBinary = commands.getoutput('which dsymForUUID')
dwarfdump_uuid_regex = re.compile('UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
def __init__(self, text_addr_lo, text_addr_hi, identifier, version, uuid, path):
symbolication.Image.__init__(self, path, uuid);
self.add_section (symbolication.Section(text_addr_lo, text_addr_hi, "__TEXT"))
self.identifier = identifier
self.version = version
def locate_module_and_debug_symbols(self):
if self.resolved_path:
# Don't load a module twice...
return True
uuid_str = self.get_normalized_uuid_string()
print 'Getting symbols for %s %s...' % (uuid_str, self.path),
if os.path.exists(self.dsymForUUIDBinary):
dsym_for_uuid_command = '%s %s' % (self.dsymForUUIDBinary, uuid_str)
s = commands.getoutput(dsym_for_uuid_command)
if s:
plist_root = plistlib.readPlistFromString (s)
if plist_root:
plist = plist_root[uuid_str]
if plist:
if 'DBGArchitecture' in plist:
self.arch = plist['DBGArchitecture']
if 'DBGDSYMPath' in plist:
self.symfile = os.path.realpath(plist['DBGDSYMPath'])
if 'DBGSymbolRichExecutable' in plist:
self.resolved_path = os.path.expanduser (plist['DBGSymbolRichExecutable'])
if not self.resolved_path and os.path.exists(self.path):
dwarfdump_cmd_output = commands.getoutput('dwarfdump --uuid "%s"' % self.path)
self_uuid = self.get_uuid()
for line in dwarfdump_cmd_output.splitlines():
match = self.dwarfdump_uuid_regex.search (line)
if match:
dwarf_uuid_str = match.group(1)
dwarf_uuid = uuid.UUID(dwarf_uuid_str)
if self_uuid == dwarf_uuid:
self.resolved_path = self.path
self.arch = match.group(2)
break;
if not self.resolved_path:
print "error: file %s '%s' doesn't match the UUID in the installed file" % (uuid_str, self.path)
return False
if (self.resolved_path and os.path.exists(self.resolved_path)) or (self.path and os.path.exists(self.path)):
print 'ok'
# if self.resolved_path:
# print ' exe = "%s"' % self.resolved_path
# if self.symfile:
# print ' dsym = "%s"' % self.symfile
return True
return False
def __init__(self, path):
"""CrashLog constructor that take a path to a darwin crash log file"""
symbolication.Symbolicator.__init__(self);
self.path = os.path.expanduser(path);
self.info_lines = list()
self.system_profile = list()
self.threads = list()
self.idents = list() # A list of the required identifiers for doing all stack backtraces
self.crashed_thread_idx = -1
self.version = -1
self.error = None
# With possible initial component of ~ or ~user replaced by that user's home directory.
try:
f = open(self.path)
except IOError:
self.error = 'error: cannot open "%s"' % self.path
return
self.file_lines = f.read().splitlines()
parse_mode = PARSE_MODE_NORMAL
thread = None
for line in self.file_lines:
# print line
line_len = len(line)
if line_len == 0:
if thread:
if parse_mode == PARSE_MODE_THREAD:
if thread.index == self.crashed_thread_idx:
thread.reason = ''
if self.thread_exception:
thread.reason += self.thread_exception
if self.thread_exception_data:
thread.reason += " (%s)" % self.thread_exception_data
self.threads.append(thread)
thread = None
else:
# only append an extra empty line if the previous line
# in the info_lines wasn't empty
if len(self.info_lines) > 0 and len(self.info_lines[-1]):
self.info_lines.append(line)
parse_mode = PARSE_MODE_NORMAL
# print 'PARSE_MODE_NORMAL'
elif parse_mode == PARSE_MODE_NORMAL:
if line.startswith ('Process:'):
(self.process_name, pid_with_brackets) = line[8:].strip().split()
self.process_id = pid_with_brackets.strip('[]')
elif line.startswith ('Path:'):
self.process_path = line[5:].strip()
elif line.startswith ('Identifier:'):
self.process_identifier = line[11:].strip()
elif line.startswith ('Version:'):
version_string = line[8:].strip()
matched_pair = re.search("(.+)\((.+)\)", version_string)
if matched_pair:
self.process_version = matched_pair.group(1)
self.process_compatability_version = matched_pair.group(2)
else:
self.process = version_string
self.process_compatability_version = version_string
elif line.startswith ('Parent Process:'):
(self.parent_process_name, pid_with_brackets) = line[15:].strip().split()
self.parent_process_id = pid_with_brackets.strip('[]')
elif line.startswith ('Exception Type:'):
self.thread_exception = line[15:].strip()
continue
elif line.startswith ('Exception Codes:'):
self.thread_exception_data = line[16:].strip()
continue
elif line.startswith ('Crashed Thread:'):
self.crashed_thread_idx = int(line[15:].strip().split()[0])
continue
elif line.startswith ('Report Version:'):
self.version = int(line[15:].strip())
continue
elif line.startswith ('System Profile:'):
parse_mode = PARSE_MODE_SYSTEM
continue
elif (line.startswith ('Interval Since Last Report:') or
line.startswith ('Crashes Since Last Report:') or
line.startswith ('Per-App Interval Since Last Report:') or
line.startswith ('Per-App Crashes Since Last Report:') or
line.startswith ('Sleep/Wake UUID:') or
line.startswith ('Anonymous UUID:')):
# ignore these
continue
elif line.startswith ('Thread'):
thread_state_match = self.thread_state_regex.search (line)
if thread_state_match:
thread_state_match = self.thread_regex.search (line)
thread_idx = int(thread_state_match.group(1))
parse_mode = PARSE_MODE_THREGS
thread = self.threads[thread_idx]
else:
thread_match = self.thread_regex.search (line)
if thread_match:
# print 'PARSE_MODE_THREAD'
parse_mode = PARSE_MODE_THREAD
thread_idx = int(thread_match.group(1))
thread = CrashLog.Thread(thread_idx)
continue
elif line.startswith ('Binary Images:'):
parse_mode = PARSE_MODE_IMAGES
continue
self.info_lines.append(line.strip())
elif parse_mode == PARSE_MODE_THREAD:
if line.startswith ('Thread'):
continue
frame_match = self.frame_regex.search(line)
if frame_match:
ident = frame_match.group(2)
if not ident in self.idents:
self.idents.append(ident)
thread.frames.append (CrashLog.Frame(int(frame_match.group(1)), int(frame_match.group(3), 0), frame_match.group(4)))
else:
print 'error: frame regex failed for line: "%s"' % line
elif parse_mode == PARSE_MODE_IMAGES:
image_match = self.image_regex_uuid.search (line)
if image_match:
image = CrashLog.DarwinImage (int(image_match.group(1),0),
int(image_match.group(2),0),
image_match.group(3).strip(),
image_match.group(4).strip(),
uuid.UUID(image_match.group(5)),
image_match.group(6))
self.images.append (image)
else:
image_match = self.image_regex_no_uuid.search (line)
if image_match:
image = CrashLog.DarwinImage (int(image_match.group(1),0),
int(image_match.group(2),0),
image_match.group(3).strip(),
image_match.group(4).strip(),
None,
image_match.group(5))
self.images.append (image)
else:
print "error: image regex failed for: %s" % line
elif parse_mode == PARSE_MODE_THREGS:
stripped_line = line.strip()
reg_values = re.split(' +', stripped_line);
for reg_value in reg_values:
#print 'reg_value = "%s"' % reg_value
(reg, value) = reg_value.split(': ')
#print 'reg = "%s"' % reg
#print 'value = "%s"' % value
thread.registers[reg.strip()] = int(value, 0)
elif parse_mode == PARSE_MODE_SYSTEM:
self.system_profile.append(line)
f.close()
def dump(self):
print "Crash Log File: %s" % (self.path)
print "\nThreads:"
for thread in self.threads:
thread.dump(' ')
print "\nImages:"
for image in self.images:
image.dump(' ')
def find_image_with_identifier(self, identifier):
for image in self.images:
if image.identifier == identifier:
return image
return None
def create_target(self):
#print 'crashlog.create_target()...'
target = symbolication.Symbolicator.create_target(self)
if target:
return target
# We weren't able to open the main executable as, but we can still symbolicate
print 'crashlog.create_target()...2'
if self.idents:
for ident in self.idents:
image = self.find_image_with_identifier (ident)
if image:
target = image.create_target ()
if target:
return target # success
print 'crashlog.create_target()...3'
for image in self.images:
target = image.create_target ()
if target:
return target # success
print 'crashlog.create_target()...4'
print 'error: unable to locate any executables from the crash log'
return None
def usage():
print "Usage: lldb-symbolicate.py [-n name] executable-image"
sys.exit(0)
class Interactive(cmd.Cmd):
'''Interactive prompt for analyzing one or more Darwin crash logs, type "help" to see a list of supported commands.'''
image_option_parser = None
def __init__(self, crash_logs):
cmd.Cmd.__init__(self)
self.intro = 'Interactive crashlogs prompt, type "help" to see a list of supported commands.'
self.crash_logs = crash_logs
self.prompt = '% '
def default(self, line):
'''Catch all for unknown command, which will exit the interpreter.'''
print "uknown command: %s" % line
return True
def do_q(self, line):
'''Quit command'''
return True
def do_quit(self, line):
'''Quit command'''
return True
def do_symbolicate(self, line):
description='''Symbolicate one or more darwin crash log files by index to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.'''
option_parser = CreateSymbolicateCrashLogOptions ('symbolicate', description, False)
command_args = shlex.split(line)
try:
(options, args) = option_parser.parse_args(command_args)
except:
return
for idx_str in args:
idx = int(idx_str)
if idx < len(self.crash_logs):
SymbolicateCrashLog (self.crash_logs[idx], options)
else:
print 'error: crash log index %u is out of range' % (idx)
def do_list(self, line=None):
'''Dump a list of all crash logs that are currently loaded.
USAGE: list'''
print '%u crash logs are loaded:' % len(self.crash_logs)
for (crash_log_idx, crash_log) in enumerate(self.crash_logs):
print '[%u] = %s' % (crash_log_idx, crash_log.path)
def do_image(self, line):
'''Dump information about an image in the crash log given an image basename.
USAGE: image <basename>'''
usage = "usage: %prog [options] <PATH> [PATH ...]"
description='''Dump information about one or more images in all crash logs. The <PATH>
can be a full path or a image basename.'''
command_args = shlex.split(line)
if not self.image_option_parser:
self.image_option_parser = optparse.OptionParser(description=description, prog='image',usage=usage)
self.image_option_parser.add_option('-a', '--all', action='store_true', help='show all images', default=False)
try:
(options, args) = self.image_option_parser.parse_args(command_args)
except:
return
if args:
for image_path in args:
fullpath_search = image_path[0] == '/'
for crash_log in self.crash_logs:
matches_found = 0
for (image_idx, image) in enumerate(crash_log.images):
if fullpath_search:
if image.get_resolved_path() == image_path:
matches_found += 1
print image
else:
image_basename = image.get_resolved_path_basename()
if image_basename == image_path:
matches_found += 1
print image
if matches_found == 0:
for (image_idx, image) in enumerate(crash_log.images):
resolved_image_path = image.get_resolved_path()
if resolved_image_path and string.find(image.get_resolved_path(), image_path) >= 0:
print image
else:
for crash_log in self.crash_logs:
for (image_idx, image) in enumerate(crash_log.images):
print '[%u] %s' % (image_idx, image)
return False
def interactive_crashlogs(options, args):
crash_log_files = list()
for arg in args:
for resolved_path in glob.glob(arg):
crash_log_files.append(resolved_path)
crash_logs = list();
for crash_log_file in crash_log_files:
#print 'crash_log_file = "%s"' % crash_log_file
crash_log = CrashLog(crash_log_file)
if crash_log.error:
print crash_log.error
continue
if options.verbose:
crash_log.dump()
if not crash_log.images:
print 'error: no images in crash log "%s"' % (crash_log)
continue
else:
crash_logs.append(crash_log)
interpreter = Interactive(crash_logs)
# List all crash logs that were imported
interpreter.do_list()
interpreter.cmdloop()
def Symbolicate(debugger, command, result, dict):
print 'def Symbolicate(debugger, command, result, dict): called with "%s"' % (command)
try:
SymbolicateCrashLogs (shlex.split(command))
except:
result.PutCString ("error: python exception %s" % sys.exc_info()[0])
def SymbolicateCrashLog(crash_log, options):
if crash_log.error:
print crash_log.error
return
if options.verbose:
crash_log.dump()
if not crash_log.images:
print 'error: no images in crash log'
return
target = crash_log.create_target ()
if not target:
return
exe_module = target.GetModuleAtIndex(0)
images_to_load = list()
loaded_images = list()
if options.load_all_images:
# --load-all option was specified, load everything up
for image in crash_log.images:
images_to_load.append(image)
else:
# Only load the images found in stack frames for the crashed threads
for ident in crash_log.idents:
images = crash_log.find_images_with_identifier (ident)
if images:
for image in images:
images_to_load.append(image)
else:
print 'error: can\'t find image for identifier "%s"' % ident
for image in images_to_load:
if image in loaded_images:
print "warning: skipping %s loaded at %#16.16x duplicate entry (probably commpage)" % (image.path, image.text_addr_lo)
else:
err = image.add_module (target)
if err:
print err
else:
#print 'loaded %s' % image
loaded_images.append(image)
for thread in crash_log.threads:
this_thread_crashed = thread.did_crash()
if options.crashed_only and this_thread_crashed == False:
continue
print "%s" % thread
#prev_frame_index = -1
for frame_idx, frame in enumerate(thread.frames):
disassemble = (this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth;
if frame_idx == 0:
symbolicated_frame_addresses = crash_log.symbolicate (frame.pc)
else:
# Any frame above frame zero and we have to subtract one to get the previous line entry
symbolicated_frame_addresses = crash_log.symbolicate (frame.pc - 1)
if symbolicated_frame_addresses:
symbolicated_frame_address_idx = 0
for symbolicated_frame_address in symbolicated_frame_addresses:
print '[%3u] %s' % (frame_idx, symbolicated_frame_address)
if symbolicated_frame_address_idx == 0:
if disassemble:
instructions = symbolicated_frame_address.get_instructions()
if instructions:
print
symbolication.disassemble_instructions (target,
instructions,
frame.pc,
options.disassemble_before,
options.disassemble_after, frame.index > 0)
print
symbolicated_frame_address_idx += 1
else:
print frame
print
if options.dump_image_list:
print "Binary Images:"
for image in crash_log.images:
print image
def CreateSymbolicateCrashLogOptions(command_name, description, add_interactive_options):
usage = "usage: %prog [options] <FILE> [FILE ...]"
option_parser = optparse.OptionParser(description=description, prog='crashlog',usage=usage)
option_parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
option_parser.add_option('-a', '--load-all', action='store_true', dest='load_all_images', help='load all executable images, not just the images found in the crashed stack frames', default=False)
option_parser.add_option('--images', action='store_true', dest='dump_image_list', help='show image list', default=False)
option_parser.add_option('-g', '--debug-delay', type='int', dest='debug_delay', metavar='NSEC', help='pause for NSEC seconds for debugger', default=0)
option_parser.add_option('-c', '--crashed-only', action='store_true', dest='crashed_only', help='only symbolicate the crashed thread', default=False)
option_parser.add_option('-d', '--disasm-depth', type='int', dest='disassemble_depth', help='set the depth in stack frames that should be disassembled (default is 1)', default=1)
option_parser.add_option('-D', '--disasm-all', action='store_true', dest='disassemble_all_threads', help='enabled disassembly of frames on all threads (not just the crashed thread)', default=False)
option_parser.add_option('-B', '--disasm-before', type='int', dest='disassemble_before', help='the number of instructions to disassemble before the frame PC', default=4)
option_parser.add_option('-A', '--disasm-after', type='int', dest='disassemble_after', help='the number of instructions to disassemble after the frame PC', default=4)
if add_interactive_options:
option_parser.add_option('-i', '--interactive', action='store_true', help='parse all crash logs and enter interactive mode', default=False)
return option_parser
def SymbolicateCrashLogs(command_args):
description='''Symbolicate one or more darwin crash log files to provide source file and line information,
inlined stack frames back to the concrete functions, and disassemble the location of the crash
for the first frame of the crashed thread.
If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter
for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been
created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows
you to explore the program as if it were stopped at the locations described in the crash log and functions can
be disassembled and lookups can be performed using the addresses found in the crash log.'''
option_parser = CreateSymbolicateCrashLogOptions ('crashlog', description, True)
try:
(options, args) = option_parser.parse_args(command_args)
except:
return
if options.verbose:
print 'command_args = %s' % command_args
print 'options', options
print 'args', args
if options.debug_delay > 0:
print "Waiting %u seconds for debugger to attach..." % options.debug_delay
time.sleep(options.debug_delay)
error = lldb.SBError()
if args:
if options.interactive:
interactive_crashlogs(options, args)
else:
for crash_log_file in args:
crash_log = CrashLog(crash_log_file)
SymbolicateCrashLog (crash_log, options)
if __name__ == '__main__':
# Create a new debugger instance
print 'main'
lldb.debugger = lldb.SBDebugger.Create()
SymbolicateCrashLogs (sys.argv[1:])
elif getattr(lldb, 'debugger', None):
lldb.debugger.HandleCommand('command script add -f lldb.macosx.crashlog.Symbolicate crashlog')
print '"crashlog" command installed, type "crashlog --help" for detailed help'