[crashlog] Move crash log parsing into its own class

Move crash log parsing out of the CrashLog class and into its own class
and add more tests.

Differential revision: https://reviews.llvm.org/D90664
This commit is contained in:
Jonas Devlieghere 2020-11-02 20:29:32 -08:00
parent af9bf14e6b
commit 4b84682044
4 changed files with 232 additions and 221 deletions

View File

@ -71,41 +71,7 @@ def read_plist(s):
else:
return plistlib.readPlistFromString(s)
class CrashLogParseMode:
NORMAL = 0
THREAD = 1
IMAGES = 2
THREGS = 3
SYSTEM = 4
INSTRS = 5
class CrashLog(symbolication.Symbolicator):
"""Class that does parses darwin crash logs"""
parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
app_backtrace_regex = re.compile(
'^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
frame_regex = re.compile(r'^([0-9]+)' r'\s' # id
r'+(.+?)' r'\s+' # img_name
r'(' +version+ r')?' # img_version
r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
r' +(.*)' # offs
)
null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo
r'\s+' '-' r'\s+' # -
r'(0x[0-9a-fA-F]+)' r'\s+' # img_hi
r'[+]?(.+?)' r'\s+' # img_name
r'(' +version+ ')?' # img_version
r'(<([-0-9a-fA-F]+)>\s+)?' # img_uuid
r'(/.*)' # img_path
)
empty_line_regex = re.compile('^$')
class Thread:
"""Class that represents a thread in a darwin crash log"""
@ -355,180 +321,8 @@ class CrashLog(symbolication.Symbolicator):
self.idents = list() # A list of the required identifiers for doing all stack backtraces
self.crashed_thread_idx = -1
self.version = -1
self.error = None
self.target = None
self.verbose = verbose
# With possible initial component of ~ or ~user replaced by that user's
# home directory.
try:
f = open(self.path)
except IOError:
self.error = 'error: cannot open "%s"' % self.path
return
self.file_lines = f.read().splitlines()
parse_mode = CrashLogParseMode.NORMAL
thread = None
app_specific_backtrace = False
for line in self.file_lines:
# print line
line_len = len(line)
if line_len == 0:
if thread:
if parse_mode == CrashLogParseMode.THREAD:
if thread.index == self.crashed_thread_idx:
thread.reason = ''
if self.thread_exception:
thread.reason += self.thread_exception
if self.thread_exception_data:
thread.reason += " (%s)" % self.thread_exception_data
if app_specific_backtrace:
self.backtraces.append(thread)
else:
self.threads.append(thread)
thread = None
else:
# only append an extra empty line if the previous line
# in the info_lines wasn't empty
if len(self.info_lines) > 0 and len(self.info_lines[-1]):
self.info_lines.append(line)
parse_mode = CrashLogParseMode.NORMAL
elif parse_mode == CrashLogParseMode.NORMAL:
if line.startswith('Process:'):
(self.process_name, pid_with_brackets) = line[
8:].strip().split(' [')
self.process_id = pid_with_brackets.strip('[]')
elif line.startswith('Path:'):
self.process_path = line[5:].strip()
elif line.startswith('Identifier:'):
self.process_identifier = line[11:].strip()
elif line.startswith('Version:'):
version_string = line[8:].strip()
matched_pair = re.search("(.+)\((.+)\)", version_string)
if matched_pair:
self.process_version = matched_pair.group(1)
self.process_compatability_version = matched_pair.group(
2)
else:
self.process = version_string
self.process_compatability_version = version_string
elif self.parent_process_regex.search(line):
parent_process_match = self.parent_process_regex.search(
line)
self.parent_process_name = parent_process_match.group(1)
self.parent_process_id = parent_process_match.group(2)
elif line.startswith('Exception Type:'):
self.thread_exception = line[15:].strip()
continue
elif line.startswith('Exception Codes:'):
self.thread_exception_data = line[16:].strip()
continue
elif line.startswith('Exception Subtype:'): # iOS
self.thread_exception_data = line[18:].strip()
continue
elif line.startswith('Crashed Thread:'):
self.crashed_thread_idx = int(line[15:].strip().split()[0])
continue
elif line.startswith('Triggered by Thread:'): # iOS
self.crashed_thread_idx = int(line[20:].strip().split()[0])
continue
elif line.startswith('Report Version:'):
self.version = int(line[15:].strip())
continue
elif line.startswith('System Profile:'):
parse_mode = CrashLogParseMode.SYSTEM
continue
elif (line.startswith('Interval Since Last Report:') or
line.startswith('Crashes Since Last Report:') or
line.startswith('Per-App Interval Since Last Report:') or
line.startswith('Per-App Crashes Since Last Report:') or
line.startswith('Sleep/Wake UUID:') or
line.startswith('Anonymous UUID:')):
# ignore these
continue
elif line.startswith('Thread'):
thread_state_match = self.thread_state_regex.search(line)
if thread_state_match:
app_specific_backtrace = False
thread_state_match = self.thread_regex.search(line)
thread_idx = int(thread_state_match.group(1))
parse_mode = CrashLogParseMode.THREGS
thread = self.threads[thread_idx]
continue
thread_insts_match = self.thread_instrs_regex.search(line)
if thread_insts_match:
parse_mode = CrashLogParseMode.INSTRS
continue
thread_match = self.thread_regex.search(line)
if thread_match:
app_specific_backtrace = False
parse_mode = CrashLogParseMode.THREAD
thread_idx = int(thread_match.group(1))
thread = CrashLog.Thread(thread_idx, False)
continue
continue
elif line.startswith('Binary Images:'):
parse_mode = CrashLogParseMode.IMAGES
continue
elif line.startswith('Application Specific Backtrace'):
app_backtrace_match = self.app_backtrace_regex.search(line)
if app_backtrace_match:
parse_mode = CrashLogParseMode.THREAD
app_specific_backtrace = True
idx = int(app_backtrace_match.group(1))
thread = CrashLog.Thread(idx, True)
elif line.startswith('Last Exception Backtrace:'): # iOS
parse_mode = CrashLogParseMode.THREAD
app_specific_backtrace = True
idx = 1
thread = CrashLog.Thread(idx, True)
self.info_lines.append(line.strip())
elif parse_mode == CrashLogParseMode.THREAD:
if line.startswith('Thread'):
continue
if self.null_frame_regex.search(line):
print('warning: thread parser ignored null-frame: "%s"' % line)
continue
frame_match = self.frame_regex.search(line)
if frame_match:
(frame_id, frame_img_name, _, frame_img_version, _,
frame_addr, frame_ofs) = frame_match.groups()
ident = frame_img_name
thread.add_ident(ident)
if ident not in self.idents:
self.idents.append(ident)
thread.frames.append(CrashLog.Frame(int(frame_id), int(
frame_addr, 0), frame_ofs))
else:
print('error: frame regex failed for line: "%s"' % line)
elif parse_mode == CrashLogParseMode.IMAGES:
image_match = self.image_regex_uuid.search(line)
if image_match:
(img_lo, img_hi, img_name, _, img_version, _,
_, img_uuid, img_path) = image_match.groups()
image = CrashLog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
img_name.strip(),
img_version.strip()
if img_version else "",
uuid.UUID(img_uuid), img_path,
self.verbose)
self.images.append(image)
else:
print("error: image regex failed for: %s" % line)
elif parse_mode == CrashLogParseMode.THREGS:
stripped_line = line.strip()
# "r12: 0x00007fff6b5939c8 r13: 0x0000000007000006 r14: 0x0000000000002a03 r15: 0x0000000000000c00"
reg_values = re.findall(
'([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
for reg_value in reg_values:
(reg, value) = reg_value.split(': ')
thread.registers[reg.strip()] = int(value, 0)
elif parse_mode == CrashLogParseMode.SYSTEM:
self.system_profile.append(line)
elif parse_mode == CrashLogParseMode.INSTRS:
pass
f.close()
def dump(self):
print("Crash Log File: %s" % (self.path))
@ -584,6 +378,213 @@ class CrashLog(symbolication.Symbolicator):
return self.target
class CrashLogParseMode:
NORMAL = 0
THREAD = 1
IMAGES = 2
THREGS = 3
SYSTEM = 4
INSTRS = 5
class CrashLogParser:
parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
frame_regex = re.compile(r'^([0-9]+)' r'\s' # id
r'+(.+?)' r'\s+' # img_name
r'(' +version+ r')?' # img_version
r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
r' +(.*)' # offs
)
null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo
r'\s+' '-' r'\s+' # -
r'(0x[0-9a-fA-F]+)' r'\s+' # img_hi
r'[+]?(.+?)' r'\s+' # img_name
r'(' +version+ ')?' # img_version
r'(<([-0-9a-fA-F]+)>\s+)?' # img_uuid
r'(/.*)' # img_path
)
def __init__(self, path, verbose):
self.path = os.path.expanduser(path)
self.verbose = verbose
self.parse_mode = CrashLogParseMode.NORMAL
self.thread = None
self.app_specific_backtrace = False
self.crashlog = CrashLog(self.path, self.verbose)
def parse(self):
with open(self.path,'r') as f:
lines = f.read().splitlines()
for line in lines:
line_len = len(line)
if line_len == 0:
if self.thread:
if self.parse_mode == CrashLogParseMode.THREAD:
if self.thread.index == self.crashlog.crashed_thread_idx:
self.thread.reason = ''
if self.crashlog.thread_exception:
self.thread.reason += self.crashlog.thread_exception
if self.crashlog.thread_exception_data:
self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
if self.app_specific_backtrace:
self.crashlog.backtraces.append(self.thread)
else:
self.crashlog.threads.append(self.thread)
self.thread = None
else:
# only append an extra empty line if the previous line
# in the info_lines wasn't empty
if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
self.crashlog.info_lines.append(line)
self.parse_mode = CrashLogParseMode.NORMAL
elif self.parse_mode == CrashLogParseMode.NORMAL:
if line.startswith('Process:'):
(self.crashlog.process_name, pid_with_brackets) = line[
8:].strip().split(' [')
self.crashlog.process_id = pid_with_brackets.strip('[]')
elif line.startswith('Path:'):
self.crashlog.process_path = line[5:].strip()
elif line.startswith('Identifier:'):
self.crashlog.process_identifier = line[11:].strip()
elif line.startswith('Version:'):
version_string = line[8:].strip()
matched_pair = re.search("(.+)\((.+)\)", version_string)
if matched_pair:
self.crashlog.process_version = matched_pair.group(1)
self.crashlog.process_compatability_version = matched_pair.group(
2)
else:
self.crashlog.process = version_string
self.crashlog.process_compatability_version = version_string
elif self.parent_process_regex.search(line):
parent_process_match = self.parent_process_regex.search(
line)
self.crashlog.parent_process_name = parent_process_match.group(1)
self.crashlog.parent_process_id = parent_process_match.group(2)
elif line.startswith('Exception Type:'):
self.crashlog.thread_exception = line[15:].strip()
continue
elif line.startswith('Exception Codes:'):
self.crashlog.thread_exception_data = line[16:].strip()
continue
elif line.startswith('Exception Subtype:'): # iOS
self.crashlog.thread_exception_data = line[18:].strip()
continue
elif line.startswith('Crashed Thread:'):
self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
continue
elif line.startswith('Triggered by Thread:'): # iOS
self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
continue
elif line.startswith('Report Version:'):
self.crashlog.version = int(line[15:].strip())
continue
elif line.startswith('System Profile:'):
self.parse_mode = CrashLogParseMode.SYSTEM
continue
elif (line.startswith('Interval Since Last Report:') or
line.startswith('Crashes Since Last Report:') or
line.startswith('Per-App Interval Since Last Report:') or
line.startswith('Per-App Crashes Since Last Report:') or
line.startswith('Sleep/Wake UUID:') or
line.startswith('Anonymous UUID:')):
# ignore these
continue
elif line.startswith('Thread'):
thread_state_match = self.thread_state_regex.search(line)
if thread_state_match:
self.app_specific_backtrace = False
thread_state_match = self.thread_regex.search(line)
thread_idx = int(thread_state_match.group(1))
self.parse_mode = CrashLogParseMode.THREGS
self.thread = self.crashlog.threads[thread_idx]
continue
thread_insts_match = self.thread_instrs_regex.search(line)
if thread_insts_match:
self.parse_mode = CrashLogParseMode.INSTRS
continue
thread_match = self.thread_regex.search(line)
if thread_match:
self.app_specific_backtrace = False
self.parse_mode = CrashLogParseMode.THREAD
thread_idx = int(thread_match.group(1))
self.thread = self.crashlog.Thread(thread_idx, False)
continue
continue
elif line.startswith('Binary Images:'):
self.parse_mode = CrashLogParseMode.IMAGES
continue
elif line.startswith('Application Specific Backtrace'):
app_backtrace_match = self.app_backtrace_regex.search(line)
if app_backtrace_match:
self.parse_mode = CrashLogParseMode.THREAD
self.app_specific_backtrace = True
idx = int(app_backtrace_match.group(1))
self.thread = self.crashlog.Thread(idx, True)
elif line.startswith('Last Exception Backtrace:'): # iOS
self.parse_mode = CrashLogParseMode.THREAD
self.app_specific_backtrace = True
idx = 1
self.thread = self.crashlog.Thread(idx, True)
self.crashlog.info_lines.append(line.strip())
elif self.parse_mode == CrashLogParseMode.THREAD:
if line.startswith('Thread'):
continue
if self.null_frame_regex.search(line):
print('warning: thread parser ignored null-frame: "%s"' % line)
continue
frame_match = self.frame_regex.search(line)
if frame_match:
(frame_id, frame_img_name, _, frame_img_version, _,
frame_addr, frame_ofs) = frame_match.groups()
ident = frame_img_name
self.thread.add_ident(ident)
if ident not in self.crashlog.idents:
self.crashlog.idents.append(ident)
self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
frame_addr, 0), frame_ofs))
else:
print('error: frame regex failed for line: "%s"' % line)
elif self.parse_mode == CrashLogParseMode.IMAGES:
image_match = self.image_regex_uuid.search(line)
if image_match:
(img_lo, img_hi, img_name, _, img_version, _,
_, img_uuid, img_path) = image_match.groups()
image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
img_name.strip(),
img_version.strip()
if img_version else "",
uuid.UUID(img_uuid), img_path,
self.verbose)
self.crashlog.images.append(image)
else:
print("error: image regex failed for: %s" % line)
elif self.parse_mode == CrashLogParseMode.THREGS:
stripped_line = line.strip()
# "r12: 0x00007fff6b5939c8 r13: 0x0000000007000006 r14: 0x0000000000002a03 r15: 0x0000000000000c00"
reg_values = re.findall(
'([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
for reg_value in reg_values:
(reg, value) = reg_value.split(': ')
self.thread.registers[reg.strip()] = int(value, 0)
elif self.parse_mode == CrashLogParseMode.SYSTEM:
self.crashlog.system_profile.append(line)
elif self.parse_mode == CrashLogParseMode.INSTRS:
pass
return self.crashlog
def usage():
print("Usage: lldb-symbolicate.py [-n name] executable-image")
sys.exit(0)
@ -702,9 +703,10 @@ def interactive_crashlogs(options, args):
crash_logs = list()
for crash_log_file in crash_log_files:
crash_log = CrashLog(crash_log_file, options.verbose)
if crash_log.error:
print(crash_log.error)
try:
crash_log = CrashLogParser(crash_log_file, options.verbose).parse()
except Exception as e:
print(e)
continue
if options.debug:
crash_log.dump()
@ -836,9 +838,6 @@ def Symbolicate(debugger, command, result, dict):
def SymbolicateCrashLog(crash_log, options):
if crash_log.error:
print(crash_log.error)
return
if options.debug:
crash_log.dump()
if not crash_log.images:
@ -1040,7 +1039,8 @@ be disassembled and lookups can be performed using the addresses found in the cr
interactive_crashlogs(options, args)
else:
for crash_log_file in args:
crash_log = CrashLog(crash_log_file, options.verbose)
crash_log_parser = CrashLogParser(crash_log_file, options.verbose)
crash_log = crash_log_parser.parse()
SymbolicateCrashLog(crash_log, options)
if __name__ == '__main__':
# Create a new debugger instance
@ -1052,4 +1052,3 @@ elif getattr(lldb, 'debugger', None):
'command script add -f lldb.macosx.crashlog.Symbolicate crashlog')
lldb.debugger.HandleCommand(
'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog')
print('"crashlog" and "save_crashlog" command installed, use the "--help" option for detailed help')

View File

@ -1,12 +1,11 @@
# -*- python -*-
# REQUIRES: system-darwin
# UNSUPPORTED: lldb-repro
# DEBUG: cd %S/../../../../examples/python && cat %s | %lldb && false
# RUN: cd %S/../../../../examples/python && cat %s | %lldb | FileCheck %s
# DEBUG: cd %S/../../../../../examples/python && cat %s | %lldb && false
# RUN: cd %S/../../../../../examples/python && cat %s | %lldb | FileCheck %s
# CHECK-LABEL: {{S}}KIP BEYOND CHECKS
script
import crashlog
cl = crashlog.CrashLog
crash_log_parser = crashlog.CrashLogParser
crash_log = crashlog.CrashLog
images = [
"0x10b60b000 - 0x10f707fff com.apple.LLDB.framework (1.1000.11.38.2 - 1000.11.38.2) <96E36F5C-1A83-39A1-8713-5FDD9701C3F1> /Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/LLDB",
# CHECK: 0x10b60b000
@ -120,7 +119,7 @@ print("SKIP BEYOND CHECKS")
for image in images:
print('"%s"'%image)
print("--------------")
match = cl.image_regex_uuid.search(image)
match = crash_log_parser.image_regex_uuid.search(image)
for group in match.groups():
print(group)
@ -128,7 +127,7 @@ print("FRAMES")
for frame in frames:
print('"%s"'%frame)
print("--------------")
match = cl.frame_regex.search(frame)
match = crash_log_parser.frame_regex.search(frame)
for group in match.groups():
print(group)

View File

@ -0,0 +1,8 @@
# RUN: echo "quit" | %S/../../../../../examples/python/crashlog.py -i %s 2>&1 | FileCheck %s
# CHECK: 1 crash logs are loaded:
# CHECK: [0] = {{.*}}interactive.test
# CHECK: Interactive crashlogs prompt, type "help" to see a list of supported commands.
Binary Images:
0x10ab87000 - 0x10abdafff +lldb (10.0.0) <87BD1384-BAE9-3625-A838-9D241CBAEF87> /Volumes/VOLUME/*/lldb
0x10ac45000 - 0x10ae94fff com.apple.python3 (3.8.2 - 3.8.2) <20BC3FC4-CAAD-3002-ACDF-423A3188F24C> /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/Python3

View File

@ -0,0 +1,5 @@
if 'system-darwin' not in config.available_features:
config.unsupported = True
if 'lldb-repro' in config.available_features:
config.unsupported = True