From c7cbf32f5770c5eec2a2dd6eb7cf3153e654ed08 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 16 Nov 2020 13:46:44 -0800 Subject: [PATCH] [crashlog] Implement parser for JSON encoded crashlogs Add a parser for JSON crashlogs. The CrashLogParser now defers to either the JSONCrashLogParser or the TextCrashLogParser. It first tries to interpret the input as JSON and, if that fails, falls back to the textual parser. Differential revision: https://reviews.llvm.org/D91130 --- lldb/examples/python/crashlog.py | 131 +++++++++++++++++- .../Python/Crashlog/Inputs/Assertion.check | 1 + .../Python/Crashlog/Inputs/a.out.crash | 49 +++++++ .../Python/Crashlog/Inputs/a.out.ips | 96 +++++++++++++ .../Python/Crashlog/Inputs/test.c | 8 ++ .../Python/Crashlog/json.test | 10 ++ .../Python/Crashlog/parser_json.test | 45 ++++++ .../{crashlog.test => parser_text.test} | 2 +- .../Python/Crashlog/patch-crashlog.py | 60 ++++++++ .../Python/Crashlog/text.test | 10 ++ 10 files changed, 407 insertions(+), 5 deletions(-) create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/Assertion.check create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.ips create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/test.c create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_json.test rename lldb/test/Shell/ScriptInterpreter/Python/Crashlog/{crashlog.test => parser_text.test} (98%) create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/patch-crashlog.py create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 7d4de925628c..dbd9cea62e5d 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py 
@@ -41,6 +41,7 @@ import subprocess import sys import time import uuid +import json try: # First try for LLDB in case PYTHONPATH is already correctly setup. @@ -378,6 +379,129 @@ class CrashLog(symbolication.Symbolicator): return self.target +class CrashLogFormatException(Exception): + pass + + +class CrashLogParser: + def parse(self, debugger, path, verbose): + try: + return JSONCrashLogParser(debugger, path, verbose).parse() + except CrashLogFormatException: + return TextCrashLogParser(debugger, path, verbose).parse() + + +class JSONCrashLogParser: + def __init__(self, debugger, path, verbose): + self.path = os.path.expanduser(path) + self.verbose = verbose + self.crashlog = CrashLog(debugger, self.path, self.verbose) + + def parse(self): + with open(self.path, 'r') as f: + buffer = f.read() + + # First line is meta-data. + buffer = buffer[buffer.index('\n') + 1:] + + try: + self.data = json.loads(buffer) + except ValueError: + raise CrashLogFormatException() + + self.parse_process_info(self.data) + self.parse_images(self.data['usedImages']) + self.parse_threads(self.data['threads']) + + thread = self.crashlog.threads[self.crashlog.crashed_thread_idx] + thread.reason = self.parse_crash_reason(self.data['exception']) + thread.registers = self.parse_thread_registers(self.data['threadState']) + + return self.crashlog + + def get_image_extra_info(self, idx): + return self.data['legacyInfo']['imageExtraInfo'][idx] + + def get_used_image(self, idx): + return self.data['usedImages'][idx] + + def parse_process_info(self, json_data): + self.crashlog.process_id = json_data['pid'] + self.crashlog.process_identifier = json_data['procName'] + self.crashlog.process_path = json_data['procPath'] + + def parse_crash_reason(self, json_exception): + exception_type = json_exception['type'] + exception_signal = json_exception['signal'] + if 'codes' in json_exception: + exception_extra = " ({})".format(json_exception['codes']) + elif 'subtype' in json_exception: + exception_extra = " 
({})".format(json_exception['subtype']) + else: + exception_extra = "" + return "{} ({}){}".format(exception_type, exception_signal, + exception_extra) + + def parse_images(self, json_images): + idx = 0 + for json_images in json_images: + img_uuid = uuid.UUID(json_images[0]) + low = int(json_images[1]) + high = 0 + extra_info = self.get_image_extra_info(idx) + name = extra_info['name'] + path = extra_info['path'] + version = "" + darwin_image = self.crashlog.DarwinImage(low, high, name, version, + img_uuid, path, + self.verbose) + self.crashlog.images.append(darwin_image) + idx += 1 + + def parse_frames(self, thread, json_frames): + idx = 0 + for json_frame in json_frames: + image_id = int(json_frame[0]) + + ident = self.get_image_extra_info(image_id)['name'] + thread.add_ident(ident) + if ident not in self.crashlog.idents: + self.crashlog.idents.append(ident) + + frame_offset = int(json_frame[1]) + image = self.get_used_image(image_id) + image_addr = int(image[1]) + pc = image_addr + frame_offset + thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset)) + idx += 1 + + def parse_threads(self, json_threads): + idx = 0 + for json_thread in json_threads: + thread = self.crashlog.Thread(idx, False) + if json_thread.get('triggered', False): + self.crashlog.crashed_thread_idx = idx + thread.queue = json_thread.get('queue') + self.parse_frames(thread, json_thread.get('frames', [])) + self.crashlog.threads.append(thread) + idx += 1 + + def parse_thread_registers(self, json_thread_state): + idx = 0 + registers = dict() + for reg in json_thread_state.get('x', []): + key = str('x{}'.format(idx)) + value = int(reg) + registers[key] = value + idx += 1 + + for register in ['lr', 'cpsr', 'fp', 'sp', 'esr', 'pc']: + if register in json_thread_state: + registers[register] = int(json_thread_state[register]) + + return registers + + class CrashLogParseMode: NORMAL = 0 THREAD = 1 @@ -387,7 +511,7 @@ class CrashLogParseMode: INSTRS = 5 -class CrashLogParser: +class 
TextCrashLogParser: parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]') thread_state_regex = re.compile('^Thread ([0-9]+) crashed with') thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream') @@ -720,7 +844,7 @@ def interactive_crashlogs(debugger, options, args): crash_logs = list() for crash_log_file in crash_log_files: try: - crash_log = CrashLogParser(debugger, crash_log_file, options.verbose).parse() + crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose) except Exception as e: print(e) continue @@ -1055,8 +1179,7 @@ be disassembled and lookups can be performed using the addresses found in the cr interactive_crashlogs(debugger, options, args) else: for crash_log_file in args: - crash_log_parser = CrashLogParser(debugger, crash_log_file, options.verbose) - crash_log = crash_log_parser.parse() + crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose) SymbolicateCrashLog(crash_log, options) if __name__ == '__main__': # Create a new debugger instance diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/Assertion.check b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/Assertion.check new file mode 100644 index 000000000000..d92fb50784cf --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/Assertion.check @@ -0,0 +1 @@ +# CHECK-NOT: AssertionError diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash new file mode 100644 index 000000000000..27ffd9ec0015 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash @@ -0,0 +1,49 @@ +Process: a.out [21606] +Path: /private/tmp/a.out +Identifier: a.out +Version: 0 +Code Type: X86-64 (Native) +Parent Process: fish [88883] +User ID: 501 + +Date/Time: 2020-11-11 14:47:34.600 -0800 +OS Version: macOS 11.0.1 +Report Version: 12 +Bridge OS Version: redacted +Anonymous UUID: 
DCEF35CB-68D5-F524-FF13-060901F52EA8 + + +Time Awake Since Boot: 400000 seconds + +System Integrity Protection: enabled + +Crashed Thread: 0 Dispatch queue: com.apple.main-thread + +Exception Type: EXC_BAD_ACCESS (SIGSEGV) +Exception Codes: KERN_INVALID_ADDRESS at 0x0000000000000000 +Exception Note: EXC_CORPSE_NOTIFY + +Termination Signal: Segmentation fault: 11 +Termination Reason: Namespace SIGNAL, Code 0xb +Terminating Process: exc handler [21606] + +Thread 0 Crashed:: Dispatch queue: com.apple.main-thread +0 a.out @foo@ foo + 16 (test.c:3) +1 a.out @bar@ bar + 9 (test.c:6) +2 a.out @main@ main + 20 (test.c:8) +3 libdyld.dylib 0x0000000100000000 start + 1 + +Thread 0 crashed with X86 Thread State (64-bit): + rax: 0x0000000000000000 rbx: 0x0000000000000000 rcx: 0x00007ffee42d81d0 rdx: 0x00007ffee42d8080 + rdi: 0x0000000000000001 rsi: 0x00007ffee42d8070 rbp: 0x00007ffee42d8020 rsp: 0x00007ffee42d8020 + r8: 0x0000000000000000 r9: 0x0000000000000000 r10: 0x0000000000000000 r11: 0x0000000000000000 + r12: 0x0000000000000000 r13: 0x0000000000000000 r14: 0x0000000000000000 r15: 0x0000000000000000 + rip: 0x000000010b92af70 rfl: 0x0000000000010202 cr2: 0x0000000000000000 + +Logical CPU: 2 +Error Code: 0x00000006 (no mapping for user data write) +Trap Number: 14 + + +Binary Images: + 0x100000000 - 0x200000000 +a.out (0) <@UUID@> @EXEC@ diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.ips b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.ips new file mode 100644 index 000000000000..703acce05c65 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.ips @@ -0,0 +1,96 @@ +{"app_name":"a.out","timestamp":"2020-11-11 16:12:18.00 -0800","app_version":"","slice_uuid":"9b76648c-9b4e-33a9-a97e-10856e911631","build_version":"","platform":1,"share_with_app_devs":1,"is_first_party":1,"bug_type":"309","os_version":"macOS 11.0.1","incident_id":"598C4706-28B0-4D96-A2F9-AE6973BEC635","name":"a.out"} +{ + "uptime" : 180, 
+ "procLaunch" : "2020-11-11 16:12:12.4375 -0800", + "procRole" : "Unspecified", + "exception" : { + "type" : "EXC_BAD_ACCESS", + "signal" : "SIGSEGV", + "subtype" : "KERN_INVALID_ADDRESS at 0x00000000" + }, + "userID" : 501, + "modelCode" : "iMacPro1,1", + "coalitionID" : 471, + "osVersion" : { + "train" : "macOS 11.0.1", + "build" : "", + "releaseType" : "" + }, + "captureTime" : "2020-11-11 16:12:12.6267 -0800", + "incident" : "598C4706-28B0-4D96-A2F9-AE6973BEC635", + "pid" : 2187, + "cpuType" : "X86-64", + "procName" : "a.out", + "procPath" : "\/private\/tmp\/a.out", + "parentProc" : "fish", + "parentPid" : 1651, + "coalitionName" : "io.alacritty", + "crashReporterKey" : "DCEF35CB-68D5-F524-FF13-060901F52EA8", + "responsiblePid" : 428, + "responsibleProc" : "alacritty", + "bridgeVersion" : {"build":"","train":""}, + "sip" : "enabled", + "is_corpse" : 1, + "termination" : {"reason":"Namespace SIGNAL, Code 0xb","signal":"Segmentation fault: 11","byProc":"exc handler","code":11,"namespace":"SIGNAL","byPid":2187,"flags":0}, + "asi" : ["dyld2 mode"], + "extMods" : {"caller":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"system":{"thread_create":0,"thread_set_state":0,"task_for_pid":2067},"targeted":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"warnings":0}, + "threads" : [{"triggered":true,"id":22172,"queue":"com.apple.main-thread","frames":[[0,16240],[0,16265],[0,16292],[1,87601]]}], + "threadState" : { + "r13" : 0, + "rax" : 0, + "rflags" : 66054, + "cpu" : 6, + "rsi" : 140732908048520, + "r14" : 0, + "trap_description" : "(no mapping for user data write)", + "r8" : 0, + "cr2" : 0, + "rdx" : 140732908048536, + "r10" : 0, + "r9" : 0, + "r15" : 0, + "rbx" : 0, + "trap" : 14, + "err" : 6, + "r11" : 0, + "rip" : 4307689328, + "rbp" : 140732908048432, + "rsp" : 140732908048432, + "r12" : 0, + "rcx" : 140732908048880, + "flavor" : "x86_THREAD_STATE", + "rdi" : 1 +}, + "usedImages" : [ + [ + "@UUID@", + 0, + "P" + ], + [ + 
"6a1f593e-3705-314d-bb40-e7f9d502bf81", + 140733737017344, + "P" + ] +], + "legacyInfo" : { + "imageExtraInfo" : [ + { + "size" : 16384, + "arch" : "x86_64", + "path" : "@EXEC@", + "name" : "@NAME@" + }, + { + "size" : 241664, + "arch" : "x86_64", + "path" : "\/usr\/lib\/system\/libdyld.dylib", + "name" : "libdyld.dylib" + } + ], + "threadTriggered" : { + "index" : 0, + "queue" : "com.apple.main-thread" + } +} +} diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/test.c b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/test.c new file mode 100644 index 000000000000..3f0819b4b6c1 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/test.c @@ -0,0 +1,8 @@ +void foo() { + int *i = 0; + *i = 1; +} + +void bar() { foo(); } + +int main(int argc, char **argv) { bar(); } diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test new file mode 100644 index 000000000000..c92be0b7f2d0 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test @@ -0,0 +1,10 @@ +# RUN: %clang_host -g %S/Inputs/test.c -o %t.out +# RUN: cp %S/Inputs/a.out.ips %t.crash +# RUN: python %S/patch-crashlog.py %t.out %t.crash +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s + +# CHECK: Thread[0] EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS at 0x00000000) +# CHECK: [ 0] {{.*}}out`foo + 16 at test.c +# CHECK: [ 1] {{.*}}out`bar + 8 at test.c +# CHECK: [ 2] {{.*}}out`main + 19 at test.c +# CHECK: [ 3] {{.*}}start diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_json.test new file mode 100644 index 000000000000..50da725547fb --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_json.test @@ -0,0 +1,45 @@ +# -*- python -*- +# RUN: cd %S/../../../../../examples/python && cat %s | %lldb 
2>&1 > %t.out +# RUN: cat %t.out | FileCheck %S/Inputs/Assertion.check +script +import crashlog +import json + +parser = crashlog.JSONCrashLogParser("", "", False) + +process_info_json = json.loads('{"pid" : 287, "procName" : "mediaserverd", "procPath" : "\/usr\/sbin\/mediaserverd"}') +parser.parse_process_info(process_info_json) + +assert parser.crashlog.process_id == 287 +assert parser.crashlog.process_identifier == "mediaserverd" +assert parser.crashlog.process_path == "/usr/sbin/mediaserverd" + +crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV", "subtype" : "KERN_INVALID_ADDRESS"}') +assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS)" + +crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV"}') +assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV)" + +crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV", "codes" : "0x0000000000000000, 0x0000000000000000"}') +assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV) (0x0000000000000000, 0x0000000000000000)" + +thread_state_json = json.loads('{"x":[268451845,117442566],"lr":7309751904,"cpsr":1073741824,"fp":6093236784,"sp":6093236704,"esr":1442840704,"pc":7309755088}') +registers = parser.parse_thread_registers(thread_state_json) +assert registers['x0'] == 268451845 +assert registers['x1'] == 117442566 +assert registers['lr'] == 7309751904 +assert registers['cpsr'] ==1073741824 +assert registers['fp'] == 6093236784 +assert registers['sp'] == 6093236704 +assert registers['esr'] == 1442840704 +assert registers['pc'] == 7309755088 + +parser.data = json.loads('{"usedImages":[["f4d85377-f215-3da3-921e-3fe870e622e9",7309737984,"P"]],"legacyInfo":{"imageExtraInfo":[{"size":204800,"arch":"arm64e","path":"/usr/lib/system/libsystem_kernel.dylib","name":"libsystem_kernel.dylib"}]}}') +thread_json = 
json.loads('[{"triggered":true,"id":3835,"queue":"com.apple.bwgraph.devicevendor","frames":[[0,101472],[0,408892]]}]') +parser.parse_threads(thread_json) +assert len(parser.crashlog.threads) == 1 +assert parser.crashlog.threads[0].queue == "com.apple.bwgraph.devicevendor" +assert len(parser.crashlog.threads[0].frames) == 2 +assert parser.crashlog.threads[0].frames[0].pc == 7309839456 +assert parser.crashlog.threads[0].frames[0].description == 101472 +exit() diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test similarity index 98% rename from lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test rename to lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test index 6ac9392c87c1..7251d8541be1 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test @@ -4,7 +4,7 @@ # CHECK-LABEL: {{S}}KIP BEYOND CHECKS script import crashlog -crash_log_parser = crashlog.CrashLogParser +crash_log_parser = crashlog.TextCrashLogParser crash_log = crashlog.CrashLog images = [ "0x10b60b000 - 0x10f707fff com.apple.LLDB.framework (1.1000.11.38.2 - 1000.11.38.2) <96E36F5C-1A83-39A1-8713-5FDD9701C3F1> /Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/LLDB", diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/patch-crashlog.py b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/patch-crashlog.py new file mode 100644 index 000000000000..ba69547f6140 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/patch-crashlog.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +import json +import os +import re +import subprocess +import sys + + +class CrashLogPatcher: + + SYMBOL_REGEX = re.compile(r'^([0-9a-fA-F]+) T _(.*)$') + UUID_REGEX = re.compile(r'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*') + + def __init__(self, data, binary, offsets): + self.data = data + 
self.binary = binary + self.offsets = offsets + + def patch_executable(self): + self.data = self.data.replace("@EXEC@", self.binary) + self.data = self.data.replace("@NAME@", os.path.basename(self.binary)) + + def patch_uuid(self): + output = subprocess.check_output(['dwarfdump', '--uuid', self.binary]) + m = self.UUID_REGEX.match(output) + if m: + self.data = self.data.replace("@UUID@", m.group(1)) + + def patch_addresses(self): + if not self.offsets: + return + output = subprocess.check_output(['nm', self.binary]) + for line in output.splitlines(): + m = self.SYMBOL_REGEX.match(line) + if m: + address = m.group(1) + symbol = m.group(2) + if symbol in self.offsets: + patch_addr = int(m.group(1), 16) + int( + self.offsets[symbol]) + self.data = self.data.replace("@{}@".format(symbol), + str(hex(patch_addr))) + + +if __name__ == '__main__': + binary = sys.argv[1] + crashlog = sys.argv[2] + offsets = json.loads(sys.argv[3]) if len(sys.argv) > 3 else None + + with open(crashlog, 'r') as file: + data = file.read() + + p = CrashLogPatcher(data, binary, offsets) + p.patch_executable() + p.patch_uuid() + p.patch_addresses() + + with open(crashlog, 'w') as file: + file.write(p.data) diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test new file mode 100644 index 000000000000..137578494b0c --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test @@ -0,0 +1,10 @@ +# RUN: %clang_host -g %S/Inputs/test.c -o %t.out +# RUN: cp %S/Inputs/a.out.crash %t.crash +# RUN: python %S/patch-crashlog.py %t.out %t.crash '{"main":20, "bar":9, "foo":16}' +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s + +# CHECK: Thread[0] EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS at 0x0000000000000000) +# CHECK: [ 0] {{.*}}out`foo + 16 at test.c +# CHECK: [ 1] {{.*}}out`bar + 8 at test.c +# CHECK: [ 2] {{.*}}out`main + 19 at test.c +# 
CHECK: [ 3] {{.*}}start + 1