llvm-project/lldb/examples/python/symbolication.py

635 lines
26 KiB
Python
Raw Normal View History

#!/usr/bin/python
#----------------------------------------------------------------------
# Be sure to add the python path that points to the LLDB shared library.
#
# To use this in the embedded python interpreter using "lldb":
#
# cd /path/containing/crashlog.py
# lldb
# (lldb) script import crashlog
# "crashlog" command installed, type "crashlog --help" for detailed help
# (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
#
# The benefit of running the crashlog command inside lldb in the
# embedded python interpreter is when the command completes, there
# will be a target with all of the files loaded at the locations
# described in the crash log. Only the files that have stack frames
# in the backtrace will be loaded unless the "--load-all" option
# has been specified. This allows users to explore the program in the
# state it was in right at crash time.
#
# On MacOSX csh, tcsh:
# ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#
# On MacOSX sh, bash:
# PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------
import lldb
import commands
import optparse
import os
import plistlib
import re
import shlex
import sys
import time
import uuid
class Address:
    """Class that represents an address that will be symbolicated"""

    def __init__(self, target, load_addr):
        """Remember the target and load address; everything else is resolved lazily."""
        self.target = target
        self.load_addr = load_addr  # The load address that this object represents
        self.so_addr = None  # the resolved lldb.SBAddress (if any), named so_addr for section/offset address
        self.sym_ctx = None  # The cached symbol context for this address
        self.description = None  # Any original textual description of this address to be used as a backup in case symbolication fails
        self.symbolication = None  # The cached symbolicated string that describes this address
        self.inlined = False

    def __str__(self):
        """Return the load address followed by the best description available.

        Preference order: symbolicated string, original description, resolved
        section/offset address.
        """
        s = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            s += " %s" % (self.symbolication)
        elif self.description:
            s += " %s" % (self.description)
        elif self.so_addr:
            s += " %s" % (self.so_addr)
        return s

    def resolve_addr(self):
        """Resolve the load address in the target (once) and return the result."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return True if the last symbolication found an inlined block."""
        return self.inlined

    def get_symbol_context(self):
        """Return the symbol context for this address, computing and caching it on first use."""
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(sb_addr, lldb.eSymbolContextEverything)
            else:
                # Cache an empty context so the lookup is not retried
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the enclosing function (or symbol), or None without a symbol context."""
        sym_ctx = self.get_symbol_context()
        if sym_ctx:
            function = sym_ctx.GetFunction()
            if function:
                return function.GetInstructions(self.target)
            return sym_ctx.GetSymbol().GetInstructions(self.target)
        return None

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        The string has the form "module`name [inlined] inlined_name + offset at
        file:line:column"; with verbose set, full paths are used for the module
        and the source file.

        Returns True if a description is available, False otherwise. The work is
        done once; later calls report the cached outcome.
        """
        if self.symbolication is not None:
            # Already attempted: a failed attempt leaves an empty string behind,
            # a successful one a non-empty description
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full source file path in verbose mode
        if verbose:
            self.symbolication += str(module.GetFileSpec()) + '`'
        else:
            self.symbolication += module.GetFileSpec().GetFilename() + '`'

        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            self.symbolication += function.GetName()
            if inlined_block:
                self.inlined = True
                self.symbolication += ' [inlined] ' + inlined_block.GetInlinedName()
                # Offsets are reported relative to the start of the inlined
                # block range that contains the address, when there is one
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    block_range_start_addr = inlined_block.GetRangeStartAddress(block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target)
        elif symbol:
            self.symbolication += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target)
        else:
            # Neither a function nor a symbol: discard the module prefix
            self.symbolication = ''
            return False

        # Dump the offset from the current function or symbol if it is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            self.symbolication += " + %u" % (function_offset)
        elif function_offset < 0:
            self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += ' at %s' % line_entry.GetFileSpec()
            else:
                self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
            self.symbolication += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                self.symbolication += ':%u' % column
        return True
class Section:
    """Class that represents an load address range"""
    # Raw strings keep the regex escapes (\s) from being treated as string escapes
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        """Create a section covering [start_addr, end_addr); any field may be left unset."""
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Create a Section from an lldb section loaded in target.

        Returns None when the section has no valid load address in the target.
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr != lldb.LLDB_INVALID_ADDRESS:
            return cls(sect_load_addr, sect_load_addr + section.size, section.name)
        return None

    def contains(self, addr):
        """Return True if addr lies in [start_addr, end_addr).

        A section whose start or end is unknown (for example one built from a
        base address only) contains nothing.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr < self.end_addr

    def set_from_string(self, s):
        """Initialize this section from a "<name>=<range>" string.

        Accepted ranges are "<base>", "<base>-<end>" and "<base>+<size>", with
        every number written in 0x-prefixed hex. Returns True on success; on
        failure prints a description of the valid formats and returns False.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True
            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                # With '+' the second number is a size, not an end address
                if range_match.group('op') == '+':
                    self.end_addr += self.start_addr
                return True
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or a Python 3 call
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format Example Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base> __TEXT=0x123000 Section from base address only')
        print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size')
        return False

    def __str__(self):
        """Return "name=[start - end)", "name=start", the bare name, or "<invalid>" without a name."""
        if self.name:
            if self.end_addr is not None:
                if self.start_addr is not None:
                    return "%s=[0x%16.16x - 0x%16.16x)" % (self.name, self.start_addr, self.end_addr)
            else:
                if self.start_addr is not None:
                    return "%s=0x%16.16x" % (self.name, self.start_addr)
            return self.name
        return "<invalid>"
class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        """Describe the image at path, optionally identified by uuid."""
        self.path = path
        self.resolved_path = None
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        obj.arch = module.triple
        for section in module.sections:
            # Sections without a valid load address yield None and are skipped
            symb_section = Section.InitWithSBTargetAndSBSection(target, section)
            if symb_section:
                obj.section_infos.append(symb_section)
        obj.module = module
        obj.symfile = None
        obj.slide = None
        return obj

    def dump(self, prefix):
        """Print this image's one-line description preceded by prefix."""
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or a Python 3 call
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        if self.slide is None:
            # %i / %x cannot format None, which is the default slide
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        """Return uuid, arch, version, path, sections and slide, leaving out whatever is unset."""
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append a section to the list used when loading this image."""
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first section that contains load_addr, or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return the resolved path if set, else the original path, else None."""
        if self.resolved_path:
            return self.resolved_path
        elif self.path:
            return self.path
        return None

    def get_resolved_path_basename(self):
        """Return the basename of get_resolved_path(), or None without a path."""
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        """Return the basename of the symbol file, or None if there is none."""
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return True if there are section infos or a slide to load this image with."""
        return bool(self.section_infos) or self.slide is not None

    def load_module(self, target):
        """Set the load addresses of this image's module in target.

        Uses the section infos when there are any, otherwise applies the slide
        to the whole module. Returns None on success (or when the image was
        already reported unavailable) and an error string otherwise.
        """
        if self.unavailable:
            return None  # We already warned that we couldn't find this module, so don't return an error string
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if self.section_infos:
            num_sections_loaded = 0
            for section_info in self.section_infos:
                if not section_info.name:
                    # Without a name there is nothing to look up in the module
                    return 'error: unable to find "%s" section in "%s"' % (section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
                num_sections_loaded += 1
            if num_sections_loaded == 0:
                return 'error: no sections were successfully loaded'
        else:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load its sections if any load info is available.

        Returns None on success (or when the image is unavailable) and an error
        string otherwise.
        '''
        if not target:
            return 'error: invalid target'
        # Try and find using UUID only first so that paths need not match up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module:
            self.locate_module_and_debug_symbols()
            if self.unavailable:
                return None
            resolved_path = self.get_resolved_path()
            self.module = target.AddModule(resolved_path, self.arch, uuid_str, self.symfile)
        if not self.module:
            return 'error: unable to get module for (%s) "%s"' % (self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        return None  # No sections, the module was added to the target, so success

    def locate_module_and_debug_symbols(self):
        """Locate the image and its debug symbols; return True on success."""
        # By default, just use the paths that were supplied in:
        # self.path
        # self.resolved_path
        # self.module
        # self.symfile
        # Subclasses can inherit from this class and override this function
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, taking it from the module when it was not supplied."""
        if not self.uuid and self.module:
            self.uuid = uuid.UUID(self.module.GetUUIDString())
        return self.uuid

    def get_normalized_uuid_string(self):
        """Return the UUID as an upper-case string, or None if there is no UUID."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self):
        '''Create a target using the information in this Image object.

        Returns the new target, or None (after printing an error) when the image
        cannot be located or the target cannot be created.
        '''
        if self.unavailable:
            return None
        if self.locate_module_and_debug_symbols():
            resolved_path = self.get_resolved_path()
            path_spec = lldb.SBFileSpec(resolved_path)
            error = lldb.SBError()
            target = lldb.debugger.CreateTarget(resolved_path, self.arch, None, False, error)
            if target:
                self.module = target.FindModule(path_spec)
                if self.has_section_load_info():
                    err = self.load_module(target)
                    if err:
                        # A load failure is reported but the target is still returned
                        print('%s %s' % ('ERROR: ', err))
                return target
            else:
                print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
        else:
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
        return None
class Symbolicator:
    """Holds a target and a list of images and uses them to symbolicate load addresses"""

    def __init__(self):
        """A class the represents the information needed to symbolicate addresses in a program"""
        self.target = None
        self.images = list()  # a list of images to be used when symbolicating
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Build a Symbolicator for an existing target, with one Image per module."""
        symbolicator = cls()
        symbolicator.target = target
        symbolicator.images = list()
        target_triple = target.triple
        if target_triple:
            # The architecture is the first component of the triple
            if "arm" in target_triple.split('-')[0]:
                symbolicator.addr_mask = 0xfffffffffffffffe
        symbolicator.images.extend(
            [Image.InitWithSBTargetAndSBModule(target, sb_module) for sb_module in target.modules])
        return symbolicator

    def __str__(self):
        """Describe the target with its modules (when there is a target) and the images."""
        parts = ["Symbolicator:\n"]
        if self.target:
            parts.append("Target = '%s'\n" % (self.target))
            parts.append("Target modules:\n")
            for sb_module in self.target.modules:
                parts.append(str(sb_module) + "\n")
        parts.append("Images:\n")
        for img in self.images:
            parts.append(' %s\n' % (img))
        return ''.join(parts)

    def find_images_with_identifier(self, identifier):
        """Return the list of images whose identifier equals the given one."""
        return [img for img in self.images if img.identifier == identifier]

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing load_addr, or None."""
        for candidate in self.images:
            if candidate.get_section_containing_load_addr(load_addr):
                return candidate
        return None

    def create_target(self):
        """Return the current target, creating it from the first image that can produce one.

        Returns None when no image yields a target.
        """
        if self.target:
            return self.target
        if not self.images:
            return None
        for img in self.images:
            self.target = img.create_target()
            if not self.target:
                continue
            if self.target.GetAddressByteSize() == 4:
                target_triple = self.target.triple
                if target_triple and "arm" in target_triple.split('-')[0]:
                    self.addr_mask = 0xfffffffffffffffe
            return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate load_addr and return a list of Address objects.

        The first entry describes load_addr itself; each further entry is the
        parent of the previous entry's inlined scope. Returns None when there is
        no target or the address cannot be symbolicated.
        """
        if not self.target:
            self.create_target()
        if not self.target:
            # Single-argument print(...) produces the same output whether it is
            # parsed as a Python 2 statement or a Python 3 call
            print('error: no target in Symbolicator')
            return None

        has_live_process = False
        sb_process = self.target.process
        if sb_process:
            process_state = sb_process.state
            has_live_process = lldb.eStateUnloaded < process_state < lldb.eStateDetached

        # If we don't have a live process, we can attempt to find the image
        # that a load address belongs to and lazily load its module in the
        # target, but we shouldn't do any of this if we have a live process
        if not has_live_process:
            owning_image = self.find_image_containing_load_addr(load_addr)
            if owning_image:
                owning_image.add_module(self.target)

        frame = Address(self.target, load_addr)
        if not frame.symbolicate(verbose):
            return None
        if not frame.so_addr:
            return None

        frames = [frame]
        # Walk outwards through any inlined scopes, one frame per scope
        while True:
            parent_so_addr = lldb.SBAddress()
            parent_sym_ctx = frame.sym_ctx.GetParentOfInlinedScope(frame.so_addr, parent_so_addr)
            if not parent_sym_ctx or not parent_so_addr:
                break
            frame = Address(self.target, parent_so_addr.GetLoadAddress(self.target))
            frame.sym_ctx = parent_sym_ctx
            frame.so_addr = parent_so_addr
            frame.symbolicate(verbose)
            # push the new frame onto the new frame stack
            frames.append(frame)
        return frames
def disassemble_instructions (target, instructions, pc, insts_before_pc, insts_after_pc, non_zeroeth_frame):
    """Print the disassembly around pc, marking the current instruction with ' -> '.

    target            -- target used to resolve load addresses and to format instructions
    instructions      -- iterable of instruction objects to disassemble
    pc                -- load address to centre the listing on
    insts_before_pc   -- number of instructions to show before the marked one, -1 for all
    insts_after_pc    -- number of instructions to show after the marked one, -1 for all
    non_zeroeth_frame -- when true, mark the instruction preceding pc instead of the one at pc

    Nothing is printed if no instruction is located at pc.
    """
    comment_column = 50
    pc_marker = ' -> '
    lines = list()
    pc_index = -1
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        line = "%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)
        if comment:
            # Start comments at comment_column; a line that already reaches it
            # still gets one separating space so the comment is never lost
            line = line.ljust(max(comment_column, len(line) + 1))
            line += "; %s" % comment
        lines.append(line)

    if pc_index < 0:
        return

    last_idx = len(lines) - 1
    # If we are disassembling the non-zeroeth frame, we need to backup the PC by 1
    if non_zeroeth_frame and pc_index > 0:
        pc_index = pc_index - 1
    if insts_before_pc == -1:
        start_idx = 0
    else:
        start_idx = max(pc_index - insts_before_pc, 0)
    # The trailing window is governed by insts_after_pc, not insts_before_pc
    if insts_after_pc == -1:
        end_idx = last_idx
    else:
        end_idx = min(pc_index + insts_after_pc, last_idx)

    # Blank prefix as wide as the marker so the address columns line up
    blank_marker = ' ' * len(pc_marker)
    for i in range(start_idx, end_idx + 1):
        prefix = pc_marker if i == pc_index else blank_marker
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or a Python 3 call
        print('%s %s' % (prefix, lines[i]))
def print_module_section_data (section):
    """Print a section followed by a description of its data, when it has any."""
    # Single-argument print(...) produces the same output whether it is parsed
    # as a Python 2 statement or a Python 3 call
    print(section)
    data = section.GetSectionData()
    if not data:
        return
    stream = lldb.SBStream()
    data.GetDescription(stream, section.GetFileAddress())
    print(stream.GetData())
def print_module_section (section, depth):
    """Print a section and, recursively, its sub-sections down to depth further levels."""
    # Single-argument print(...) produces the same output whether it is parsed
    # as a Python 2 statement or a Python 3 call
    print(section)
    if depth <= 0:
        return
    child_count = section.GetNumSubSections()
    child_idx = 0
    while child_idx < child_count:
        print_module_section(section.GetSubSectionAtIndex(child_idx), depth - 1)
        child_idx += 1
def print_module_sections (module, depth):
    """Print every section produced by module.section_iter(), descending depth levels into sub-sections."""
    top_level_sections = module.section_iter()
    for section in top_level_sections:
        print_module_section(section, depth)
def print_module_symbols (module):
    """Print each item obtained by iterating over module, one per line."""
    for symbol in module:
        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or a Python 3 call
        print(symbol)
def Symbolicate(command_args):
    """Symbolicate the addresses given in command_args and print the results.

    command_args is a list of command line arguments: options (see the option
    definitions below) followed by one or more addresses, each parsed with
    int(addr, 0). One line is printed per symbolicated frame, followed by a
    blank line per address.

    Returns None. Calls sys.exit(1) when a --section argument is malformed.
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(description=description, prog='crashlog.py', usage=usage)
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    parser.add_option('-p', '--platform', type='string', metavar='platform', dest='platform', help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option('-f', '--file', type='string', metavar='file', dest='file', help='Specify a file to use when symbolicating')
    parser.add_option('-a', '--arch', type='string', metavar='arch', dest='arch', help='Specify a architecture to use when symbolicating')
    parser.add_option('-s', '--slide', type='int', metavar='slide', dest='slide', help='Specify the slide to use on the file specified with the --file option', default=None)
    parser.add_option('--section', type='string', action='append', dest='section_strings', help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options and --help by calling sys.exit(); turn
        # that into a plain return so the caller's process keeps running
        return

    symbolicator = Symbolicator()
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    # Single-argument print(...) produces the same output whether it is parsed
    # as a Python 2 statement or a Python 3 call
    if options.verbose:
        print(symbolicator)
    if target:
        for addr_str in args:
            try:
                addr = int(addr_str, 0)
            except ValueError:
                print('error: invalid address "%s"' % addr_str)
                continue
            symbolicated_addrs = symbolicator.symbolicate(addr, options.verbose)
            if symbolicated_addrs:
                for symbolicated_addr in symbolicated_addrs:
                    print(symbolicated_addr)
            else:
                # symbolicate() returns None when the address cannot be resolved
                print('error: unable to symbolicate %s' % addr_str)
            print('')
    else:
        print('error: no target for %s' % (symbolicator))
if __name__ == '__main__':
    # Run as a stand-alone script: create a new debugger instance and publish
    # it as lldb.debugger before symbolicating the command line arguments
    script_args = sys.argv[1:]
    lldb.debugger = lldb.SBDebugger.Create()
    Symbolicate(script_args)