llvm-project/lldb/examples/python/bsd.py

482 lines
18 KiB
Python
Executable File

#!/usr/bin/python
import optparse
import os
import shlex
import struct
import sys
ARMAG = "!<arch>\n"
SARMAG = 8
ARFMAG = "`\n"
AR_EFMT1 = "#1/"
def memdump(src, bytes_per_line=16, address=0):
FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.'
for x in range(256)])
for i in range(0, len(src), bytes_per_line):
s = src[i:i+bytes_per_line]
hex_bytes = ' '.join(["%02x" % (ord(x)) for x in s])
ascii = s.translate(FILTER)
print("%#08.8x: %-*s %s" % (address+i, bytes_per_line*3, hex_bytes,
ascii))
class Object(object):
def __init__(self, file):
def read_str(file, str_len):
return file.read(str_len).rstrip('\0 ')
def read_int(file, str_len, base):
return int(read_str(file, str_len), base)
self.offset = file.tell()
self.file = file
self.name = read_str(file, 16)
self.date = read_int(file, 12, 10)
self.uid = read_int(file, 6, 10)
self.gid = read_int(file, 6, 10)
self.mode = read_int(file, 8, 8)
self.size = read_int(file, 10, 10)
if file.read(2) != ARFMAG:
raise ValueError('invalid BSD object at offset %#08.8x' % (
self.offset))
# If we have an extended name read it. Extended names start with
name_len = 0
if self.name.startswith(AR_EFMT1):
name_len = int(self.name[len(AR_EFMT1):], 10)
self.name = read_str(file, name_len)
self.obj_offset = file.tell()
self.obj_size = self.size - name_len
file.seek(self.obj_size, 1)
def dump(self, f=sys.stdout, flat=True):
if flat:
f.write('%#08.8x: %#08.8x %5u %5u %6o %#08.8x %s\n' % (self.offset,
self.date, self.uid, self.gid, self.mode, self.size,
self.name))
else:
f.write('%#08.8x: \n' % self.offset)
f.write(' name = "%s"\n' % self.name)
f.write(' date = %#08.8x\n' % self.date)
f.write(' uid = %i\n' % self.uid)
f.write(' gid = %i\n' % self.gid)
f.write(' mode = %o\n' % self.mode)
f.write(' size = %#08.8x\n' % (self.size))
self.file.seek(self.obj_offset, 0)
first_bytes = self.file.read(4)
f.write('bytes = ')
memdump(first_bytes)
def get_bytes(self):
saved_pos = self.file.tell()
self.file.seek(self.obj_offset, 0)
bytes = self.file.read(self.obj_size)
self.file.seek(saved_pos, 0)
return bytes
class StringTable(object):
def __init__(self, bytes):
self.bytes = bytes
def get_string(self, offset):
length = len(self.bytes)
if offset >= length:
return None
return self.bytes[offset:self.bytes.find('\0', offset)]
class Archive(object):
def __init__(self, path):
self.path = path
self.file = open(path, 'r')
self.objects = []
self.offset_to_object = {}
if self.file.read(SARMAG) != ARMAG:
print("error: file isn't a BSD archive")
while True:
try:
self.objects.append(Object(self.file))
except ValueError:
break
def get_object_at_offset(self, offset):
if offset in self.offset_to_object:
return self.offset_to_object[offset]
for obj in self.objects:
if obj.offset == offset:
self.offset_to_object[offset] = obj
return obj
return None
def find(self, name, mtime=None, f=sys.stdout):
'''
Find an object(s) by name with optional modification time. There
can be multple objects with the same name inside and possibly with
the same modification time within a BSD archive so clients must be
prepared to get multiple results.
'''
matches = []
for obj in self.objects:
if obj.name == name and (mtime is None or mtime == obj.date):
matches.append(obj)
return matches
@classmethod
def dump_header(self, f=sys.stdout):
f.write(' DATE UID GID MODE SIZE NAME\n')
f.write(' ---------- ----- ----- ------ ---------- '
'--------------\n')
def get_symdef(self):
def get_uint32(file):
'''Extract a uint32_t from the current file position.'''
v, = struct.unpack('=I', file.read(4))
return v
for obj in self.objects:
symdef = []
if obj.name.startswith("__.SYMDEF"):
self.file.seek(obj.obj_offset, 0)
ranlib_byte_size = get_uint32(self.file)
num_ranlib_structs = ranlib_byte_size/8
str_offset_pairs = []
for _ in range(num_ranlib_structs):
strx = get_uint32(self.file)
offset = get_uint32(self.file)
str_offset_pairs.append((strx, offset))
strtab_len = get_uint32(self.file)
strtab = StringTable(self.file.read(strtab_len))
for s in str_offset_pairs:
symdef.append((strtab.get_string(s[0]), s[1]))
return symdef
def get_object_dicts(self):
'''
Returns an array of object dictionaries that contain they following
keys:
'object': the actual bsd.Object instance
'symdefs': an array of symbol names that the object contains
as found in the "__.SYMDEF" item in the archive
'''
symdefs = self.get_symdef()
symdef_dict = {}
if symdefs:
for (name, offset) in symdefs:
if offset in symdef_dict:
object_dict = symdef_dict[offset]
else:
object_dict = {
'object': self.get_object_at_offset(offset),
'symdefs': []
}
symdef_dict[offset] = object_dict
object_dict['symdefs'].append(name)
object_dicts = []
for offset in sorted(symdef_dict):
object_dicts.append(symdef_dict[offset])
return object_dicts
def dump(self, f=sys.stdout, flat=True):
f.write('%s:\n' % self.path)
if flat:
self.dump_header(f=f)
for obj in self.objects:
obj.dump(f=f, flat=flat)
def main():
parser = optparse.OptionParser(
prog='bsd',
description='Utility for BSD archives')
parser.add_option(
'--object',
type='string',
dest='object_name',
default=None,
help=('Specify the name of a object within the BSD archive to get '
'information on'))
parser.add_option(
'-s', '--symbol',
type='string',
dest='find_symbol',
default=None,
help=('Specify the name of a symbol within the BSD archive to get '
'information on from SYMDEF'))
parser.add_option(
'--symdef',
action='store_true',
dest='symdef',
default=False,
help=('Dump the information in the SYMDEF.'))
parser.add_option(
'-v', '--verbose',
action='store_true',
dest='verbose',
default=False,
help='Enable verbose output')
parser.add_option(
'-e', '--extract',
action='store_true',
dest='extract',
default=False,
help=('Specify this to extract the object specified with the --object '
'option. There must be only one object with a matching name or '
'the --mtime option must be specified to uniquely identify a '
'single object.'))
parser.add_option(
'-m', '--mtime',
type='int',
dest='mtime',
default=None,
help=('Specify the modification time of the object an object. This '
'option is used with either the --object or --extract options.'))
parser.add_option(
'-o', '--outfile',
type='string',
dest='outfile',
default=None,
help=('Specify a different name or path for the file to extract when '
'using the --extract option. If this option isn\'t specified, '
'then the extracted object file will be extracted into the '
'current working directory if a file doesn\'t already exist '
'with that name.'))
(options, args) = parser.parse_args(sys.argv[1:])
for path in args:
archive = Archive(path)
if options.object_name:
print('%s:\n' % (path))
matches = archive.find(options.object_name, options.mtime)
if matches:
dump_all = True
if options.extract:
if len(matches) == 1:
dump_all = False
if options.outfile is None:
outfile_path = matches[0].name
else:
outfile_path = options.outfile
if os.path.exists(outfile_path):
print('error: outfile "%s" already exists' % (
outfile_path))
else:
print('Saving file to "%s"...' % (outfile_path))
with open(outfile_path, 'w') as outfile:
outfile.write(matches[0].get_bytes())
else:
print('error: multiple objects match "%s". Specify '
'the modification time using --mtime.' % (
options.object_name))
if dump_all:
for obj in matches:
obj.dump(flat=False)
else:
print('error: object "%s" not found in archive' % (
options.object_name))
elif options.find_symbol:
symdefs = archive.get_symdef()
if symdefs:
success = False
for (name, offset) in symdefs:
obj = archive.get_object_at_offset(offset)
if name == options.find_symbol:
print('Found "%s" in:' % (options.find_symbol))
obj.dump(flat=False)
success = True
if not success:
print('Didn\'t find "%s" in any objects' % (
options.find_symbol))
else:
print("error: no __.SYMDEF was found")
elif options.symdef:
object_dicts = archive.get_object_dicts()
for object_dict in object_dicts:
object_dict['object'].dump(flat=False)
print("symbols:")
for name in object_dict['symdefs']:
print(" %s" % (name))
else:
archive.dump(flat=not options.verbose)
if __name__ == '__main__':
main()
def print_mtime_error(result, dmap_mtime, actual_mtime):
print >>result, ("error: modification time in debug map (%#08.8x) doesn't "
"match the .o file modification time (%#08.8x)" % (
dmap_mtime, actual_mtime))
def print_file_missing_error(result, path):
print >>result, "error: file \"%s\" doesn't exist" % (path)
def print_multiple_object_matches(result, object_name, mtime, matches):
print >>result, ("error: multiple matches for object '%s' with with "
"modification time %#08.8x:" % (object_name, mtime))
Archive.dump_header(f=result)
for match in matches:
match.dump(f=result, flat=True)
def print_archive_object_error(result, object_name, mtime, archive):
matches = archive.find(object_name, f=result)
if len(matches) > 0:
print >>result, ("error: no objects have a modification time that "
"matches %#08.8x for '%s'. Potential matches:" % (
mtime, object_name))
Archive.dump_header(f=result)
for match in matches:
match.dump(f=result, flat=True)
else:
print >>result, "error: no object named \"%s\" found in archive:" % (
object_name)
Archive.dump_header(f=result)
for match in archive.objects:
match.dump(f=result, flat=True)
# archive.dump(f=result, flat=True)
class VerifyDebugMapCommand:
name = "verify-debug-map-objects"
def create_options(self):
usage = "usage: %prog [options]"
description = '''This command reports any .o files that are missing
or whose modification times don't match in the debug map of an executable.'''
self.parser = optparse.OptionParser(
description=description,
prog=self.name,
usage=usage,
add_help_option=False)
self.parser.add_option(
'-e', '--errors',
action='store_true',
dest='errors',
default=False,
help="Only show errors")
def get_short_help(self):
return "Verify debug map object files."
def get_long_help(self):
return self.help_string
def __init__(self, debugger, unused):
self.create_options()
self.help_string = self.parser.format_help()
def __call__(self, debugger, command, exe_ctx, result):
import lldb
# Use the Shell Lexer to properly parse up command options just like a
# shell would
command_args = shlex.split(command)
try:
(options, args) = self.parser.parse_args(command_args)
except:
result.SetError("option parsing failed")
return
# Always get program state from the SBExecutionContext passed in
target = exe_ctx.GetTarget()
if not target.IsValid():
result.SetError("invalid target")
return
archives = {}
for module_spec in args:
module = target.module[module_spec]
if not (module and module.IsValid()):
result.SetError('error: invalid module specification: "%s". '
'Specify the full path, basename, or UUID of '
'a module ' % (module_spec))
return
num_symbols = module.GetNumSymbols()
num_errors = 0
for i in range(num_symbols):
symbol = module.GetSymbolAtIndex(i)
if symbol.GetType() != lldb.eSymbolTypeObjectFile:
continue
path = symbol.GetName()
if not path:
continue
# Extract the value of the symbol by dumping the
# symbol. The value is the mod time.
dmap_mtime = int(str(symbol).split('value = ')
[1].split(',')[0], 16)
if not options.errors:
print >>result, '%s' % (path)
if os.path.exists(path):
actual_mtime = int(os.stat(path).st_mtime)
if dmap_mtime != actual_mtime:
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_mtime_error(result, dmap_mtime,
actual_mtime)
elif path[-1] == ')':
(archive_path, object_name) = path[0:-1].split('(')
if not archive_path and not object_name:
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_file_missing_error(path)
continue
if not os.path.exists(archive_path):
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_file_missing_error(archive_path)
continue
if archive_path in archives:
archive = archives[archive_path]
else:
archive = Archive(archive_path)
archives[archive_path] = archive
matches = archive.find(object_name, dmap_mtime)
num_matches = len(matches)
if num_matches == 1:
print >>result, '1 match'
obj = matches[0]
if obj.date != dmap_mtime:
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_mtime_error(result, dmap_mtime, obj.date)
elif num_matches == 0:
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_archive_object_error(result, object_name,
dmap_mtime, archive)
elif num_matches > 1:
num_errors += 1
if options.errors:
print >>result, '%s' % (path),
print_multiple_object_matches(result,
object_name,
dmap_mtime, matches)
if num_errors > 0:
print >>result, "%u errors found" % (num_errors)
else:
print >>result, "No errors detected in debug map"
def __lldb_init_module(debugger, dict):
# This initializer is being run from LLDB in the embedded command
# interpreter.
# Add any commands contained in this module to LLDB
debugger.HandleCommand(
'command script add -c %s.VerifyDebugMapCommand %s' % (
__name__, VerifyDebugMapCommand.name))
print('The "%s" command has been installed, type "help %s" for detailed '
'help.' % (VerifyDebugMapCommand.name, VerifyDebugMapCommand.name))