2013-09-05 01:31:40 +08:00
#!/usr/bin/python
import cmd
import dict_utils
import file_extract
import optparse
import re
import struct
import string
import StringIO
import sys
import uuid
# Mach header "magic" constants
# (the *_CIGAM values are the byte-swapped forms of the matching *_MAGIC)
MH_MAGIC = 0xfeedface
MH_CIGAM = 0xcefaedfe
MH_MAGIC_64 = 0xfeedfacf
MH_CIGAM_64 = 0xcffaedfe
FAT_MAGIC = 0xcafebabe
FAT_CIGAM = 0xbebafeca
# Mach header "filetype" constants
MH_OBJECT = 0x00000001
MH_EXECUTE = 0x00000002
MH_FVMLIB = 0x00000003
MH_CORE = 0x00000004
MH_PRELOAD = 0x00000005
MH_DYLIB = 0x00000006
MH_DYLINKER = 0x00000007
MH_BUNDLE = 0x00000008
MH_DYLIB_STUB = 0x00000009
MH_DSYM = 0x0000000a
MH_KEXT_BUNDLE = 0x0000000b
# Mach header "flag" constant bits
MH_NOUNDEFS = 0x00000001
MH_INCRLINK = 0x00000002
MH_DYLDLINK = 0x00000004
MH_BINDATLOAD = 0x00000008
MH_PREBOUND = 0x00000010
MH_SPLIT_SEGS = 0x00000020
MH_LAZY_INIT = 0x00000040
MH_TWOLEVEL = 0x00000080
MH_FORCE_FLAT = 0x00000100
MH_NOMULTIDEFS = 0x00000200
MH_NOFIXPREBINDING = 0x00000400
MH_PREBINDABLE = 0x00000800
MH_ALLMODSBOUND = 0x00001000
MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000
MH_CANONICAL = 0x00004000
MH_WEAK_DEFINES = 0x00008000
MH_BINDS_TO_WEAK = 0x00010000
MH_ALLOW_STACK_EXECUTION = 0x00020000
MH_ROOT_SAFE = 0x00040000
MH_SETUID_SAFE = 0x00080000
MH_NO_REEXPORTED_DYLIBS = 0x00100000
MH_PIE = 0x00200000
MH_DEAD_STRIPPABLE_DYLIB = 0x00400000
MH_HAS_TLV_DESCRIPTORS = 0x00800000
MH_NO_HEAP_EXECUTION = 0x01000000
# Mach load command constants
# (LC_REQ_DYLD is a flag bit OR-ed into some of the command values below)
LC_REQ_DYLD = 0x80000000
LC_SEGMENT = 0x00000001
LC_SYMTAB = 0x00000002
LC_SYMSEG = 0x00000003
LC_THREAD = 0x00000004
LC_UNIXTHREAD = 0x00000005
LC_LOADFVMLIB = 0x00000006
LC_IDFVMLIB = 0x00000007
LC_IDENT = 0x00000008
LC_FVMFILE = 0x00000009
LC_PREPAGE = 0x0000000a
LC_DYSYMTAB = 0x0000000b
LC_LOAD_DYLIB = 0x0000000c
LC_ID_DYLIB = 0x0000000d
LC_LOAD_DYLINKER = 0x0000000e
LC_ID_DYLINKER = 0x0000000f
LC_PREBOUND_DYLIB = 0x00000010
LC_ROUTINES = 0x00000011
LC_SUB_FRAMEWORK = 0x00000012
LC_SUB_UMBRELLA = 0x00000013
LC_SUB_CLIENT = 0x00000014
LC_SUB_LIBRARY = 0x00000015
LC_TWOLEVEL_HINTS = 0x00000016
LC_PREBIND_CKSUM = 0x00000017
LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD
LC_SEGMENT_64 = 0x00000019
LC_ROUTINES_64 = 0x0000001a
LC_UUID = 0x0000001b
LC_RPATH = 0x0000001c | LC_REQ_DYLD
LC_CODE_SIGNATURE = 0x0000001d
LC_SEGMENT_SPLIT_INFO = 0x0000001e
LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD
LC_LAZY_LOAD_DYLIB = 0x00000020
LC_ENCRYPTION_INFO = 0x00000021
LC_DYLD_INFO = 0x00000022
LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD
LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD
LC_VERSION_MIN_MACOSX = 0x00000024
LC_VERSION_MIN_IPHONEOS = 0x00000025
LC_FUNCTION_STARTS = 0x00000026
LC_DYLD_ENVIRONMENT = 0x00000027
# Mach CPU constants
# (CPU_ARCH_ABI64 is OR-ed into a 32-bit CPU type to form its 64-bit variant)
CPU_ARCH_MASK = 0xff000000
CPU_ARCH_ABI64 = 0x01000000
CPU_TYPE_ANY = 0xffffffff
CPU_TYPE_VAX = 1
CPU_TYPE_MC680x0 = 6
CPU_TYPE_I386 = 7
CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64
CPU_TYPE_MIPS = 8
CPU_TYPE_MC98000 = 10
CPU_TYPE_HPPA = 11
CPU_TYPE_ARM = 12
CPU_TYPE_MC88000 = 13
CPU_TYPE_SPARC = 14
CPU_TYPE_I860 = 15
CPU_TYPE_ALPHA = 16
CPU_TYPE_POWERPC = 18
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
# VM protection constants
VM_PROT_READ = 1
VM_PROT_WRITE = 2
VM_PROT_EXECUTE = 4
# Bit masks for the nlist "Mach.NList.type" field
N_STAB = 0xe0
N_PEXT = 0x10
N_TYPE = 0x0e
N_EXT = 0x01
# Values for nlist N_TYPE bits of the "Mach.NList.type" field.
N_UNDF = 0x0
N_ABS = 0x2
N_SECT = 0xe
N_PBUD = 0xc
N_INDR = 0xa
# Section indexes for the "Mach.NList.sect_idx" fields
NO_SECT = 0
MAX_SECT = 255
# Stab defines
N_GSYM = 0x20
N_FNAME = 0x22
N_FUN = 0x24
N_STSYM = 0x26
N_LCSYM = 0x28
N_BNSYM = 0x2e
N_OPT = 0x3c
N_RSYM = 0x40
N_SLINE = 0x44
N_ENSYM = 0x4e
N_SSYM = 0x60
N_SO = 0x64
N_OSO = 0x66
N_LSYM = 0x80
N_BINCL = 0x82
N_SOL = 0x84
N_PARAMS = 0x86
N_VERSION = 0x88
N_OLEVEL = 0x8A
N_PSYM = 0xa0
N_EINCL = 0xa2
N_ENTRY = 0xa4
N_LBRAC = 0xc0
N_EXCL = 0xc2
N_RBRAC = 0xe0
N_BCOMM = 0xe2
N_ECOMM = 0xe4
N_ECOML = 0xe8
N_LENG = 0xfe
# Display strings for a VM protection mask, indexed by the OR of
# VM_PROT_READ (1), VM_PROT_WRITE (2) and VM_PROT_EXECUTE (4).
vm_prot_names = ['---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx']
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    """Print a hex + ASCII dump of ``data`` to stdout.

    Each output line shows the address of its first byte, up to
    ``num_per_line`` bytes as two-digit lowercase hex, and the same bytes as
    ASCII (non-printable bytes are shown as '.').  A blank line follows the
    dump.

    base_addr     -- address displayed for the first byte of ``data``.
    data          -- the bytes to dump (``str`` on Python 2, ``bytes`` or
                     ``bytearray`` on Python 3).
    hex_bytes_len -- number of hex digits (two per byte) to dump, or -1 to
                     dump everything.  Values larger than the data are
                     clamped instead of failing on the missing bytes.
    num_per_line  -- number of bytes shown on each line.
    """
    raw = bytearray(data)
    if hex_bytes_len == -1:
        num_bytes = len(raw)
    else:
        # Two hex digits per byte; an odd digit count still shows the byte.
        num_bytes = min(len(raw), max(0, (hex_bytes_len + 1) // 2))
    for start in range(0, num_bytes, num_per_line):
        chunk = raw[start:min(start + num_per_line, num_bytes)]
        hex_part = ' '.join(['%2.2x' % b for b in chunk])
        ascii_part = ''.join(
            [chr(b) if 32 <= b < 127 else '.' for b in chunk])
        # Pad a short final line so the ASCII column stays aligned.
        padding = num_per_line - len(chunk)
        # The address advances one per byte (not one per hex digit).
        print('0x%8.8x: %s %*s%s' % (base_addr + start, hex_part,
                                     padding * 3 + 1, '', ascii_part))
    print('')
class TerminalColors:
    '''Simple terminal colors class.

    Every method returns an ANSI SGR escape sequence as a string, or an empty
    string when the instance was created with "enabled" false, so the result
    can always be concatenated into output text.
    '''

    def __init__(self, enabled=True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, code):
        '''Return the escape sequence for SGR "code", or '' when disabled.'''
        if self.enabled:
            return "\x1b[%dm" % code
        return ''

    def _color(self, index, fg):
        '''Return the sequence for color "index" (0-7, or 9 for default).

        Foreground colors are SGR codes 30-39, background colors 40-49.
        '''
        if fg:
            return self._sgr(30 + index)
        return self._sgr(40 + index)

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._sgr(0)

    def bold(self, on=True):
        '''Enable or disable bold depending on the "on" parameter.'''
        if on:
            return self._sgr(1)
        return self._sgr(22)

    def italics(self, on=True):
        '''Enable or disable italics depending on the "on" parameter.'''
        if on:
            return self._sgr(3)
        return self._sgr(23)

    def underline(self, on=True):
        '''Enable or disable underline depending on the "on" parameter.'''
        if on:
            return self._sgr(4)
        return self._sgr(24)

    def inverse(self, on=True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        if on:
            return self._sgr(7)
        return self._sgr(27)

    def strike(self, on=True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        if on:
            return self._sgr(9)
        return self._sgr(29)

    def black(self, fg=True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(0, fg)

    def red(self, fg=True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(1, fg)

    def green(self, fg=True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(2, fg)

    def yellow(self, fg=True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # Previously the two codes were swapped (43 for fg, 33 for bg).
        return self._color(3, fg)

    def blue(self, fg=True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(4, fg)

    def magenta(self, fg=True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(5, fg)

    def cyan(self, fg=True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(6, fg)

    def white(self, fg=True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(7, fg)

    def default(self, fg=True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(9, fg)
def swap_unpack_char():
    """Return the struct byte-order prefix for the non-native endianness.

    '<' (little endian) is returned on a big-endian host and '>' (big
    endian) on a little-endian host.
    """
    # sys.byteorder avoids comparing packed bytes with a text string, which
    # raises TypeError on Python 3.
    if sys.byteorder == 'big':
        return '<'
    return '>'
def dump_hex_bytes(addr, s, bytes_per_line=16):
    """Print ``s`` as uppercase hex bytes, ``bytes_per_line`` per line.

    Every line starts with the address of its first byte (``addr`` plus the
    byte's index in ``s``).  ``s`` may be a character string or a bytes
    object; an empty ``s`` prints a single empty line.
    """
    line = ''
    for i, ch in enumerate(s):
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        # Iterating bytes yields ints on Python 3 but 1-char strings on
        # Python 2 (and for text strings), so only call ord() when needed.
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
    print(line)
def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    """Print a hex dump of ``a`` with bytes that differ from ``b`` in red.

    The dump covers the longer of the two inputs.  At each index the byte
    from ``a`` is shown, or the byte from ``b`` once ``a`` has run out; a
    byte is colored when the two inputs differ at that index (including
    when only one of them has a byte there).

    addr           -- address displayed for index 0.
    a, b           -- the two byte strings to compare.
    bytes_per_line -- number of bytes shown on each line.
    """
    line = ''
    a_len = len(a)
    b_len = len(b)
    tty_colors = TerminalColors(True)
    for i in range(max(a_len, b_len)):
        ch_a = a[i] if i < a_len else None
        ch_b = b[i] if i < b_len else None
        # Test against None explicitly: a zero byte is the integer 0 when
        # indexing bytes on Python 3 and must not be treated as "missing".
        ch = ch_b if ch_a is None else ch_a
        mismatch = ch_a != ch_b
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        if mismatch:
            line += tty_colors.red()
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
        if mismatch:
            line += tty_colors.default()
    print(line)
class Mach :
""" Class that does everything mach-o related """
class Arch:
    """Class that implements mach-o architectures.

    Holds a CPU type / CPU subtype pair and converts it to an architecture
    name through the ``cpu_infos`` table.
    """

    def __init__(self, c=0, s=0):
        self.cpu = c
        self.sub = s

    def set_cpu_type(self, c):
        """Set the CPU type."""
        self.cpu = c

    def set_cpu_subtype(self, s):
        """Set the CPU subtype."""
        self.sub = s

    def set_arch(self, c, s):
        """Set both the CPU type and the CPU subtype."""
        self.cpu = c
        self.sub = s

    def is_64_bit(self):
        """Return True if the CPU type has the CPU_ARCH_ABI64 bit set."""
        return (self.cpu & CPU_ARCH_ABI64) != 0

    # Rows are [name, cpu type, cpu subtype].  A subtype of CPU_TYPE_ANY
    # marks the fallback name used when no exact subtype row matches.
    cpu_infos = [
        ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY],
        ["arm", CPU_TYPE_ARM, 0],
        ["armv4", CPU_TYPE_ARM, 5],
        ["armv6", CPU_TYPE_ARM, 6],
        ["armv5", CPU_TYPE_ARM, 7],
        ["xscale", CPU_TYPE_ARM, 8],
        ["armv7", CPU_TYPE_ARM, 9],
        ["armv7f", CPU_TYPE_ARM, 10],
        ["armv7s", CPU_TYPE_ARM, 11],
        ["armv7k", CPU_TYPE_ARM, 12],
        ["armv7m", CPU_TYPE_ARM, 15],
        ["armv7em", CPU_TYPE_ARM, 16],
        ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY],
        ["ppc", CPU_TYPE_POWERPC, 0],
        ["ppc601", CPU_TYPE_POWERPC, 1],
        ["ppc602", CPU_TYPE_POWERPC, 2],
        ["ppc603", CPU_TYPE_POWERPC, 3],
        ["ppc603e", CPU_TYPE_POWERPC, 4],
        ["ppc603ev", CPU_TYPE_POWERPC, 5],
        ["ppc604", CPU_TYPE_POWERPC, 6],
        ["ppc604e", CPU_TYPE_POWERPC, 7],
        ["ppc620", CPU_TYPE_POWERPC, 8],
        ["ppc750", CPU_TYPE_POWERPC, 9],
        ["ppc7400", CPU_TYPE_POWERPC, 10],
        ["ppc7450", CPU_TYPE_POWERPC, 11],
        ["ppc970", CPU_TYPE_POWERPC, 100],
        ["ppc64", CPU_TYPE_POWERPC64, 0],
        ["ppc970-64", CPU_TYPE_POWERPC64, 100],
        ["i386", CPU_TYPE_I386, 3],
        ["i486", CPU_TYPE_I386, 4],
        ["i486sx", CPU_TYPE_I386, 0x84],
        ["i386", CPU_TYPE_I386, CPU_TYPE_ANY],
        ["x86_64", CPU_TYPE_X86_64, 3],
        ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY],
    ]

    def __str__(self):
        """Return the architecture name, or "<cpu>.<sub>" if it is unknown.

        The top byte of the subtype is masked off before the lookup.  An
        exact subtype row wins; otherwise the CPU_TYPE_ANY row for the cpu
        type (if any) supplies the name.
        """
        sub = self.sub & 0x00ffffff
        fallback = None
        for name, cpu, info_sub in self.cpu_infos:
            if cpu != self.cpu:
                continue
            if info_sub == sub:
                return name
            # The masked subtype can never equal CPU_TYPE_ANY (0xffffffff),
            # so wildcard rows have to be recognised explicitly.
            if info_sub == CPU_TYPE_ANY and fallback is None:
                fallback = name
        if fallback is not None:
            return fallback
        return "{0}.{1}".format(self.cpu, self.sub)
class Magic(dict_utils.Enum):
    """Enum wrapper for the 32-bit magic number at the start of a file."""

    enum = {
        'MH_MAGIC': MH_MAGIC,
        'MH_CIGAM': MH_CIGAM,
        'MH_MAGIC_64': MH_MAGIC_64,
        'MH_CIGAM_64': MH_CIGAM_64,
        'FAT_MAGIC': FAT_MAGIC,
        'FAT_CIGAM': FAT_CIGAM
    }

    def __init__(self, initial_value=0):
        dict_utils.Enum.__init__(self, initial_value, self.enum)

    def is_skinny_mach_file(self):
        """Return True for any of the four single-architecture magics."""
        return self.value in (MH_MAGIC, MH_CIGAM, MH_MAGIC_64, MH_CIGAM_64)

    def is_universal_mach_file(self):
        """Return True for either of the two universal (fat) magics."""
        return self.value in (FAT_MAGIC, FAT_CIGAM)

    def unpack(self, data):
        """Read the magic from ``data`` as a native byte order uint32."""
        data.set_byte_order('native')
        self.value = data.get_uint32()

    def get_byte_order(self):
        """Return the struct prefix needed to read the rest of the file.

        A byte-swapped (CIGAM) magic selects the non-native byte order; any
        other value selects native order ('=').
        """
        if self.value in (MH_CIGAM, MH_CIGAM_64, FAT_CIGAM):
            return swap_unpack_char()
        return '='

    def is_64_bit(self):
        """Return True if the magic is one of the two 64-bit magics."""
        return self.value in (MH_MAGIC_64, MH_CIGAM_64)
def __init__(self):
    """Create an empty object; ``content`` stays None until data is unpacked."""
    self.path = None
    self.content = None
    self.magic = Mach.Magic()
def extract(self, path, extractor):
    """Record ``path`` and unpack the file from an existing ``extractor``."""
    self.path = path
    self.unpack(extractor)
def parse(self, path):
    """Open the file at ``path`` and unpack it.

    I/O errors and ValueError are reported on stdout and swallowed; any
    other exception is reported and re-raised.
    """
    self.path = path
    try:
        # The file is binary data, so open it in binary mode.
        f = open(self.path, 'rb')
        file_extractor = file_extract.FileExtract(f, '=')
        self.unpack(file_extractor)
        # The file is not closed here: the unpacked content keeps the
        # extractor and reads from it again later (e.g. for the symbol
        # table and section contents).
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
    except ValueError:
        print("Could not convert data to an integer.")
    except:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
def compare(self, rhs):
    """Compare this file's content with that of ``rhs``.

    Returns whatever the content object's ``compare`` returns, so callers
    can act on the outcome (it used to be discarded).
    """
    return self.content.compare(rhs.content)
def dump(self, options=None):
    """Dump the file as directed by ``options`` (forwards to the content)."""
    content = self.content
    content.dump(options)
def dump_header(self, dump_description=True, options=None):
    """Dump the header(s) of the file (forwards to the content)."""
    content = self.content
    content.dump_header(dump_description, options)
def dump_load_commands(self, dump_description=True, options=None):
    """Dump the load commands of the file (forwards to the content)."""
    content = self.content
    content.dump_load_commands(dump_description, options)
def dump_sections(self, dump_description=True, options=None):
    """Dump the sections of the file (forwards to the content)."""
    content = self.content
    content.dump_sections(dump_description, options)
def dump_section_contents(self, options):
    """Dump or save the sections selected by ``options`` (forwards to the content)."""
    content = self.content
    content.dump_section_contents(options)
def dump_symtab(self, dump_description=True, options=None):
    """Dump the symbol table of the file (forwards to the content)."""
    content = self.content
    content.dump_symtab(dump_description, options)
def dump_symbol_names_matching_regex(self, regex, file=None):
    """Dump symbol names matching ``regex`` (forwards to the content).

    ``file`` is passed through unchanged to the content object.
    """
    content = self.content
    content.dump_symbol_names_matching_regex(regex, file)
def description(self):
    """Return the content object's one-line description of the file."""
    content = self.content
    return content.description()
def unpack(self, data):
    """Read the magic from ``data`` and unpack the matching file kind.

    ``self.content`` becomes a Mach.Skinny or Mach.Universal object, or
    None when the magic is not recognised.
    """
    self.magic.unpack(data)
    if self.magic.is_skinny_mach_file():
        self.content = Mach.Skinny(self.path)
    elif self.magic.is_universal_mach_file():
        self.content = Mach.Universal(self.path)
    else:
        self.content = None
    # Identity test: "!= None" would go through any custom comparison.
    if self.content is not None:
        self.content.unpack(data, self.magic)
def is_valid(self):
    """Return True once a recognised file has been unpacked into ``content``."""
    # Identity test: "!= None" would go through any custom comparison.
    return self.content is not None
class Universal:
    """A universal ("fat") file: a table of architectures, each of which
    refers to a single-architecture (skinny) mach-o file inside this file."""

    def __init__(self, path):
        self.path = path
        self.type = 'universal'
        self.file_off = 0
        self.magic = None
        self.nfat_arch = 0
        self.archs = list()

    def description(self):
        """Return "<file offset>: <path> (<arch>, <arch>, ...)"."""
        arch_names = ', '.join(['%s' % info.arch for info in self.archs])
        return '%#8.8x: %s (%s)' % (self.file_off, self.path, arch_names)

    def unpack(self, data, magic=None):
        """Read the architecture table and every slice it refers to.

        If ``magic`` is given it has already been read from ``data``, so the
        recorded file offset is moved back over those four bytes.
        """
        self.file_off = data.tell()
        if magic is None:
            self.magic = Mach.Magic()
            self.magic.unpack(data)
        else:
            self.magic = magic
            self.file_off = self.file_off - 4
        # Universal headers are always in big endian
        data.set_byte_order('big')
        self.nfat_arch = data.get_uint32()
        # Read the whole table first: unpacking a slice seeks elsewhere.
        for i in range(self.nfat_arch):
            self.archs.append(Mach.Universal.ArchInfo())
            self.archs[i].unpack(data)
        for i in range(self.nfat_arch):
            self.archs[i].mach = Mach.Skinny(self.path)
            data.seek(self.archs[i].offset, 0)
            skinny_magic = Mach.Magic()
            skinny_magic.unpack(data)
            self.archs[i].mach.unpack(data, skinny_magic)

    def compare(self, rhs):
        """Report that comparison is unsupported; always returns False."""
        print('error: comparing two universal files is not supported yet')
        return False

    def dump(self, options):
        """Dump the architecture table (if requested) and then every slice."""
        if options.dump_header:
            print('')
            print("Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch))
            print('')
        if self.nfat_arch > 0:
            if options.dump_header:
                self.archs[0].dump_header(True, options)
                for i in range(self.nfat_arch):
                    self.archs[i].dump_flat(options)
            if options.dump_header:
                print('')
            for i in range(self.nfat_arch):
                self.archs[i].mach.dump(options)

    def dump_header(self, dump_description=True, options=None):
        """Dump the mach header of every slice."""
        if dump_description:
            print(self.description())
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_header(True, options)
            print('')

    def dump_load_commands(self, dump_description=True, options=None):
        """Dump the load commands of every slice."""
        if dump_description:
            print(self.description())
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_load_commands(True, options)
            print('')

    def dump_sections(self, dump_description=True, options=None):
        """Dump the sections of every slice."""
        if dump_description:
            print(self.description())
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_sections(True, options)
            print('')

    def dump_section_contents(self, options):
        """Dump the selected section contents of every slice."""
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_section_contents(options)
            print('')

    def dump_symtab(self, dump_description=True, options=None):
        """Dump the symbol table of every slice."""
        if dump_description:
            print(self.description())
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_symtab(True, options)
            print('')

    def dump_symbol_names_matching_regex(self, regex, file=None):
        """Dump the symbol names matching ``regex`` in every slice."""
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump_symbol_names_matching_regex(regex, file)

    class ArchInfo:
        """One entry of the architecture table of a universal file."""

        def __init__(self):
            self.arch = Mach.Arch(0, 0)
            self.offset = 0
            self.size = 0
            self.align = 0
            self.mach = None

        def unpack(self, data):
            """Read cputype, cpusubtype, offset, size and align from ``data``."""
            # Universal headers are always in big endian
            data.set_byte_order('big')
            self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = data.get_n_uint32(5)

        def dump_header(self, dump_description=True, options=None):
            """Print the column titles matching the rows of dump_flat().

            ``options`` may be None (the default), which selects the
            non-verbose layout instead of failing on ``options.verbose``.
            ``dump_description`` is accepted but not used.
            """
            if options is not None and options.verbose:
                titles = ["CPU", "SUBTYPE", "OFFSET", "SIZE", "ALIGN"]
            else:
                titles = ["ARCH", "FILEOFFSET", "FILESIZE", "ALIGN"]
            # Every column is ten characters wide, like the dump_flat() values.
            print(' '.join(['%-10s' % title for title in titles]).rstrip())
            print(' '.join(['-' * 10] * len(titles)))

        def dump_flat(self, options):
            """Print this entry as one table row."""
            if options.verbose:
                print("%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align))
            else:
                print("%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align))

        def dump(self):
            """Print this entry one field per line."""
            print("cputype: %#8.8x" % self.arch.cpu)
            print("cpusubtype: %#8.8x" % self.arch.sub)
            print("offset: %#8.8x" % self.offset)
            print("size: %#8.8x" % self.size)
            print("align: %#8.8x" % self.align)

        def __str__(self):
            return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)

        def __repr__(self):
            # Same text as str(); kept as one implementation.
            return self.__str__()
class Flags:
    """The "flags" bit field of a mach header."""

    def __init__(self, b):
        self.bits = b

    def __str__(self):
        """Return the names of the set MH_* bits joined by " | ".

        Returns an empty string when no known bit is set.  Joining the names
        avoids the trailing space that was left behind when the last
        separator was sliced off.
        """
        known_flags = (
            (MH_NOUNDEFS, 'MH_NOUNDEFS'),
            (MH_INCRLINK, 'MH_INCRLINK'),
            (MH_DYLDLINK, 'MH_DYLDLINK'),
            (MH_BINDATLOAD, 'MH_BINDATLOAD'),
            (MH_PREBOUND, 'MH_PREBOUND'),
            (MH_SPLIT_SEGS, 'MH_SPLIT_SEGS'),
            (MH_LAZY_INIT, 'MH_LAZY_INIT'),
            (MH_TWOLEVEL, 'MH_TWOLEVEL'),
            (MH_FORCE_FLAT, 'MH_FORCE_FLAT'),
            (MH_NOMULTIDEFS, 'MH_NOMULTIDEFS'),
            (MH_NOFIXPREBINDING, 'MH_NOFIXPREBINDING'),
            (MH_PREBINDABLE, 'MH_PREBINDABLE'),
            (MH_ALLMODSBOUND, 'MH_ALLMODSBOUND'),
            (MH_SUBSECTIONS_VIA_SYMBOLS, 'MH_SUBSECTIONS_VIA_SYMBOLS'),
            (MH_CANONICAL, 'MH_CANONICAL'),
            (MH_WEAK_DEFINES, 'MH_WEAK_DEFINES'),
            (MH_BINDS_TO_WEAK, 'MH_BINDS_TO_WEAK'),
            (MH_ALLOW_STACK_EXECUTION, 'MH_ALLOW_STACK_EXECUTION'),
            (MH_ROOT_SAFE, 'MH_ROOT_SAFE'),
            (MH_SETUID_SAFE, 'MH_SETUID_SAFE'),
            (MH_NO_REEXPORTED_DYLIBS, 'MH_NO_REEXPORTED_DYLIBS'),
            (MH_PIE, 'MH_PIE'),
            (MH_DEAD_STRIPPABLE_DYLIB, 'MH_DEAD_STRIPPABLE_DYLIB'),
            (MH_HAS_TLV_DESCRIPTORS, 'MH_HAS_TLV_DESCRIPTORS'),
            (MH_NO_HEAP_EXECUTION, 'MH_NO_HEAP_EXECUTION'),
        )
        return ' | '.join(
            [name for mask, name in known_flags if self.bits & mask])
class FileType(dict_utils.Enum):
    """Enum wrapper for the "filetype" field of a mach header."""

    enum = dict(
        MH_OBJECT=MH_OBJECT,
        MH_EXECUTE=MH_EXECUTE,
        MH_FVMLIB=MH_FVMLIB,
        MH_CORE=MH_CORE,
        MH_PRELOAD=MH_PRELOAD,
        MH_DYLIB=MH_DYLIB,
        MH_DYLINKER=MH_DYLINKER,
        MH_BUNDLE=MH_BUNDLE,
        MH_DYLIB_STUB=MH_DYLIB_STUB,
        MH_DSYM=MH_DSYM,
        MH_KEXT_BUNDLE=MH_KEXT_BUNDLE,
    )

    def __init__(self, initial_value=0):
        dict_utils.Enum.__init__(self, initial_value, self.enum)
class Skinny :
def __init__(self, path):
    """Create an empty single-architecture file object for ``path``."""
    self.path = path
    self.type = 'skinny'
    self.data = None
    self.file_off = 0
    # Header fields, filled in by unpack().
    self.magic = 0
    self.arch = Mach.Arch(0, 0)
    self.filetype = Mach.FileType(0)
    self.ncmds = 0
    self.sizeofcmds = 0
    self.flags = Mach.Flags(0)
    self.uuid = None
    self.commands = []
    self.segments = []
    # Index 0 holds a placeholder section; code that walks the sections
    # starts at index 1.
    self.sections = [Mach.Section()]
    self.symbols = []
def description(self):
    """Return "<file offset>: <path> (<arch>)" for this file."""
    fields = (self.file_off, self.path, self.arch)
    return '%#8.8x: %s (%s)' % fields
def unpack(self, data, magic=None):
    """Read the mach header and all of its load commands from ``data``.

    If ``magic`` is given it has already been read from ``data``, so the
    recorded file offset is moved back over those four bytes.
    """
    self.data = data
    start = data.tell()
    if magic is not None:
        self.magic = magic
        self.file_off = start - 4
    else:
        self.file_off = start
        self.magic = Mach.Magic()
        self.magic.unpack(data)
    data.set_byte_order(self.magic.get_byte_order())
    header = data.get_n_uint32(6)
    (self.arch.cpu, self.arch.sub, self.filetype.value,
     self.ncmds, self.sizeofcmds, bits) = header
    self.flags.bits = bits
    if self.is_64_bit():
        data.get_uint32()  # Skip reserved word in mach_header_64
    for _ in range(self.ncmds):
        self.commands.append(self.unpack_load_command(data))
def get_data(self):
    """Return the data extractor set to this file's byte order.

    Returns None when there is no extractor (nothing unpacked yet).
    """
    if not self.data:
        return None
    self.data.set_byte_order(self.magic.get_byte_order())
    return self.data
def unpack_load_command(self, data):
    """Read one load command from ``data`` and return it.

    The generic command header is read first.  When the command is one of
    the kinds listed below, the generic object is wrapped in the matching
    specialised class, which then unpacks the rest of the command.  In all
    cases ``skip`` is called on the result before it is returned.
    """
    lc = Mach.LoadCommand()
    lc.unpack(self, data)
    lc_command = lc.command.get_enum_value()
    # (commands, name of the Mach class that decodes them); the class is
    # looked up by name only when one of its commands is actually seen.
    decoders = (
        ((LC_SEGMENT, LC_SEGMENT_64), 'SegmentLoadCommand'),
        ((LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB,
          LC_REEXPORT_DYLIB), 'DylibLoadCommand'),
        ((LC_LOAD_DYLINKER, LC_SUB_FRAMEWORK, LC_SUB_CLIENT,
          LC_SUB_UMBRELLA, LC_SUB_LIBRARY, LC_ID_DYLINKER,
          LC_RPATH), 'LoadDYLDLoadCommand'),
        ((LC_DYLD_INFO_ONLY,), 'DYLDInfoOnlyLoadCommand'),
        ((LC_SYMTAB,), 'SymtabLoadCommand'),
        ((LC_DYSYMTAB,), 'DYLDSymtabLoadCommand'),
        ((LC_UUID,), 'UUIDLoadCommand'),
        ((LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
          LC_FUNCTION_STARTS), 'DataBlobLoadCommand'),
        ((LC_UNIXTHREAD,), 'UnixThreadLoadCommand'),
        ((LC_ENCRYPTION_INFO,), 'EncryptionInfoLoadCommand'),
    )
    for commands, class_name in decoders:
        if lc_command in commands:
            lc = getattr(Mach, class_name)(lc)
            lc.unpack(self, data)
            break
    lc.skip(data)
    return lc
def compare(self, rhs):
    """Compare the section contents of this file with those of ``rhs``.

    Every section of this file (the placeholder at index 0 is skipped) is
    looked up in ``rhs`` by segment and section name and the raw contents
    are compared.  Progress and errors are printed to stdout.

    Returns True if every section was found in ``rhs`` with identical
    contents, otherwise False.
    """
    print("\nComparing:")
    print("a) %s %s" % (self.arch, self.path))
    print("b) %s %s" % (rhs.arch, rhs.path))
    result = True
    if self.type == rhs.type:
        for lhs_section in self.sections[1:]:
            rhs_section = rhs.get_section_by_section(lhs_section)
            if not rhs_section:
                result = False
                print('error: section %s is missing in %s' % (lhs_section.sectname, rhs.path))
                continue
            # No newline: the verdict is printed on the same line.
            sys.stdout.write('comparing %s.%s... ' % (lhs_section.segname, lhs_section.sectname))
            sys.stdout.flush()
            lhs_data = lhs_section.get_contents(self)
            rhs_data = rhs_section.get_contents(rhs)
            if lhs_data and rhs_data:
                if lhs_data == rhs_data:
                    print('ok')
                else:
                    result = False
                    print('error: sections differ')
            elif lhs_data and not rhs_data:
                print('error: section data missing from b:')
                print('a) %s' % (lhs_section))
                print('b) %s' % (rhs_section))
                result = False
            elif not lhs_data and rhs_data:
                print('error: section data missing from a:')
                print('a) %s' % (lhs_section))
                print('b) %s' % (rhs_section))
                result = False
            elif lhs_section.offset or rhs_section.offset:
                # Neither side has data although a file offset says it should.
                print('error: section data missing for both a and b:')
                print('a) %s' % (lhs_section))
                print('b) %s' % (rhs_section))
                result = False
            else:
                print('ok')
    else:
        print('error: comparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type))
        result = False
    if not result:
        print('error: mach files differ')
    return result
def dump_header ( self , dump_description = True , options = None ) :
    # Prints the two-line column header (titles plus a dashed rule) for a
    # one-line-per-file listing; the verbose layout has one more column.
    # NOTE(review): another dump_header with the same signature is defined
    # further down in this same class, and a later definition replaces an
    # earlier one, so this version appears to be unreachable - confirm
    # before relying on it.
    # NOTE(review): "options" defaults to None but is dereferenced
    # unconditionally below.
    if options . verbose :
        print " MAGIC CPU SUBTYPE FILETYPE NUM CMDS SIZE CMDS FLAGS "
        print " ---------- ---------- ---------- ---------- -------- ---------- ---------- "
    else :
        print " MAGIC ARCH FILETYPE NUM CMDS SIZE CMDS FLAGS "
        print " ------------ ---------- -------------- -------- ---------- ---------- "
def dump_flat(self, options):
    """Print the mach header fields on a single line.

    With ``options.verbose`` every field is printed as a raw number;
    otherwise the magic, architecture, file type and flags are printed
    using their string forms.
    """
    if options.verbose:
        # Format the numeric magic (self.magic.value), as is done where the
        # header is printed field by field; "%x" needs an integer, not the
        # enum object itself.
        print("%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic.value, self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits))
    else:
        print("%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags))
def dump(self, options):
    """Dump the parts of the file selected by ``options``.

    The attributes read from ``options`` are dump_header,
    dump_load_commands, dump_sections, section_names, dump_symtab and
    find_mangled.
    """
    if options.dump_header:
        self.dump_header(True, options)
    if options.dump_load_commands:
        self.dump_load_commands(False, options)
    if options.dump_sections:
        self.dump_sections(False, options)
    if options.section_names:
        self.dump_section_contents(options)
    if options.dump_symtab:
        self.get_symtab()
        if len(self.symbols):
            # This used to call dump_sections(), so asking for the symbol
            # table printed the section list instead.
            self.dump_symtab(False, options)
        else:
            print("No symbols")
    if options.find_mangled:
        self.dump_symbol_names_matching_regex(re.compile('^_?_Z'))
def dump_header ( self , dump_description = True , options = None ) :
    # Prints the mach header one field per line, optionally preceded by the
    # description() line.  Numeric fields are shown in hex, followed by a
    # readable form where one exists (magic, arch, file type name, decimal
    # command count, flag names).
    # dump_description -- when true, print self.description() first.
    # options          -- accepted but not used by this method.
    if dump_description :
        print self . description ( )
    print " Mach Header "
    print " magic: %#8.8x %s " % ( self . magic . value , self . magic )
    print " cputype: %#8.8x %s " % ( self . arch . cpu , self . arch )
    print " cpusubtype: %#8.8x " % self . arch . sub
    print " filetype: %#8.8x %s " % ( self . filetype . get_enum_value ( ) , self . filetype . get_enum_name ( ) )
    print " ncmds: %#8.8x %u " % ( self . ncmds , self . ncmds )
    print " sizeofcmds: %#8.8x " % self . sizeofcmds
    print " flags: %#8.8x %s " % ( self . flags . bits , self . flags )
def dump_load_commands(self, dump_description=True, options=None):
    """Print every load command, one per line.

    The description() line is printed first when ``dump_description`` is
    true.  ``options`` is accepted but not used.
    """
    if dump_description:
        print(self.description())
    for load_command in self.commands:
        print(load_command)
def get_section_by_name(self, name):
    """Return the first section whose ``sectname`` equals ``name``.

    Sections with an empty or missing name never match.  Returns None when
    no section matches.
    """
    for candidate in self.sections:
        sectname = candidate.sectname
        if sectname and sectname == name:
            return candidate
    return None
def get_section_by_section(self, other_section):
    """Return this file's section matching ``other_section``.

    A section matches when both its section name and its segment name are
    equal to those of ``other_section``.  Returns None when none matches.
    """
    wanted = (other_section.sectname, other_section.segname)
    for candidate in self.sections:
        if candidate.sectname == wanted[0] and candidate.segname == wanted[1]:
            return candidate
    return None
def dump_sections(self, dump_description=True, options=None):
    """Print the section table: a header followed by one line per section.

    The entry at index 0 of ``self.sections`` is not printed.  The
    description() line is printed first when ``dump_description`` is true.
    ``options`` is accepted but not used.
    """
    if dump_description:
        print(self.description())
    listed = self.sections[1:]
    if listed:
        listed[0].dump_header()
        for section in listed:
            print("%s" % (section,))
def dump_section_contents(self, options):
    """Dump, or save to disk, the sections named in ``options.section_names``.

    Without ``options.outfile`` each section is printed as a hex dump
    limited by ``options.max_count``.  With ``options.outfile`` the first
    section found is written to that file: either its raw bytes, or, when
    ``options.extract_modules`` is set, the data of each module listed in
    the section.  Further sections are then skipped with an error, and a
    name that matches no section is reported as an error.
    """
    saved_section_to_disk = False
    for sectname in options.section_names:
        section = self.get_section_by_name(sectname)
        if not section:
            print('error: no section named "%s" was found' % (sectname))
            continue
        sect_bytes = section.get_contents(self)
        if not options.outfile:
            print('section %s:\n' % (sectname))
            section.dump_header()
            print('%s\n' % (section))
            dump_memory(0, sect_bytes, options.max_count, 16)
            continue
        if saved_section_to_disk:
            print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname))
            continue
        # Section contents are raw bytes, so write them in binary mode, and
        # make sure the file is closed even if extraction fails.
        outfile = open(options.outfile, 'wb')
        try:
            if options.extract_modules:
                data = file_extract.FileExtract(StringIO.StringIO(sect_bytes), self.data.byte_order)
                version = data.get_uint32()
                num_modules = data.get_uint32()
                for i in range(num_modules):
                    data_offset = data.get_uint64()
                    data_size = data.get_uint64()
                    name_offset = data.get_uint32()
                    language = data.get_uint32()
                    flags = data.get_uint32()
                    # NOTE(review): the two seeks below move the read
                    # position away from the entry table and it is never
                    # restored, so with more than one module the next entry
                    # looks like it is read from the wrong place - confirm
                    # against the section layout.
                    data.seek(name_offset)
                    module_name = data.get_c_string()
                    data.seek(data_offset)
                    outfile.write(data.read_size(data_size))
            else:
                print("Saving section %s to '%s'" % (sectname, options.outfile))
                outfile.write(sect_bytes)
        finally:
            outfile.close()
        saved_section_to_disk = True
def get_segment(self, segname):
    """Return the segment named ``segname``, or None if there is none.

    When the file has exactly one segment and that segment has an empty
    name, it is returned regardless of ``segname``.
    """
    segments = self.segments
    if len(segments) == 1 and segments[0].segname == '':
        return segments[0]
    for candidate in segments:
        if candidate.segname == segname:
            return candidate
    return None
def get_first_load_command(self, lc_enum_value):
    """Return the first load command whose command value is ``lc_enum_value``.

    Returns None when the file has no such load command.
    """
    for load_command in self.commands:
        if load_command.command.value == lc_enum_value:
            return load_command
    return None
def get_symtab(self):
    '''Read the symbol table into self.symbols.

    Does nothing when there is no data source or when self.symbols is
    already populated. Prints a message when the file has no LC_SYMTAB
    load command.

    For an in-memory image the symbol table is located relative to the
    __LINKEDIT segment (vmaddr + symoff - fileoff); otherwise it is at
    self.file_off + symoff.
    '''
    if not self.data or self.symbols:
        return
    lc_symtab = self.get_first_load_command(LC_SYMTAB)
    if not lc_symtab:
        print(" no LC_SYMTAB ")
        return
    symtab_offset = self.file_off
    if self.data.is_in_memory():
        linkedit_segment = self.get_segment('__LINKEDIT')
        if linkedit_segment:
            # Use the symbol table offset (symoff). The string table
            # offset (stroff) locates symbol names, not nlist entries,
            # and must not replace this value.
            symtab_offset = (linkedit_segment.vmaddr + lc_symtab.symoff -
                             linkedit_segment.fileoff)
    else:
        symtab_offset += lc_symtab.symoff
    self.data.seek(symtab_offset)
    for _ in range(lc_symtab.nsyms):
        nlist = Mach.NList()
        nlist.unpack(self, self.data, lc_symtab)
        self.symbols.append(nlist)
def dump_symtab(self, dump_description=True, options=None):
    '''Print every symbol, one per line, prefixed with its index.

    dump_description -- when true, print self.description() first.
    options          -- accepted for signature parity; not used here.
    '''
    self.get_symtab()
    if dump_description:
        print(self.description())
    # Lazily format each (index, symbol) pair so formatting and printing
    # stay interleaved exactly as in a plain loop.
    rows = (' [ %5u ] %s ' % entry for entry in enumerate(self.symbols))
    for row in rows:
        print(row)
def dump_symbol_names_matching_regex(self, regex, file=None):
    '''Print the name of every symbol that regex.search() matches.

    regex -- object with a search(name) method (e.g. a compiled pattern).
    file  -- optional writable object; each matching name is also
             written to it.
    '''
    self.get_symtab()
    for symbol in self.symbols:
        name = symbol.name
        # Skip nameless symbols and names the pattern does not match.
        if not name or not regex.search(name):
            continue
        print(name)
        if file:
            file.write(' %s \n ' % (name))
def is_64_bit(self):
    '''Report whether this file is 64-bit, as answered by self.magic.'''
    magic = self.magic
    return magic.is_64_bit()
class LoadCommand:
    '''Generic load command: command id, total length and start offset.

    unpack() reads the two leading 32-bit fields (command, length);
    subclasses decode whatever follows them.
    '''

    class Command(dict_utils.Enum):
        '''Enum of LC_* load command values keyed by display name.'''
        enum = {
            ' LC_SEGMENT ': LC_SEGMENT,
            ' LC_SYMTAB ': LC_SYMTAB,
            ' LC_SYMSEG ': LC_SYMSEG,
            ' LC_THREAD ': LC_THREAD,
            ' LC_UNIXTHREAD ': LC_UNIXTHREAD,
            ' LC_LOADFVMLIB ': LC_LOADFVMLIB,
            ' LC_IDFVMLIB ': LC_IDFVMLIB,
            ' LC_IDENT ': LC_IDENT,
            ' LC_FVMFILE ': LC_FVMFILE,
            ' LC_PREPAGE ': LC_PREPAGE,
            ' LC_DYSYMTAB ': LC_DYSYMTAB,
            ' LC_LOAD_DYLIB ': LC_LOAD_DYLIB,
            ' LC_ID_DYLIB ': LC_ID_DYLIB,
            ' LC_LOAD_DYLINKER ': LC_LOAD_DYLINKER,
            ' LC_ID_DYLINKER ': LC_ID_DYLINKER,
            ' LC_PREBOUND_DYLIB ': LC_PREBOUND_DYLIB,
            ' LC_ROUTINES ': LC_ROUTINES,
            ' LC_SUB_FRAMEWORK ': LC_SUB_FRAMEWORK,
            ' LC_SUB_UMBRELLA ': LC_SUB_UMBRELLA,
            ' LC_SUB_CLIENT ': LC_SUB_CLIENT,
            ' LC_SUB_LIBRARY ': LC_SUB_LIBRARY,
            ' LC_TWOLEVEL_HINTS ': LC_TWOLEVEL_HINTS,
            ' LC_PREBIND_CKSUM ': LC_PREBIND_CKSUM,
            ' LC_LOAD_WEAK_DYLIB ': LC_LOAD_WEAK_DYLIB,
            ' LC_SEGMENT_64 ': LC_SEGMENT_64,
            ' LC_ROUTINES_64 ': LC_ROUTINES_64,
            ' LC_UUID ': LC_UUID,
            ' LC_RPATH ': LC_RPATH,
            ' LC_CODE_SIGNATURE ': LC_CODE_SIGNATURE,
            ' LC_SEGMENT_SPLIT_INFO ': LC_SEGMENT_SPLIT_INFO,
            ' LC_REEXPORT_DYLIB ': LC_REEXPORT_DYLIB,
            ' LC_LAZY_LOAD_DYLIB ': LC_LAZY_LOAD_DYLIB,
            ' LC_ENCRYPTION_INFO ': LC_ENCRYPTION_INFO,
            ' LC_DYLD_INFO ': LC_DYLD_INFO,
            ' LC_DYLD_INFO_ONLY ': LC_DYLD_INFO_ONLY,
            ' LC_LOAD_UPWARD_DYLIB ': LC_LOAD_UPWARD_DYLIB,
            ' LC_VERSION_MIN_MACOSX ': LC_VERSION_MIN_MACOSX,
            ' LC_VERSION_MIN_IPHONEOS ': LC_VERSION_MIN_IPHONEOS,
            ' LC_FUNCTION_STARTS ': LC_FUNCTION_STARTS,
            ' LC_DYLD_ENVIRONMENT ': LC_DYLD_ENVIRONMENT
        }

        def __init__(self, initial_value=0):
            dict_utils.Enum.__init__(self, initial_value, self.enum)

    def __init__(self, c=None, l=0, o=0):
        '''c -- existing Command object to adopt (a new one is made if None)
        l -- length of the load command in bytes
        o -- offset at which the load command starts
        '''
        # Identity test: whether an argument was supplied must not depend
        # on how the Command object implements comparison.
        if c is not None:
            self.command = c
        else:
            self.command = Mach.LoadCommand.Command(0)
        self.length = l
        self.file_off = o

    def unpack(self, mach_file, data):
        '''Record the current offset, then read the command id and length.'''
        self.file_off = data.tell()
        self.command.value, self.length = data.get_n_uint32(2)

    def skip(self, data):
        '''Seek data to the first byte after this load command.'''
        data.seek(self.file_off + self.length, 0)

    def __str__(self):
        lc_name = self.command.get_enum_name()
        return ' %#8.8x : < %#4.4x > %-24s ' % (self.file_off, self.length, lc_name)
class Section:
    '''One section header from a segment load command.

    reserved3 is only read for 64-bit sections; for 32-bit sections it
    keeps its initial value of 0.
    '''

    def __init__(self):
        self.index = 0
        self.is_64 = False
        self.sectname = None
        self.segname = None
        self.addr = 0
        self.size = 0
        self.offset = 0
        self.align = 0
        self.reloff = 0
        self.nreloc = 0
        self.flags = 0
        self.reserved1 = 0
        self.reserved2 = 0
        self.reserved3 = 0

    def unpack(self, is_64, data):
        '''Decode a section header from data.

        is_64 -- True for the 64-bit layout (64-bit addr/size plus an
                 extra reserved3 word).
        data  -- extractor positioned at the start of the section header.
        '''
        self.is_64 = is_64
        self.sectname = data.get_fixed_length_c_string(16, ' ', True)
        self.segname = data.get_fixed_length_c_string(16, ' ', True)
        if self.is_64:
            self.addr, self.size = data.get_n_uint64(2)
            (self.offset, self.align, self.reloff, self.nreloc, self.flags,
             self.reserved1, self.reserved2, self.reserved3) = data.get_n_uint32(8)
        else:
            self.addr, self.size = data.get_n_uint32(2)
            (self.offset, self.align, self.reloff, self.nreloc, self.flags,
             self.reserved1, self.reserved2) = data.get_n_uint32(7)

    def dump_header(self):
        '''Print the column header matching the layout used by __str__.'''
        if self.is_64:
            print(" INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME ")
            print(" ===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------- ")
        else:
            print(" INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME ")
            print(" ===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------- ")

    def __str__(self):
        if self.is_64:
            return " [ %3u ] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s . %s " % (
                self.index, self.addr, self.size, self.offset, self.align,
                self.reloff, self.nreloc, self.flags, self.reserved1,
                self.reserved2, self.reserved3, self.segname, self.sectname)
        else:
            return " [ %3u ] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s . %s " % (
                self.index, self.addr, self.size, self.offset, self.align,
                self.reloff, self.nreloc, self.flags, self.reserved1,
                self.reserved2, self.segname, self.sectname)

    def get_contents(self, mach_file):
        '''Return the section's bytes, or None when there is nothing to read.

        None is returned when the section is empty, when its segment
        cannot be found or has no file contents, or when mach_file has no
        data source. The data source's position is restored afterwards.
        '''
        if self.size <= 0:
            return None
        segment = mach_file.get_segment(self.segname)
        # get_segment() returns None for an unknown segment name.
        if segment is None or segment.filesize <= 0:
            return None
        data = mach_file.get_data()
        if not data:
            return None
        section_data_offset = mach_file.file_off + self.offset
        data.push_offset_and_seek(section_data_offset)
        contents = data.read_size(self.size)
        data.pop_offset_and_seek()
        return contents
class DylibLoadCommand(LoadCommand):
    '''Load command that names a dynamic library plus its timestamp and
    current/compatibility versions.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None
        self.timestamp = 0
        self.current_version = 0
        self.compatibility_version = 0

    def unpack(self, mach_file, data):
        '''Read the fixed fields, then the library name they point to.'''
        (name_offset, self.timestamp, self.current_version,
         self.compatibility_version) = data.get_n_uint32(4)
        # The name starts name_offset bytes into the command and runs to
        # the command's end, so its size is derived from that offset
        # rather than from an assumed fixed header size.
        data.seek(self.file_off + name_offset, 0)
        self.name = data.get_fixed_length_c_string(self.length - name_offset)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += " %#8.8x %#8.8x %#8.8x " % (self.timestamp, self.current_version, self.compatibility_version)
        # name stays None until unpack() has run (or if the read failed).
        if self.name:
            s += self.name
        return s
class LoadDYLDLoadCommand(LoadCommand):
    '''Load command whose payload is a single path string.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None

    def unpack(self, mach_file, data):
        '''Read the name offset and then the name it points to.'''
        name_offset = data.get_uint32()
        # Honour the stored offset (same handling as DylibLoadCommand)
        # instead of assuming the name immediately follows the header.
        data.seek(self.file_off + name_offset, 0)
        self.name = data.get_fixed_length_c_string(self.length - name_offset)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += " %s " % self.name
        return s
class UnixThreadLoadCommand(LoadCommand):
    '''Thread load command; unpack() decodes one register set and appends
    it to self.reg_sets.'''

    class ThreadState:
        '''A register set: flavor, word count and that many 32-bit values.'''

        def __init__(self):
            self.flavor = 0
            self.count = 0
            self.register_values = []

        def unpack(self, data):
            self.flavor, self.count = data.get_n_uint32(2)
            self.register_values = data.get_n_uint32(self.count)

        def __str__(self):
            pieces = [" flavor = %u , count = %u , regs = " % (self.flavor, self.count)]
            for position, register_value in enumerate(self.register_values):
                # Start a new output row before every eighth register.
                if position % 8 == 0:
                    pieces.append(" \n ")
                pieces.append(" %#8.8x " % register_value)
            return ''.join(pieces)

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.reg_sets = []

    def unpack(self, mach_file, data):
        state = Mach.UnixThreadLoadCommand.ThreadState()
        state.unpack(data)
        self.reg_sets.append(state)

    def __str__(self):
        pieces = [Mach.LoadCommand.__str__(self)]
        for state in self.reg_sets:
            pieces.append(" %s " % state)
        return ''.join(pieces)
class DYLDInfoOnlyLoadCommand(LoadCommand):
    '''Load command holding offset/size pairs for the rebase, bind,
    weak-bind, lazy-bind and export data.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.rebase_off = 0
        self.rebase_size = 0
        self.bind_off = 0
        self.bind_size = 0
        self.weak_bind_off = 0
        self.weak_bind_size = 0
        self.lazy_bind_off = 0
        self.lazy_bind_size = 0
        self.export_off = 0
        self.export_size = 0

    def unpack(self, mach_file, data):
        '''Read the ten 32-bit fields that follow the command header.

        The extractor already knows the byte order, so nothing is
        queried from mach_file here.
        '''
        (self.rebase_off, self.rebase_size,
         self.bind_off, self.bind_size,
         self.weak_bind_off, self.weak_bind_size,
         self.lazy_bind_off, self.lazy_bind_size,
         self.export_off, self.export_size) = data.get_n_uint32(10)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += " rebase_off = %#8.8x , rebase_size = %u , " % (self.rebase_off, self.rebase_size)
        s += " bind_off = %#8.8x , bind_size = %u , " % (self.bind_off, self.bind_size)
        s += " weak_bind_off = %#8.8x , weak_bind_size = %u , " % (self.weak_bind_off, self.weak_bind_size)
        s += " lazy_bind_off = %#8.8x , lazy_bind_size = %u , " % (self.lazy_bind_off, self.lazy_bind_size)
        s += " export_off = %#8.8x , export_size = %u , " % (self.export_off, self.export_size)
        return s
class DYLDSymtabLoadCommand(LoadCommand):
    '''Load command carrying the eighteen 32-bit dynamic symbol table
    fields (symbol index ranges plus table offsets and counts).'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.ilocalsym = 0
        self.nlocalsym = 0
        self.iextdefsym = 0
        self.nextdefsym = 0
        self.iundefsym = 0
        self.nundefsym = 0
        self.tocoff = 0
        self.ntoc = 0
        self.modtaboff = 0
        self.nmodtab = 0
        self.extrefsymoff = 0
        self.nextrefsyms = 0
        self.indirectsymoff = 0
        self.nindirectsyms = 0
        self.extreloff = 0
        self.nextrel = 0
        self.locreloff = 0
        self.nlocrel = 0

    def unpack(self, mach_file, data):
        '''Read the eighteen 32-bit fields that follow the command header.

        The extractor already knows the byte order, so nothing is
        queried from mach_file here.
        '''
        (self.ilocalsym, self.nlocalsym,
         self.iextdefsym, self.nextdefsym,
         self.iundefsym, self.nundefsym,
         self.tocoff, self.ntoc,
         self.modtaboff, self.nmodtab,
         self.extrefsymoff, self.nextrefsyms,
         self.indirectsymoff, self.nindirectsyms,
         self.extreloff, self.nextrel,
         self.locreloff, self.nlocrel) = data.get_n_uint32(18)

    def __str__(self):
        # One "offset/index , count" pair per output line.
        s = Mach.LoadCommand.__str__(self)
        s += " ilocalsym = %-10u , nlocalsym = %u \n " % (self.ilocalsym, self.nlocalsym)
        s += " iextdefsym = %-10u , nextdefsym = %u \n " % (self.iextdefsym, self.nextdefsym)
        s += " iundefsym = %-10u , nundefsym = %u \n " % (self.iundefsym, self.nundefsym)
        s += " tocoff = %#8.8x , ntoc = %u \n " % (self.tocoff, self.ntoc)
        s += " modtaboff = %#8.8x , nmodtab = %u \n " % (self.modtaboff, self.nmodtab)
        s += " extrefsymoff = %#8.8x , nextrefsyms = %u \n " % (self.extrefsymoff, self.nextrefsyms)
        s += " indirectsymoff = %#8.8x , nindirectsyms = %u \n " % (self.indirectsymoff, self.nindirectsyms)
        s += " extreloff = %#8.8x , nextrel = %u \n " % (self.extreloff, self.nextrel)
        s += " locreloff = %#8.8x , nlocrel = %u " % (self.locreloff, self.nlocrel)
        return s
class SymtabLoadCommand(LoadCommand):
    '''Load command locating the symbol table (symoff, nsyms) and the
    string table (stroff, strsize).'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.symoff = 0
        self.nsyms = 0
        self.stroff = 0
        self.strsize = 0

    def unpack(self, mach_file, data):
        '''Read the four 32-bit fields that follow the command header.

        The extractor already knows the byte order, so nothing is
        queried from mach_file here.
        '''
        self.symoff, self.nsyms, self.stroff, self.strsize = data.get_n_uint32(4)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += " symoff = %#8.8x , nsyms = %u , stroff = %#8.8x , strsize = %u " % (
            self.symoff, self.nsyms, self.stroff, self.strsize)
        return s
class UUIDLoadCommand(LoadCommand):
    '''Load command carrying the 16-byte UUID of the file.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.uuid = None

    def unpack(self, mach_file, data):
        '''Read 16 bytes, store them as a uuid.UUID on this command and
        also on mach_file.uuid.'''
        uuid_data = data.get_n_uint8(16)
        # Build the UUID straight from the raw bytes; this avoids
        # assembling an intermediate hex string whose exact formatting
        # uuid.UUID() would then have to accept.
        self.uuid = uuid.UUID(bytes=bytes(bytearray(uuid_data)))
        mach_file.uuid = self.uuid

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += str(self.uuid)
        return s
class DataBlobLoadCommand(LoadCommand):
    '''Load command that points at a blob of data by offset and size.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.dataoff = 0
        self.datasize = 0

    def unpack(self, mach_file, data):
        '''Read the two 32-bit fields that follow the command header.

        The extractor already knows the byte order, so nothing is
        queried from mach_file here.
        '''
        self.dataoff, self.datasize = data.get_n_uint32(2)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += " dataoff = %#8.8x , datasize = %u " % (self.dataoff, self.datasize)
        return s
class EncryptionInfoLoadCommand(LoadCommand):
    '''Load command describing an encrypted file range and its crypt id.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.cryptoff = 0
        self.cryptsize = 0
        self.cryptid = 0

    def unpack(self, mach_file, data):
        '''Read the three 32-bit fields that follow the command header.

        The extractor already knows the byte order, so nothing is
        queried from mach_file here.
        '''
        self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        # The range is printed half-open: [cryptoff, cryptoff + cryptsize).
        s += " file-range = [ %#8.8x - %#8.8x ), cryptsize = %u , cryptid = %u " % (
            self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid)
        return s
class SegmentLoadCommand(LoadCommand):
    '''Segment load command (32- or 64-bit layout).

    unpack() registers the segment in mach_file.segments and appends
    every section it contains to mach_file.sections.
    '''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.segname = None
        self.vmaddr = 0
        self.vmsize = 0
        self.fileoff = 0
        self.filesize = 0
        self.maxprot = 0
        self.initprot = 0
        self.nsects = 0
        self.flags = 0

    def unpack(self, mach_file, data):
        wide = self.command.get_enum_value() == LC_SEGMENT_64
        self.segname = data.get_fixed_length_c_string(16, ' ', True)
        # Address and size fields are 64-bit only for LC_SEGMENT_64.
        read_words = data.get_n_uint64 if wide else data.get_n_uint32
        self.vmaddr, self.vmsize, self.fileoff, self.filesize = read_words(4)
        self.maxprot, self.initprot, self.nsects, self.flags = data.get_n_uint32(4)
        mach_file.segments.append(self)
        for _ in range(self.nsects):
            sect = Mach.Section()
            sect.unpack(wide, data)
            # Section indexes count across the whole file, not per segment.
            sect.index = len(mach_file.sections)
            mach_file.sections.append(sect)

    def __str__(self):
        header = Mach.LoadCommand.__str__(self)
        if self.command.get_enum_value() == LC_SEGMENT:
            range_format = " %#8.8x %#8.8x %#8.8x %#8.8x "
        else:
            range_format = " %#16.16x %#16.16x %#16.16x %#16.16x "
        ranges = range_format % (self.vmaddr, self.vmsize, self.fileoff, self.filesize)
        protections = " %s %s %3u %#8.8x " % (vm_prot_names[self.maxprot], vm_prot_names[self.initprot], self.nsects, self.flags)
        return header + ranges + protections + (' ' + self.segname)
class NList:
    '''One symbol table entry: name offset, type, section index,
    description and value, plus the name looked up in the string table.'''

    class Type:
        '''Wrapper around the n_type byte that knows how to describe it.'''

        class Stab(dict_utils.Enum):
            '''Enum of N_* stab values keyed by display name.'''
            enum = {
                ' N_GSYM ': N_GSYM,
                ' N_FNAME ': N_FNAME,
                ' N_FUN ': N_FUN,
                ' N_STSYM ': N_STSYM,
                ' N_LCSYM ': N_LCSYM,
                ' N_BNSYM ': N_BNSYM,
                ' N_OPT ': N_OPT,
                ' N_RSYM ': N_RSYM,
                ' N_SLINE ': N_SLINE,
                ' N_ENSYM ': N_ENSYM,
                ' N_SSYM ': N_SSYM,
                ' N_SO ': N_SO,
                ' N_OSO ': N_OSO,
                ' N_LSYM ': N_LSYM,
                ' N_BINCL ': N_BINCL,
                ' N_SOL ': N_SOL,
                ' N_PARAMS ': N_PARAMS,
                ' N_VERSION ': N_VERSION,
                ' N_OLEVEL ': N_OLEVEL,
                ' N_PSYM ': N_PSYM,
                ' N_EINCL ': N_EINCL,
                ' N_ENTRY ': N_ENTRY,
                ' N_LBRAC ': N_LBRAC,
                ' N_EXCL ': N_EXCL,
                ' N_RBRAC ': N_RBRAC,
                ' N_BCOMM ': N_BCOMM,
                ' N_ECOMM ': N_ECOMM,
                ' N_ECOML ': N_ECOML,
                ' N_LENG ': N_LENG
            }

            def __init__(self, magic=0):
                dict_utils.Enum.__init__(self, magic, self.enum)

        def __init__(self, t=0):
            self.value = t

        def __str__(self):
            n_type = self.value
            # Any N_STAB bit set means the whole byte is a stab type.
            if n_type & N_STAB:
                stab = Mach.NList.Type.Stab(self.value)
                return ' %s ' % stab
            kind = n_type & N_TYPE
            known_kinds = (
                (N_UNDF, ' N_UNDF '),
                (N_ABS, ' N_ABS '),
                (N_SECT, ' N_SECT '),
                (N_PBUD, ' N_PBUD '),
                (N_INDR, ' N_INDR '),
            )
            for code, label in known_kinds:
                if kind == code:
                    type_str = label
                    break
            else:
                type_str = " ??? ( %#2.2x ) " % kind
            if n_type & N_PEXT:
                type_str += ' | PEXT '
            if n_type & N_EXT:
                type_str += ' | EXT '
            return type_str

    def __init__(self):
        self.index = 0
        self.name_offset = 0
        self.name = 0
        self.type = Mach.NList.Type()
        self.sect_idx = 0
        self.desc = 0
        self.value = 0

    def unpack(self, mach_file, data, symtab_lc):
        '''Decode one entry at the current position of data.

        mach_file -- owning file; supplies the entry index, the 64-bit
                     flag and the base file offset.
        data      -- extractor positioned at the entry.
        symtab_lc -- symtab load command; its stroff locates the names.
        '''
        self.index = len(mach_file.symbols)
        self.name_offset = data.get_uint32()
        self.type.value, self.sect_idx = data.get_n_uint8(2)
        self.desc = data.get_uint16()
        if mach_file.is_64_bit():
            self.value = data.get_uint64()
        else:
            self.value = data.get_uint32()
        # Visit the string table for the name, then return to the entry
        # stream so the next unpack() continues where this one stopped.
        data.push_offset_and_seek(mach_file.file_off + symtab_lc.stroff + self.name_offset)
        self.name = data.get_c_string()
        data.pop_offset_and_seek()

    def __str__(self):
        name_display = ' '
        # Truthiness instead of len(): name is 0 until unpack() has run,
        # and may be None if the string lookup produced nothing.
        if self.name:
            name_display = ' " %s " ' % self.name
        return ' %#8.8x %#2.2x ( %-20s ) %#2.2x %#4.4x %16.16x %s ' % (
            self.name_offset, self.type.value, self.type, self.sect_idx,
            self.desc, self.value, name_display)
class Interactive(cmd.Cmd):
    '''Interactive command interpreter to mach-o files.

    Each do_* method is a command; returning True from a command ends
    the command loop, returning False keeps it running.
    '''

    def __init__(self, mach, options):
        '''mach    -- parsed file the commands operate on
        options -- option values forwarded to the dump methods
        '''
        cmd.Cmd.__init__(self)
        self.intro = ' Interactive mach-o command interpreter '
        self.prompt = ' mach-o: %s %% ' % mach.path
        self.mach = mach
        self.options = options

    def default(self, line):
        '''Report an unrecognised command and exit the interpreter.'''
        print(" unknown command: %s " % line)
        return True

    def do_quit(self, line):
        ''' Quit command '''
        return True

    # Short alias; shares the implementation and help text of do_quit.
    do_q = do_quit

    def do_header(self, line):
        ''' Dump mach-o file headers '''
        self.mach.dump_header(True, self.options)
        return False

    def do_load(self, line):
        ''' Dump all mach-o load commands '''
        self.mach.dump_load_commands(True, self.options)
        return False

    def do_sections(self, line):
        ''' Dump all mach-o sections '''
        self.mach.dump_sections(True, self.options)
        return False

    def do_symtab(self, line):
        ''' Dump all mach-o symbols in the symbol table '''
        self.mach.dump_symtab(True, self.options)
        return False
# Command line entry point: parse options, then compare two files, dump
# the requested parts of each file, or start the interactive interpreter.
if __name__ == '__main__':
    # Option strings, types, actions and dest names must be exact for
    # optparse to accept them; help/metavar text is display-only.
    parser = optparse.OptionParser(description=' A script that parses skinny and universal mach-o files. ')
    parser.add_option('--arch', '-a', type='string', metavar=' arch ', dest='archs', action='append', help=' specify one or more architectures by name ')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help=' display verbose debug info ', default=False)
    parser.add_option('-H', '--header', action='store_true', dest='dump_header', help=' dump the mach-o file header ', default=False)
    parser.add_option('-l', '--load-commands', action='store_true', dest='dump_load_commands', help=' dump the mach-o load commands ', default=False)
    parser.add_option('-s', '--symtab', action='store_true', dest='dump_symtab', help=' dump the mach-o symbol table ', default=False)
    parser.add_option('-S', '--sections', action='store_true', dest='dump_sections', help=' dump the mach-o sections ', default=False)
    parser.add_option('--section', type='string', metavar=' sectname ', dest='section_names', action='append', help=' Specify one or more section names to dump ', default=[])
    parser.add_option('-o', '--out', type='string', dest='outfile', help=' Used in conjunction with the --section=NAME option to save a single section \' s data to disk. ', default=False)
    parser.add_option('-i', '--interactive', action='store_true', dest='interactive', help=' enable interactive mode ', default=False)
    parser.add_option('-m', '--mangled', action='store_true', dest='find_mangled', help=' dump all mangled names in a mach file ', default=False)
    parser.add_option('-c', '--compare', action='store_true', dest='compare', help=' compare two mach files ', default=False)
    parser.add_option('-M', '--extract-modules', action='store_true', dest='extract_modules', help=' Extract modules from file ', default=False)
    parser.add_option('-C', '--count', type='int', dest='max_count', help=' Sets the max byte count when dumping section data ', default=-1)
    (options, mach_files) = parser.parse_args()

    if options.extract_modules:
        # Module extraction picks its own section and needs a file to
        # write to, so it cannot be combined with --section.
        if options.section_names:
            print(" error: can ' t use --section option with the --extract-modules option ")
            sys.exit(1)
        if not options.outfile:
            # The option is spelled --out; name it correctly in the message.
            print(" error: the --out=FILE option must be specified with the --extract-modules option ")
            sys.exit(1)
        options.section_names.append("__apple_ast")

    if options.compare:
        if len(mach_files) == 2:
            mach_a = Mach()
            mach_b = Mach()
            mach_a.parse(mach_files[0])
            mach_b.parse(mach_files[1])
            mach_a.compare(mach_b)
        else:
            print(' error: --compare takes two mach files as arguments ')
    else:
        # With nothing specific requested, show the header and load commands.
        if not (options.dump_header or options.dump_load_commands or
                options.dump_symtab or options.dump_sections or
                options.find_mangled or options.section_names):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print('%s %s' % (' options ', options))
            print('%s %s' % (' mach_files ', mach_files))
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)