2012-04-25 09:25:04 +08:00
//===-- head_find.c ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file compiles into a dylib and can be used on darwin to find data that
// is contained in active malloc blocks. To use this make the project, then
// load the shared library in a debug session while you are stopped:
//
// (lldb) process load /path/to/libheap.dylib
//
// Now you can use the "find_pointer_in_heap" and "find_cstring_in_heap"
// functions in the expression parser.
//
// This will grep everything in all active allocation blocks and print and
// malloc blocks that contain the pointer 0x112233000000:
//
// (lldb) expression find_pointer_in_heap (0x112233000000)
//
// This will grep everything in all active allocation blocks and print and
// malloc blocks that contain the C string "hello" (as a substring, no
// NULL termination included):
//
// (lldb) expression find_cstring_in_heap ("hello")
//
// The results will be printed to the STDOUT of the inferior program. The
// return value of the "find_pointer_in_heap" function is the number of
// pointer references that were found. A quick example shows
//
// (lldb) expr find_pointer_in_heap(0x0000000104000410)
// (uint32_t) $5 = 0x00000002
// 0x104000740: 0x0000000104000410 found in malloc block 0x104000730 + 16 (malloc_size = 48)
// 0x100820060: 0x0000000104000410 found in malloc block 0x100820000 + 96 (malloc_size = 4096)
//
// From the above output we see that 0x104000410 was found in the malloc block
// at 0x104000730 and 0x100820000. If we want to see what these blocks are, we
// can display the memory for this block using the "address" ("A" for short)
// format. The address format shows pointers, and if those pointers point to
// objects that have symbols or know data contents, it will display information
// about the pointers:
//
// (lldb) memory read --format address --count 1 0x104000730
// 0x104000730: 0x0000000100002460 (void *)0x0000000100002488: MyString
//
// We can see that the first block is a "MyString" object that contains our
// pointer value at offset 16.
//
// Looking at the next pointers, are a bit more tricky:
// (lldb) memory read -fA 0x100820000 -c1
// 0x100820000: 0x4f545541a1a1a1a1
// (lldb) memory read 0x100820000
// 0x100820000: a1 a1 a1 a1 41 55 54 4f 52 45 4c 45 41 53 45 21 ....AUTORELEASE!
// 0x100820010: 78 00 82 00 01 00 00 00 60 f9 e8 75 ff 7f 00 00 x.......`..u....
//
// This is an objective C auto release pool object that contains our pointer.
// C++ classes will show up if they are virtual as something like:
// (lldb) memory read --format address --count 1 0x104008000
// 0x104008000: 0x109008000 vtable for lldb_private::Process
//
// This is a clue that the 0x104008000 is a "lldb_private::Process *".
//===----------------------------------------------------------------------===//
# include <assert.h>
# include <ctype.h>
# include <mach/mach.h>
# include <malloc/malloc.h>
# include <stdio.h>
# include <stdlib.h>
# include <vector>
2012-06-28 03:57:13 +08:00
//----------------------------------------------------------------------
// Redefine private types from "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
typedef struct {
uint32_t type_flags ;
uint64_t stack_identifier ;
uint64_t argument ;
mach_vm_address_t address ;
} mach_stack_logging_record_t ;
//----------------------------------------------------------------------
// Redefine private defines from "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
# define stack_logging_type_free 0
# define stack_logging_type_generic 1
# define stack_logging_type_alloc 2
# define stack_logging_type_dealloc 4
//----------------------------------------------------------------------
// Redefine private function prototypes from
// "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
extern " C " kern_return_t
__mach_stack_logging_set_file_path (
task_t task ,
char * file_path
) ;
extern " C " kern_return_t
__mach_stack_logging_get_frames (
task_t task ,
mach_vm_address_t address ,
mach_vm_address_t * stack_frames_buffer ,
uint32_t max_stack_frames ,
uint32_t * count
) ;
extern " C " kern_return_t
__mach_stack_logging_enumerate_records (
task_t task ,
mach_vm_address_t address ,
void enumerator ( mach_stack_logging_record_t , void * ) ,
void * context
) ;
extern " C " kern_return_t
__mach_stack_logging_frames_for_uniqued_stack (
task_t task ,
uint64_t stack_identifier ,
mach_vm_address_t * stack_frames_buffer ,
uint32_t max_stack_frames ,
uint32_t * count
) ;
//----------------------------------------------------------------------
// Redefine private gloval variables prototypes from
// "/usr/local/include/stack_logging.h"
//----------------------------------------------------------------------
extern " C " int stack_logging_enable_logging ;
extern " C " int stack_logging_dontcompact ;
//----------------------------------------------------------------------
// Local defines
//----------------------------------------------------------------------
2012-04-25 09:25:04 +08:00
# define MAX_FRAMES 1024
2012-06-28 03:57:13 +08:00
//----------------------------------------------------------------------
// Local Typedefs and Types
//----------------------------------------------------------------------
2012-04-25 09:25:04 +08:00
typedef void range_callback_t ( task_t task , void * baton , unsigned type , uint64_t ptr_addr , uint64_t ptr_size ) ;
typedef void zone_callback_t ( void * info , const malloc_zone_t * zone ) ;
struct range_callback_info_t
{
zone_callback_t * zone_callback ;
range_callback_t * range_callback ;
void * baton ;
} ;
enum data_type_t
{
eDataTypeAddress ,
eDataTypeContainsData
} ;
struct aligned_data_t
{
const uint8_t * buffer ;
uint32_t size ;
uint32_t align ;
} ;
struct range_contains_data_callback_info_t
{
data_type_t type ;
const void * lookup_addr ;
union
{
uintptr_t addr ;
aligned_data_t data ;
} ;
uint32_t match_count ;
bool done ;
} ;
struct malloc_match
{
void * addr ;
intptr_t size ;
intptr_t offset ;
} ;
2012-05-11 07:17:28 +08:00
struct malloc_stack_entry
{
2012-05-11 10:42:36 +08:00
const void * address ;
2012-05-11 07:17:28 +08:00
uint64_t argument ;
uint32_t type_flags ;
2012-05-11 07:37:52 +08:00
std : : vector < uintptr_t > frames ;
2012-05-11 07:17:28 +08:00
} ;
2012-06-28 03:57:13 +08:00
//----------------------------------------------------------------------
// Local global variables
//----------------------------------------------------------------------
2012-04-25 09:25:04 +08:00
std : : vector < malloc_match > g_matches ;
const void * g_lookup_addr = 0 ;
2012-05-11 07:17:28 +08:00
std : : vector < malloc_stack_entry > g_malloc_stack_history ;
2012-04-25 09:25:04 +08:00
mach_vm_address_t g_stack_frames [ MAX_FRAMES ] ;
2012-05-11 10:42:36 +08:00
char g_error_string [ PATH_MAX ] ;
2012-04-25 09:25:04 +08:00
//----------------------------------------------------------------------
// task_peek
//
// Reads memory from this tasks address space. This callback is needed
// by the code that iterates through all of the malloc blocks to read
// the memory in this process.
//----------------------------------------------------------------------
static kern_return_t
task_peek ( task_t task , vm_address_t remote_address , vm_size_t size , void * * local_memory )
{
* local_memory = ( void * ) remote_address ;
return KERN_SUCCESS ;
}
static const void
foreach_zone_in_this_process ( range_callback_info_t * info )
{
if ( info = = NULL | | info - > zone_callback = = NULL )
return ;
vm_address_t * zones = NULL ;
unsigned int num_zones = 0 ;
kern_return_t err = malloc_get_all_zones ( 0 , task_peek , & zones , & num_zones ) ;
if ( KERN_SUCCESS = = err )
{
for ( unsigned int i = 0 ; i < num_zones ; + + i )
{
info - > zone_callback ( info , ( const malloc_zone_t * ) zones [ i ] ) ;
}
}
}
//----------------------------------------------------------------------
// dump_malloc_block_callback
//
// A simple callback that will dump each malloc block and all available
// info from the enumeration callback perpective.
//----------------------------------------------------------------------
static void
dump_malloc_block_callback ( task_t task , void * baton , unsigned type , uint64_t ptr_addr , uint64_t ptr_size )
{
printf ( " task = 0x%4.4x: baton = %p, type = %u, ptr_addr = 0x%llx + 0x%llu \n " , task , baton , type , ptr_addr , ptr_size ) ;
}
static void
ranges_callback ( task_t task , void * baton , unsigned type , vm_range_t * ptrs , unsigned count )
{
range_callback_info_t * info = ( range_callback_info_t * ) baton ;
while ( count - - ) {
info - > range_callback ( task , info - > baton , type , ptrs - > address , ptrs - > size ) ;
ptrs + + ;
}
}
static void
enumerate_range_in_zone ( void * baton , const malloc_zone_t * zone )
{
range_callback_info_t * info = ( range_callback_info_t * ) baton ;
if ( zone & & zone - > introspect )
zone - > introspect - > enumerator ( mach_task_self ( ) ,
info ,
MALLOC_PTR_IN_USE_RANGE_TYPE ,
( vm_address_t ) zone ,
task_peek ,
ranges_callback ) ;
}
static void
range_info_callback ( task_t task , void * baton , unsigned type , uint64_t ptr_addr , uint64_t ptr_size )
{
const uint64_t end_addr = ptr_addr + ptr_size ;
range_contains_data_callback_info_t * info = ( range_contains_data_callback_info_t * ) baton ;
switch ( info - > type )
{
case eDataTypeAddress :
if ( ptr_addr < = info - > addr & & info - > addr < end_addr )
{
+ + info - > match_count ;
malloc_match match = { ( void * ) ptr_addr , ptr_size , info - > addr - ptr_addr } ;
g_matches . push_back ( match ) ;
}
break ;
case eDataTypeContainsData :
{
const uint32_t size = info - > data . size ;
if ( size < ptr_size ) // Make sure this block can contain this data
{
uint8_t * ptr_data = NULL ;
if ( task_peek ( task , ptr_addr , ptr_size , ( void * * ) & ptr_data ) = = KERN_SUCCESS )
{
const void * buffer = info - > data . buffer ;
assert ( ptr_data ) ;
const uint32_t align = info - > data . align ;
for ( uint64_t addr = ptr_addr ;
addr < end_addr & & ( ( end_addr - addr ) > = size ) ;
addr + = align , ptr_data + = align )
{
if ( memcmp ( buffer , ptr_data , size ) = = 0 )
{
+ + info - > match_count ;
malloc_match match = { ( void * ) ptr_addr , ptr_size , addr - ptr_addr } ;
g_matches . push_back ( match ) ;
}
}
}
else
{
printf ( " 0x%llx: error: couldn't read %llu bytes \n " , ptr_addr , ptr_size ) ;
}
}
}
break ;
}
}
2012-05-11 07:17:28 +08:00
static void
get_stack_for_address_enumerator ( mach_stack_logging_record_t stack_record , void * task_ptr )
{
uint32_t num_frames = 0 ;
kern_return_t err = __mach_stack_logging_frames_for_uniqued_stack ( * ( task_t * ) task_ptr ,
stack_record . stack_identifier ,
g_stack_frames ,
MAX_FRAMES ,
& num_frames ) ;
g_malloc_stack_history . resize ( g_malloc_stack_history . size ( ) + 1 ) ;
2012-05-11 07:20:50 +08:00
g_malloc_stack_history . back ( ) . address = ( void * ) stack_record . address ;
2012-05-11 07:17:28 +08:00
g_malloc_stack_history . back ( ) . type_flags = stack_record . type_flags ;
g_malloc_stack_history . back ( ) . argument = stack_record . argument ;
if ( num_frames > 0 )
g_malloc_stack_history . back ( ) . frames . assign ( g_stack_frames , g_stack_frames + num_frames ) ;
g_malloc_stack_history . back ( ) . frames . push_back ( 0 ) ; // Terminate the frames with zero
}
malloc_stack_entry *
2012-05-11 10:42:36 +08:00
get_stack_history_for_address ( const void * addr , int history )
2012-05-11 07:17:28 +08:00
{
std : : vector < malloc_stack_entry > empty ;
g_malloc_stack_history . swap ( empty ) ;
2012-05-11 10:42:36 +08:00
if ( ! stack_logging_enable_logging | | ( history & & ! stack_logging_dontcompact ) )
{
if ( history )
strncpy ( g_error_string , " error: stack history logging is not enabled, set MallocStackLoggingNoCompact=1 in the environment when launching to enable stack history logging. " , sizeof ( g_error_string ) ) ;
else
strncpy ( g_error_string , " error: stack logging is not enabled, set MallocStackLogging=1 in the environment when launching to enable stack logging. " , sizeof ( g_error_string ) ) ;
return NULL ;
}
kern_return_t err ;
2012-05-11 07:17:28 +08:00
task_t task = mach_task_self ( ) ;
2012-05-11 10:42:36 +08:00
if ( history )
{
err = __mach_stack_logging_enumerate_records ( task ,
( mach_vm_address_t ) addr ,
get_stack_for_address_enumerator ,
& task ) ;
}
else
{
uint32_t num_frames = 0 ;
err = __mach_stack_logging_get_frames ( task , ( mach_vm_address_t ) addr , g_stack_frames , MAX_FRAMES , & num_frames ) ;
if ( err = = 0 & & num_frames > 0 )
{
g_malloc_stack_history . resize ( 1 ) ;
g_malloc_stack_history . back ( ) . address = addr ;
g_malloc_stack_history . back ( ) . type_flags = stack_logging_type_alloc ;
g_malloc_stack_history . back ( ) . argument = 0 ;
if ( num_frames > 0 )
g_malloc_stack_history . back ( ) . frames . assign ( g_stack_frames , g_stack_frames + num_frames ) ;
g_malloc_stack_history . back ( ) . frames . push_back ( 0 ) ; // Terminate the frames with zero
}
}
2012-05-11 07:17:28 +08:00
// Append an empty entry
if ( g_malloc_stack_history . empty ( ) )
return NULL ;
g_malloc_stack_history . resize ( g_malloc_stack_history . size ( ) + 1 ) ;
2012-05-11 07:20:50 +08:00
g_malloc_stack_history . back ( ) . address = 0 ;
2012-05-11 07:17:28 +08:00
g_malloc_stack_history . back ( ) . type_flags = 0 ;
g_malloc_stack_history . back ( ) . argument = 0 ;
return g_malloc_stack_history . data ( ) ;
}
2012-04-25 09:25:04 +08:00
//----------------------------------------------------------------------
// find_pointer_in_heap
//
// Finds a pointer value inside one or more currently valid malloc
// blocks.
//----------------------------------------------------------------------
malloc_match *
find_pointer_in_heap ( const void * addr )
{
g_matches . clear ( ) ;
// Setup "info" to look for a malloc block that contains data
// that is the a pointer
range_contains_data_callback_info_t data_info ;
data_info . type = eDataTypeContainsData ; // Check each block for data
g_lookup_addr = addr ;
data_info . data . buffer = ( uint8_t * ) & addr ; // What data? The pointer value passed in
data_info . data . size = sizeof ( addr ) ; // How many bytes? The byte size of a pointer
data_info . data . align = sizeof ( addr ) ; // Align to a pointer byte size
data_info . match_count = 0 ; // Initialize the match count to zero
data_info . done = false ; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone , range_info_callback , & data_info } ;
foreach_zone_in_this_process ( & info ) ;
if ( g_matches . empty ( ) )
return NULL ;
malloc_match match = { NULL , 0 , 0 } ;
g_matches . push_back ( match ) ;
return g_matches . data ( ) ;
}
//----------------------------------------------------------------------
// find_cstring_in_heap
//
// Finds a C string inside one or more currently valid malloc blocks.
//----------------------------------------------------------------------
malloc_match *
find_cstring_in_heap ( const char * s )
{
g_matches . clear ( ) ;
if ( s = = NULL | | s [ 0 ] = = ' \0 ' )
{
printf ( " error: invalid argument (empty cstring) \n " ) ;
return NULL ;
}
// Setup "info" to look for a malloc block that contains data
// that is the C string passed in aligned on a 1 byte boundary
range_contains_data_callback_info_t data_info ;
data_info . type = eDataTypeContainsData ; // Check each block for data
g_lookup_addr = s ; // If an expression was used, then fill in the resolved address we are looking up
data_info . data . buffer = ( uint8_t * ) s ; // What data? The C string passed in
data_info . data . size = strlen ( s ) ; // How many bytes? The length of the C string
data_info . data . align = 1 ; // Data doesn't need to be aligned, so set the alignment to 1
data_info . match_count = 0 ; // Initialize the match count to zero
data_info . done = false ; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone , range_info_callback , & data_info } ;
foreach_zone_in_this_process ( & info ) ;
if ( g_matches . empty ( ) )
return NULL ;
malloc_match match = { NULL , 0 , 0 } ;
g_matches . push_back ( match ) ;
return g_matches . data ( ) ;
}
//----------------------------------------------------------------------
// find_block_for_address
//
// Find the malloc block that whose address range contains "addr".
//----------------------------------------------------------------------
malloc_match *
find_block_for_address ( const void * addr )
{
g_matches . clear ( ) ;
// Setup "info" to look for a malloc block that contains data
// that is the C string passed in aligned on a 1 byte boundary
range_contains_data_callback_info_t data_info ;
g_lookup_addr = addr ; // If an expression was used, then fill in the resolved address we are looking up
data_info . type = eDataTypeAddress ; // Check each block to see if the block contains the address passed in
data_info . addr = ( uintptr_t ) addr ; // What data? The C string passed in
data_info . match_count = 0 ; // Initialize the match count to zero
data_info . done = false ; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone , range_info_callback , & data_info } ;
foreach_zone_in_this_process ( & info ) ;
if ( g_matches . empty ( ) )
return NULL ;
malloc_match match = { NULL , 0 , 0 } ;
g_matches . push_back ( match ) ;
return g_matches . data ( ) ;
}