diff --git a/pwndbg/glibc.py b/pwndbg/glibc.py index f0908a4a..f16e5446 100644 --- a/pwndbg/glibc.py +++ b/pwndbg/glibc.py @@ -3,6 +3,7 @@ Get information about the GLibc """ import functools +import os import re import gdb @@ -60,16 +61,14 @@ def _get_version(): @pwndbg.lib.memoize.reset_on_start @pwndbg.lib.memoize.reset_on_objfile def get_got_plt_address(): - libc_filename = next( - ( - objfile.filename - for objfile in gdb.objfiles() - if re.search(r"^libc(\.|-.+\.)so", objfile.filename.split("/")[-1]) - ), - None, - ) - if libc_filename: - for line in pwndbg.gdblib.info.files().splitlines(): + # Try every possible object file, to find which one has `.got.plt` section showed in `info files` + for libc_filename in ( + objfile.filename + for objfile in gdb.objfiles() + if re.search(r"^libc(\.|-.+\.)so", os.path.basename(objfile.filename)) + ): + out = pwndbg.gdblib.info.files() + for line in out.splitlines(): if libc_filename in line and ".got.plt" in line: return int(line.strip().split()[0], 16) return 0 diff --git a/pwndbg/heap/ptmalloc.py b/pwndbg/heap/ptmalloc.py index 7234230a..cfdb8eb9 100644 --- a/pwndbg/heap/ptmalloc.py +++ b/pwndbg/heap/ptmalloc.py @@ -1086,15 +1086,12 @@ class HeuristicHeap(Heap): found = True break tmp_arena = self.malloc_state(tmp_next) - try: + if ( + pwndbg.gdblib.vmmap.find(tmp_arena.get_field_address("next")) + is not None + ): tmp_next = int(tmp_arena["next"]) - except (gdb.MemoryError, gdb.error, OverflowError): - # Since we are just guessing the correct address by reading every possible address, it has high possibility to get the following errors when reading an invalid address: - # 1) If we try to read unmapped memory, we will get `gdb.MemoryError` - # 2) `tmp_arena["next"]` will try to use `gdb.Value(tmp_next+offset)` during `pwndbg.gdblib.memory.poi`, but if `tmp_next+offset` >= 2 ** 64 which is too big for GDB, it will raise `OverflowError: int too big to convert` - # 3) Since GDB's Python API is buggy sometimes, to catch 
some weird things we missed, we also catch the `gdb.error` here :) - # (So `gdb.error` is not necessary, this can be removed if we are sure the above first two cases can cover all possible errors) - + else: # if `&tmp_arena->next` is not valid, the linked list is broken, break this while loop and try `addr+pwndbg.gdblib.arch.ptrsize` again break if found: @@ -1514,7 +1511,7 @@ class HeuristicHeap(Heap): @property def mp(self): - mp_via_config = int(str(pwndbg.gdblib.config.mp_), 0) + mp_via_config = int(str(pwndbg.gdblib.config.mp), 0) mp_via_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( "mp_" ) or pwndbg.gdblib.symbol.address("mp_") @@ -1634,24 +1631,28 @@ class HeuristicHeap(Heap): arena = self.main_arena except SymbolUnresolvableError: arena = None + region = None + # Try to find heap region via `main_arena.top` if self._main_arena_addr and arena: region = self.get_region(arena["top"]) - else: - # If we can't find main_arena via heuristics, try to find it via vmmap - region = next(p for p in pwndbg.gdblib.vmmap.get() if "heap]" in p.objfile) - possible_sbrk_base = region.start - - sbrk_offset = self.malloc_par(0).get_field_address("sbrk_base") - # try to search sbrk_base in a part of libc page - result = pwndbg.search.search( - pwndbg.gdblib.arch.pack(possible_sbrk_base), - start=libc_page.start, - end=libc_page.end, + # If we can't use `main_arena` to find the heap region, try to find it via vmmap + region = region or next( + (p for p in pwndbg.gdblib.vmmap.get() if "[heap]" == p.objfile), None ) - try: - self._mp_addr = next(result) - sbrk_offset - except StopIteration: - pass + if region is not None: + possible_sbrk_base = region.start + + sbrk_offset = self.malloc_par(0).get_field_address("sbrk_base") + # try to search sbrk_base in a part of libc page + result = pwndbg.search.search( + pwndbg.gdblib.arch.pack(possible_sbrk_base), + start=libc_page.start, + end=libc_page.end, + ) + try: + self._mp_addr = next(result) - sbrk_offset + except 
StopIteration: + pass if self._mp_addr and pwndbg.gdblib.vmmap.find(self._mp_addr) is not None: self._mp = self.malloc_par(self._mp_addr) @@ -1692,11 +1693,11 @@ class HeuristicHeap(Heap): _int_malloc_instructions = pwndbg.disasm.near( _int_malloc_addr, 25, show_prev_insns=False ) - # cmp qword ptr [rip + global_max_fast_offset], 0x1f + # find first `cmp` instruction like: `cmp something, qword ptr [rip + disp]` global_max_fast_ref = next( instr for instr in _int_malloc_instructions - if instr.mnemonic == "cmp" and instr.op_str.startswith("qword ptr [rip +") + if instr.mnemonic == "cmp" and "qword ptr [rip +" in instr.op_str ) self._global_max_fast_addr = global_max_fast_ref.next + global_max_fast_ref.disp elif pwndbg.gdblib.arch.current == "i386" and self.possible_page_of_symbols: @@ -1857,32 +1858,43 @@ class HeuristicHeap(Heap): """Find the boundaries of the heap containing `addr`, default to the boundaries of the heap containing the top chunk for the thread's arena. """ - arena = self.get_arena(addr) - if arena is not None and arena.address > 0: + try: region = self.get_region(addr) if addr else self.get_region(self.get_arena()["top"]) - else: - # If we can't find an arena via heuristics, try to find it via vmmap - region = next(p for p in pwndbg.gdblib.vmmap.get() if "heap]" in p.objfile) + except Exception: + # Although `self.get_arena` should only raise `SymbolUnresolvableError`, we catch all exceptions here to avoid some bugs in main_arena's heuristics break this function :) + pass + # If we can't use arena to find the heap region, we use vmmap to find the heap region + region = next((p for p in pwndbg.gdblib.vmmap.get() if "[heap]" == p.objfile), None) + if region is not None and addr is not None: + region = None if addr not in region else region # Occasionally, the [heap] vm region and the actual start of the heap are # different, e.g. [heap] starts at 0x61f000 but mp_.sbrk_base is 0x620000. # Return an adjusted Page object if this is the case. 
if not self._mp_addr: - self.mp # try to fetch the mp_ structure to make sure it's initialized + try: + self.mp # try to fetch the mp_ structure to make sure it's initialized + except Exception: + # Although `self.mp` should only raise `SymbolUnresolvableError`, we catch all exceptions here to avoid some bugs in mp_'s heuristics break this function :) + pass if self._mp_addr: # sometimes we can't find mp_ via heuristics page = pwndbg.lib.memory.Page(0, 0, 0, 0) - sbrk_base = int(self.mp["sbrk_base"]) - if region == self.get_region(sbrk_base): - if sbrk_base != region.vaddr: - page.vaddr = sbrk_base - page.memsz = region.memsz - (sbrk_base - region.vaddr) - return page + # make sure mp["sbrk_base"] is valid + if self.get_region(self.mp.get_field_address("sbrk_base")) and self.get_region( + self.mp["sbrk_base"] + ): + sbrk_base = int(self.mp["sbrk_base"]) + if region == self.get_region(sbrk_base): + if sbrk_base != region.vaddr: + page.vaddr = sbrk_base + page.memsz = region.memsz - (sbrk_base - region.vaddr) + return page return region def is_initialized(self): # TODO/FIXME: If main_arena['top'] is been modified to 0, this will not work. 
# try to use vmmap or main_arena.top to find the heap return ( - any("heap]" in x.objfile for x in pwndbg.gdblib.vmmap.get()) + any("[heap]" == x.objfile for x in pwndbg.gdblib.vmmap.get()) or self.main_arena["top"] != 0 ) diff --git a/tests/heap/test_heap.py b/tests/heap/test_heap.py index 4c4fcb9a..89d48cb3 100644 --- a/tests/heap/test_heap.py +++ b/tests/heap/test_heap.py @@ -1,7 +1,14 @@ import gdb import pwndbg +import pwndbg.gdblib.arch +import pwndbg.gdblib.memory +import pwndbg.gdblib.symbol +import pwndbg.gdblib.typeinfo +import pwndbg.glibc +import pwndbg.heap import tests +from pwndbg.heap.ptmalloc import SymbolUnresolvableError HEAP_MALLOC_CHUNK = tests.binaries.get("heap_malloc_chunk.out") @@ -102,3 +109,257 @@ def test_malloc_chunk_command_heuristic(start_binary): for name in chunk_types: assert results[name] == expected[name] + + +class mock_for_heuristic: + def __init__(self, mock_symbols=[], mock_all=False, mess_up_memory=False): + self.mock_symbols = ( + mock_symbols # every symbol's address in the list will be mocked to `None` + ) + self.mock_all = mock_all # all symbols will be mocked to `None` + # Save `pwndbg.gdblib.symbol.address` and `pwndbg.gdblib.symbol.static_linkage_symbol_address` before mocking + self.saved_address_func = pwndbg.gdblib.symbol.address + self.saved_static_linkage_symbol_address_func = ( + pwndbg.gdblib.symbol.static_linkage_symbol_address + ) + # We mess up the memory in the page of the symbols, to make sure that the heuristic will not succeed by parsing the memory + self.mess_up_memory = mess_up_memory + if mess_up_memory: + # Save all the memory before we mess it up + self.page = pwndbg.heap.current.possible_page_of_symbols + self.saved_memory = pwndbg.gdblib.memory.read(self.page.vaddr, self.page.memsz) + + def __enter__(self): + def mock(original): + def _mock(symbol, *args, **kwargs): + if self.mock_all: + return None + for s in self.mock_symbols: + if s == symbol: + return None + return original(symbol, *args, **kwargs) + + return
_mock + + # Mock `pwndbg.gdblib.symbol.address` and `pwndbg.gdblib.symbol.static_linkage_symbol_address` + pwndbg.gdblib.symbol.address = mock(pwndbg.gdblib.symbol.address) + pwndbg.gdblib.symbol.static_linkage_symbol_address = mock( + pwndbg.gdblib.symbol.static_linkage_symbol_address + ) + if self.mess_up_memory: + # Fill the page with `0xff` + pwndbg.gdblib.memory.write(self.page.vaddr, b"\xff" * self.page.memsz) + + def __exit__(self, exc_type, exc_value, traceback): + # Restore `pwndbg.gdblib.symbol.address` and `pwndbg.gdblib.symbol.static_linkage_symbol_address` + pwndbg.gdblib.symbol.address = self.saved_address_func + pwndbg.gdblib.symbol.static_linkage_symbol_address = ( + self.saved_static_linkage_symbol_address_func + ) + if self.mess_up_memory: + # Restore the memory + pwndbg.gdblib.memory.write(self.page.vaddr, self.saved_memory) + + +def test_main_arena_heuristic(start_binary): + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + # Use the debug symbol to get the address of `main_arena` + main_arena_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( + "main_arena" + ) or pwndbg.gdblib.symbol.address("main_arena") + + # Level 1: We check we can get the address of `main_arena` from debug symbols and the struct of `main_arena` is correct + assert pwndbg.heap.current.main_arena is not None + # Check the address of `main_arena` is correct + assert pwndbg.heap.current.main_arena.address == main_arena_addr_via_debug_symbol + # Check the struct size is correct + assert ( + pwndbg.heap.current.main_arena.type.sizeof + == pwndbg.gdblib.typeinfo.lookup_types("struct malloc_state").sizeof + ) + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 2.1: We check we can get the address of `main_arena` by parsing the assembly code of `malloc_trim` + with mock_for_heuristic(["main_arena"], mess_up_memory=True): + assert
pwndbg.heap.current.main_arena is not None + # Check the address of `main_arena` is correct + assert pwndbg.heap.current.main_arena.address == main_arena_addr_via_debug_symbol + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 2.2: No `__malloc_hook` this time, because it's possible to find `main_arena` by some magic about it + with mock_for_heuristic(["main_arena", "__malloc_hook"], mess_up_memory=True): + assert pwndbg.heap.current.main_arena is not None + # Check the address of `main_arena` is correct + assert pwndbg.heap.current.main_arena.address == main_arena_addr_via_debug_symbol + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 3: We check we can get the address of `main_arena` by parsing the memory + with mock_for_heuristic(mock_all=True): + # Check the address of `main_arena` is correct + assert pwndbg.heap.current.main_arena.address == main_arena_addr_via_debug_symbol + + +def test_mp_heuristic(start_binary): + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + # Use the debug symbol to get the address of `mp_` + mp_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( + "mp_" + ) or pwndbg.gdblib.symbol.address("mp_") + + # Level 1: We check we can get the address of `mp_` from debug symbols and the struct of `mp_` is correct + assert pwndbg.heap.current.mp is not None + # Check the address of `mp_` is correct + assert pwndbg.heap.current.mp.address == mp_addr_via_debug_symbol + # Check the struct size is correct + # FIXME: We still have bug for GLIBC >= 2.35 in this heuristic because the size of `malloc_par` is changed + # So this test will fail for the tests on ubuntu 22.04 + # TODO: Fix the bug and enable this test + if pwndbg.glibc.get_version() < (2, 35): + assert ( + pwndbg.heap.current.mp.type.sizeof + ==
pwndbg.gdblib.typeinfo.lookup_types("struct malloc_par").sizeof + ) + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 2: We check we can get the address of `mp_` by parsing the assembly code of `__libc_free` + with mock_for_heuristic(["mp_"], mess_up_memory=True): + assert pwndbg.heap.current.mp is not None + # Check the address of `mp_` is correct + assert pwndbg.heap.current.mp.address == mp_addr_via_debug_symbol + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 3: We check we can get the address of `mp_` by parsing the memory + # FIXME: We still have bug for GLIBC >= 2.35 in this heuristic because the size of `malloc_par` is changed + # So this test will fail for the tests on ubuntu 22.04 + # TODO: Fix the bug and enable this test + if pwndbg.glibc.get_version() < (2, 35): + with mock_for_heuristic(mock_all=True): + # Check the address of `mp_` is correct + assert pwndbg.heap.current.mp.address == mp_addr_via_debug_symbol + + +def test_global_max_fast_heuristic(start_binary): + # TODO: Support other architectures or different libc versions + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + # Use the debug symbol to find the address of `global_max_fast` + global_max_fast_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( + "global_max_fast" + ) or pwndbg.gdblib.symbol.address("global_max_fast") + assert global_max_fast_addr_via_debug_symbol is not None + + # Level 1: We check we can get the address of `global_max_fast` from debug symbols and the value of `global_max_fast` is correct + assert pwndbg.heap.current.global_max_fast is not None + # Check the address of `global_max_fast` is correct + assert pwndbg.heap.current._global_max_fast_addr == global_max_fast_addr_via_debug_symbol + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the 
heap object of pwndbg + + # Level 2: We check we can get the address of `global_max_fast` by parsing the assembly code of `__libc_free` + # Mock the address of `global_max_fast` to None + with mock_for_heuristic(["global_max_fast"]): + # Use heuristic to find `global_max_fast` + assert pwndbg.heap.current.global_max_fast is not None + # Check the address of `global_max_fast` is correct + assert pwndbg.heap.current._global_max_fast_addr == global_max_fast_addr_via_debug_symbol + + +def test_thread_cache_heuristic(start_binary): + # TODO: Support other architectures or different libc versions + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + # Use the debug symbol to find the address of `thread_cache` + tcache_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( + "tcache" + ) or pwndbg.gdblib.symbol.address("tcache") + thread_cache_addr_via_debug_symbol = pwndbg.gdblib.memory.u(tcache_addr_via_debug_symbol) + + # Level 1: We check we can get the address of `thread_cache` from debug symbols and the struct of `thread_cache` is correct + assert pwndbg.heap.current.thread_cache is not None + # Check the address of `thread_cache` is correct + assert pwndbg.heap.current.thread_cache.address == thread_cache_addr_via_debug_symbol + # Check the struct size is correct + assert ( + pwndbg.heap.current.thread_cache.type.sizeof + == pwndbg.gdblib.typeinfo.lookup_types("struct tcache_perthread_struct").sizeof + ) + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 2: We check we can get the address of `thread_cache` by parsing the assembly code of `__libc_malloc` + # TODO: Find a good way to check we successfully get the address of `thread_cache` by parsing the assembly code instead of using the first chunk of `thread_cache` + # Note: This is only useful when we can NOT find the heap boundaries and the arena has
been shared, it should not be a big problem in most of the cases + + # Level 3: We check we can get the address of `thread_cache` by using the first chunk + # Note: This will NOT work when we can NOT find the heap boundaries or the arena has been shared + with mock_for_heuristic(["tcache", "__libc_malloc"]): + # Check the address of `thread_cache` is correct + assert pwndbg.heap.current.thread_cache.address == thread_cache_addr_via_debug_symbol + + +def test_thread_arena_heuristic(start_binary): + # TODO: Support other architectures or different libc versions + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + # Use the debug symbol to find the value of `thread_arena` + thread_arena_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address( + "thread_arena" + ) or pwndbg.gdblib.symbol.address("thread_arena") + assert thread_arena_via_debug_symbol is not None + thread_arena_via_debug_symbol = pwndbg.gdblib.memory.u(thread_arena_via_debug_symbol) + assert thread_arena_via_debug_symbol > 0 + + # Level 1: We check we can get the address of `thread_arena` from debug symbols and the value of `thread_arena` is correct + assert pwndbg.heap.current.thread_arena is not None + # Check the address of `thread_arena` is correct + assert pwndbg.heap.current.thread_arena == thread_arena_via_debug_symbol + pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg + + # Level 2: We check we can get the address of `thread_arena` by parsing the assembly code of `__libc_calloc` + # Mock the address of `thread_arena` to None + with mock_for_heuristic(["thread_arena"]): + assert pwndbg.gdblib.symbol.address("thread_arena") is None + # Check the value of `thread_arena` is correct + assert pwndbg.heap.current.thread_arena == thread_arena_via_debug_symbol + + +def test_heuristic_fail_gracefully(start_binary): + # TODO: Support other architectures or
different libc versions + start_binary(HEAP_MALLOC_CHUNK) + gdb.execute("set resolve-heap-via-heuristic on") + gdb.execute("break break_here") + gdb.execute("continue") + + def _test_heuristic_fail_gracefully(name): + try: + getattr(pwndbg.heap.current, name) + raise AssertionError( + "The heuristic for pwndbg.heap.current.%s should fail with SymbolUnresolvableError" + % name + ) + except SymbolUnresolvableError: + # That's the only exception we expect + pass + + # Mock all address and mess up the memory + with mock_for_heuristic(mock_all=True, mess_up_memory=True): + _test_heuristic_fail_gracefully("main_arena") + _test_heuristic_fail_gracefully("mp") + _test_heuristic_fail_gracefully("global_max_fast") + _test_heuristic_fail_gracefully("thread_cache") + _test_heuristic_fail_gracefully("thread_arena")