Unit tests and some bug fixes for the heap heuristics (#1283)

This commit is contained in:
Alan Li 2022-10-14 12:07:07 +08:00 committed by GitHub
parent e456b317d7
commit b4cdcdfcfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 321 additions and 49 deletions

View File

@ -3,6 +3,7 @@ Get information about the GLibc
"""
import functools
import os
import re
import gdb
@ -60,16 +61,14 @@ def _get_version():
@pwndbg.lib.memoize.reset_on_start
@pwndbg.lib.memoize.reset_on_objfile
def get_got_plt_address():
libc_filename = next(
(
objfile.filename
for objfile in gdb.objfiles()
if re.search(r"^libc(\.|-.+\.)so", objfile.filename.split("/")[-1])
),
None,
)
if libc_filename:
for line in pwndbg.gdblib.info.files().splitlines():
# Try every possible object file to find which one has a `.got.plt` section shown in `info files`
for libc_filename in (
objfile.filename
for objfile in gdb.objfiles()
if re.search(r"^libc(\.|-.+\.)so", os.path.basename(objfile.filename))
):
out = pwndbg.gdblib.info.files()
for line in out.splitlines():
if libc_filename in line and ".got.plt" in line:
return int(line.strip().split()[0], 16)
return 0

View File

@ -1086,15 +1086,12 @@ class HeuristicHeap(Heap):
found = True
break
tmp_arena = self.malloc_state(tmp_next)
try:
if (
pwndbg.gdblib.vmmap.find(tmp_arena.get_field_address("next"))
is not None
):
tmp_next = int(tmp_arena["next"])
except (gdb.MemoryError, gdb.error, OverflowError):
# Since we are just guessing the correct address by reading every possible address, it has high possibility to get the following errors when reading an invalid address:
# 1) If we try to read unmapped memory, we will get `gdb.MemoryError`
# 2) `tmp_arena["next"]` will try to use `gdb.Value(tmp_next+offset)` during `pwndbg.gdblib.memory.poi`, but if `tmp_next+offset` >= 2 ** 64 which is too big for GDB, it will raise `OverflowError: int too big to convert`
# 3) Since GDB's Python API is buggy sometimes, to catch some weird things we missed, we also catch the `gdb.error` here :)
# (So `gdb.error` is not necessary, this can be removed if we are sure the above first two cases can cover all possible errors)
else:
# if `&tmp_arena->next` is not valid, the linked list is broken, break this while loop and try `addr+pwndbg.gdblib.arch.ptrsize` again
break
if found:
@ -1514,7 +1511,7 @@ class HeuristicHeap(Heap):
@property
def mp(self):
mp_via_config = int(str(pwndbg.gdblib.config.mp_), 0)
mp_via_config = int(str(pwndbg.gdblib.config.mp), 0)
mp_via_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address(
"mp_"
) or pwndbg.gdblib.symbol.address("mp_")
@ -1634,24 +1631,28 @@ class HeuristicHeap(Heap):
arena = self.main_arena
except SymbolUnresolvableError:
arena = None
region = None
# Try to find heap region via `main_arena.top`
if self._main_arena_addr and arena:
region = self.get_region(arena["top"])
else:
# If we can't find main_arena via heuristics, try to find it via vmmap
region = next(p for p in pwndbg.gdblib.vmmap.get() if "heap]" in p.objfile)
possible_sbrk_base = region.start
sbrk_offset = self.malloc_par(0).get_field_address("sbrk_base")
# try to search sbrk_base in a part of libc page
result = pwndbg.search.search(
pwndbg.gdblib.arch.pack(possible_sbrk_base),
start=libc_page.start,
end=libc_page.end,
# If we can't use `main_arena` to find the heap region, try to find it via vmmap
region = region or next(
(p for p in pwndbg.gdblib.vmmap.get() if "[heap]" == p.objfile), None
)
try:
self._mp_addr = next(result) - sbrk_offset
except StopIteration:
pass
if region is not None:
possible_sbrk_base = region.start
sbrk_offset = self.malloc_par(0).get_field_address("sbrk_base")
# try to search sbrk_base in a part of libc page
result = pwndbg.search.search(
pwndbg.gdblib.arch.pack(possible_sbrk_base),
start=libc_page.start,
end=libc_page.end,
)
try:
self._mp_addr = next(result) - sbrk_offset
except StopIteration:
pass
if self._mp_addr and pwndbg.gdblib.vmmap.find(self._mp_addr) is not None:
self._mp = self.malloc_par(self._mp_addr)
@ -1692,11 +1693,11 @@ class HeuristicHeap(Heap):
_int_malloc_instructions = pwndbg.disasm.near(
_int_malloc_addr, 25, show_prev_insns=False
)
# cmp qword ptr [rip + global_max_fast_offset], 0x1f
# find first `cmp` instruction like: `cmp something, qword ptr [rip + disp]`
global_max_fast_ref = next(
instr
for instr in _int_malloc_instructions
if instr.mnemonic == "cmp" and instr.op_str.startswith("qword ptr [rip +")
if instr.mnemonic == "cmp" and "qword ptr [rip +" in instr.op_str
)
self._global_max_fast_addr = global_max_fast_ref.next + global_max_fast_ref.disp
elif pwndbg.gdblib.arch.current == "i386" and self.possible_page_of_symbols:
@ -1857,32 +1858,43 @@ class HeuristicHeap(Heap):
"""Find the boundaries of the heap containing `addr`, default to the
boundaries of the heap containing the top chunk for the thread's arena.
"""
arena = self.get_arena(addr)
if arena is not None and arena.address > 0:
try:
region = self.get_region(addr) if addr else self.get_region(self.get_arena()["top"])
else:
# If we can't find an arena via heuristics, try to find it via vmmap
region = next(p for p in pwndbg.gdblib.vmmap.get() if "heap]" in p.objfile)
except Exception:
# Although `self.get_arena` should only raise `SymbolUnresolvableError`, we catch all exceptions here so that bugs in main_arena's heuristics cannot break this function :)
pass
# If we can't use arena to find the heap region, we use vmmap to find the heap region
region = next((p for p in pwndbg.gdblib.vmmap.get() if "[heap]" == p.objfile), None)
if region is not None and addr is not None:
region = None if addr not in region else region
# Occasionally, the [heap] vm region and the actual start of the heap are
# different, e.g. [heap] starts at 0x61f000 but mp_.sbrk_base is 0x620000.
# Return an adjusted Page object if this is the case.
if not self._mp_addr:
self.mp # try to fetch the mp_ structure to make sure it's initialized
try:
self.mp # try to fetch the mp_ structure to make sure it's initialized
except Exception:
# Although `self.mp` should only raise `SymbolUnresolvableError`, we catch all exceptions here so that bugs in mp_'s heuristics cannot break this function :)
pass
if self._mp_addr: # sometimes we can't find mp_ via heuristics
page = pwndbg.lib.memory.Page(0, 0, 0, 0)
sbrk_base = int(self.mp["sbrk_base"])
if region == self.get_region(sbrk_base):
if sbrk_base != region.vaddr:
page.vaddr = sbrk_base
page.memsz = region.memsz - (sbrk_base - region.vaddr)
return page
# make sure mp["sbrk_base"] is valid
if self.get_region(self.mp.get_field_address("sbrk_base")) and self.get_region(
self.mp["sbrk_base"]
):
sbrk_base = int(self.mp["sbrk_base"])
if region == self.get_region(sbrk_base):
if sbrk_base != region.vaddr:
page.vaddr = sbrk_base
page.memsz = region.memsz - (sbrk_base - region.vaddr)
return page
return region
def is_initialized(self):
# TODO/FIXME: If main_arena['top'] has been modified to 0, this will not work.
# try to use vmmap or main_arena.top to find the heap
return (
any("heap]" in x.objfile for x in pwndbg.gdblib.vmmap.get())
any("[heap]" == x.objfile for x in pwndbg.gdblib.vmmap.get())
or self.main_arena["top"] != 0
)

View File

@ -1,7 +1,14 @@
import gdb
import pwndbg
import pwndbg.gdblib.arch
import pwndbg.gdblib.memory
import pwndbg.gdblib.symbol
import pwndbg.gdblib.typeinfo
import pwndbg.glibc
import pwndbg.heap
import tests
from pwndbg.heap.ptmalloc import SymbolUnresolvableError
HEAP_MALLOC_CHUNK = tests.binaries.get("heap_malloc_chunk.out")
@ -102,3 +109,257 @@ def test_malloc_chunk_command_heuristic(start_binary):
for name in chunk_types:
assert results[name] == expected[name]
class mock_for_heuristic:
    """Context manager that hides symbols so the heap heuristics have to do the work.

    Inside the `with` block, `pwndbg.gdblib.symbol.address` and
    `pwndbg.gdblib.symbol.static_linkage_symbol_address` are replaced by
    wrappers that return `None` for the selected symbols and defer to the
    wrapped function for every other symbol. With `mess_up_memory=True`, the
    page reported by `pwndbg.heap.current.possible_page_of_symbols` (captured
    when the object is constructed) is additionally filled with `0xff` bytes.
    Both changes are undone on exit.

    Args:
        mock_symbols: Symbol names whose lookups should return `None`.
        mock_all: If true, every symbol lookup returns `None`.
        mess_up_memory: If true, overwrite the page of the symbols while the
            context is active, so a heuristic cannot succeed by parsing it.
    """

    def __init__(self, mock_symbols=(), mock_all=False, mess_up_memory=False):
        # An immutable default: a `[]` default would be one list object shared
        # by every instance created without an explicit argument.
        self.mock_symbols = mock_symbols
        self.mock_all = mock_all

        # Remember the real lookup functions so `__exit__` can put them back.
        self.saved_address_func = pwndbg.gdblib.symbol.address
        self.saved_static_linkage_symbol_address_func = (
            pwndbg.gdblib.symbol.static_linkage_symbol_address
        )

        # We mess up the memory in the page of the symbols, to make sure that
        # the heuristic will not succeed by parsing the memory.
        self.mess_up_memory = mess_up_memory
        if mess_up_memory:
            # Save all the memory before we mess it up.
            self.page = pwndbg.heap.current.possible_page_of_symbols
            self.saved_memory = pwndbg.gdblib.memory.read(self.page.vaddr, self.page.memsz)

    def __enter__(self):
        """Install the mocked lookups (and corrupt the page if requested).

        Returns:
            This object, so `with mock_for_heuristic(...) as m:` is usable.
        """

        def mock(original):
            def _mock(symbol, *args, **kwargs):
                # Hidden symbols resolve to `None`; everything else is
                # forwarded untouched to the wrapped lookup.
                if self.mock_all or symbol in self.mock_symbols:
                    return None
                return original(symbol, *args, **kwargs)

            return _mock

        # Mock `pwndbg.gdblib.symbol.address` and
        # `pwndbg.gdblib.symbol.static_linkage_symbol_address`.
        pwndbg.gdblib.symbol.address = mock(pwndbg.gdblib.symbol.address)
        pwndbg.gdblib.symbol.static_linkage_symbol_address = mock(
            pwndbg.gdblib.symbol.static_linkage_symbol_address
        )
        if self.mess_up_memory:
            # Fill the page with `0xff`.
            pwndbg.gdblib.memory.write(self.page.vaddr, b"\xff" * self.page.memsz)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the real lookups and the saved page contents.

        Returns `None`, so an exception raised inside the block propagates.
        """
        pwndbg.gdblib.symbol.address = self.saved_address_func
        pwndbg.gdblib.symbol.static_linkage_symbol_address = (
            self.saved_static_linkage_symbol_address_func
        )
        if self.mess_up_memory:
            # Restore the memory.
            pwndbg.gdblib.memory.write(self.page.vaddr, self.saved_memory)
def test_main_arena_heuristic(start_binary):
    """Check each fallback level used to locate `main_arena` against the symbol address."""
    start_binary(HEAP_MALLOC_CHUNK)
    for setup_command in (
        "set resolve-heap-via-heuristic on",
        "break break_here",
        "continue",
    ):
        gdb.execute(setup_command)

    # Reference value: the address of `main_arena` taken from the debug symbol,
    # resolved before any lookup is mocked.
    expected_addr = pwndbg.gdblib.symbol.static_linkage_symbol_address("main_arena")
    if not expected_addr:
        expected_addr = pwndbg.gdblib.symbol.address("main_arena")

    # Level 1: with symbols available, `main_arena` must resolve to the symbol
    # address and its type must have the size of `struct malloc_state`.
    assert pwndbg.heap.current.main_arena is not None
    assert pwndbg.heap.current.main_arena.address == expected_addr
    resolved_size = pwndbg.heap.current.main_arena.type.sizeof
    struct_size = pwndbg.gdblib.typeinfo.lookup_types("struct malloc_state").sizeof
    assert resolved_size == struct_size

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 2.1: only `main_arena` is hidden (original note: found by parsing
    # the assembly code of `malloc_trim`).
    # Level 2.2: `__malloc_hook` is hidden as well, because it's possible to
    # find `main_arena` by some magic about it.
    for hidden_symbols in (["main_arena"], ["main_arena", "__malloc_hook"]):
        with mock_for_heuristic(hidden_symbols, mess_up_memory=True):
            assert pwndbg.heap.current.main_arena is not None
            assert pwndbg.heap.current.main_arena.address == expected_addr

        pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 3: every symbol is hidden, so the address must come from parsing memory.
    with mock_for_heuristic(mock_all=True):
        assert pwndbg.heap.current.main_arena.address == expected_addr
def test_mp_heuristic(start_binary):
    """Check each fallback level used to locate `mp_` against the symbol address."""
    start_binary(HEAP_MALLOC_CHUNK)
    for setup_command in (
        "set resolve-heap-via-heuristic on",
        "break break_here",
        "continue",
    ):
        gdb.execute(setup_command)

    # Reference value: the address of `mp_` taken from the debug symbol,
    # resolved before any lookup is mocked.
    expected_addr = pwndbg.gdblib.symbol.static_linkage_symbol_address("mp_")
    if not expected_addr:
        expected_addr = pwndbg.gdblib.symbol.address("mp_")

    # Level 1: with symbols available, `mp` must resolve to the symbol address.
    assert pwndbg.heap.current.mp is not None
    assert pwndbg.heap.current.mp.address == expected_addr
    # The struct size is only compared on older glibc.
    # FIXME: We still have a bug for GLIBC >= 2.35 in this heuristic because the
    # size of `malloc_par` changed, so this check would fail on ubuntu 22.04.
    # TODO: Fix the bug and enable this check everywhere.
    if pwndbg.glibc.get_version() < (2, 35):
        resolved_size = pwndbg.heap.current.mp.type.sizeof
        struct_size = pwndbg.gdblib.typeinfo.lookup_types("struct malloc_par").sizeof
        assert resolved_size == struct_size

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 2: only `mp_` is hidden (original note: found by parsing the
    # assembly code of `__libc_free`).
    with mock_for_heuristic(["mp_"], mess_up_memory=True):
        assert pwndbg.heap.current.mp is not None
        assert pwndbg.heap.current.mp.address == expected_addr

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 3: every symbol is hidden, so the address must come from parsing memory.
    # FIXME: Same GLIBC >= 2.35 `malloc_par` size bug as above, so this level is
    # skipped there as well.
    # TODO: Fix the bug and enable this check everywhere.
    if pwndbg.glibc.get_version() < (2, 35):
        with mock_for_heuristic(mock_all=True):
            assert pwndbg.heap.current.mp.address == expected_addr
def test_global_max_fast_heuristic(start_binary):
    """Check that `global_max_fast` is located both with and without its symbol."""
    # TODO: Support other architectures or different libc versions
    start_binary(HEAP_MALLOC_CHUNK)
    for setup_command in (
        "set resolve-heap-via-heuristic on",
        "break break_here",
        "continue",
    ):
        gdb.execute(setup_command)

    # Reference value: the address of `global_max_fast` from the debug symbol,
    # resolved before any lookup is mocked.
    expected_addr = pwndbg.gdblib.symbol.static_linkage_symbol_address("global_max_fast")
    if not expected_addr:
        expected_addr = pwndbg.gdblib.symbol.address("global_max_fast")
    assert expected_addr is not None

    # Level 1: with symbols available, the value resolves and the cached
    # address equals the symbol address.
    assert pwndbg.heap.current.global_max_fast is not None
    assert pwndbg.heap.current._global_max_fast_addr == expected_addr

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 2: the `global_max_fast` symbol is hidden, so the heuristic has to
    # find the address on its own.
    with mock_for_heuristic(["global_max_fast"]):
        assert pwndbg.heap.current.global_max_fast is not None
        assert pwndbg.heap.current._global_max_fast_addr == expected_addr
def test_thread_cache_heuristic(start_binary):
    """Check that `thread_cache` is located both with and without the `tcache` symbol."""
    # TODO: Support other architectures or different libc versions
    start_binary(HEAP_MALLOC_CHUNK)
    gdb.execute("set resolve-heap-via-heuristic on")
    gdb.execute("break break_here")
    gdb.execute("continue")

    # Use the debug symbol to find the address of `thread_cache`
    tcache_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address(
        "tcache"
    ) or pwndbg.gdblib.symbol.address("tcache")
    # Fail with a clear assertion (as the `thread_arena` test does) instead of
    # an obscure memory-read error when the symbol cannot be resolved.
    assert tcache_addr_via_debug_symbol is not None
    # `tcache` holds a pointer; read it to get the address of the struct itself.
    thread_cache_addr_via_debug_symbol = pwndbg.gdblib.memory.u(tcache_addr_via_debug_symbol)

    # Level 1: We check we can get the address of `thread_cache` from debug symbols and the struct of `thread_cache` is correct
    assert pwndbg.heap.current.thread_cache is not None
    # Check the address of `thread_cache` is correct
    assert pwndbg.heap.current.thread_cache.address == thread_cache_addr_via_debug_symbol
    # Check the struct size is correct
    assert (
        pwndbg.heap.current.thread_cache.type.sizeof
        == pwndbg.gdblib.typeinfo.lookup_types("struct tcache_perthread_struct").sizeof
    )

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Reset the heap object of pwndbg

    # Level 2: We check we can get the address of `thread_cache` by parsing the assembly code of `__libc_malloc`
    # TODO: Find a good way to check we successfully get the address of `thread_cache` by parsing the assembly code instead of using the first chunk of `thread_cache`
    # Note: This is only useful when we can NOT find the heap boundaries and the arena is being shared; it should not be a big problem in most cases

    # Level 3: We check we can get the address of `thread_cache` by using the first chunk
    # Note: This will NOT work when we can NOT find the heap boundaries or the arena is being shared
    with mock_for_heuristic(["tcache", "__libc_malloc"]):
        # Check the address of `thread_cache` is correct
        assert pwndbg.heap.current.thread_cache.address == thread_cache_addr_via_debug_symbol
def test_thread_arena_heuristic(start_binary):
    """Check that `thread_arena` is resolved both with and without its symbol."""
    # TODO: Support other architectures or different libc versions
    start_binary(HEAP_MALLOC_CHUNK)
    for setup_command in (
        "set resolve-heap-via-heuristic on",
        "break break_here",
        "continue",
    ):
        gdb.execute(setup_command)

    # Reference value: locate the `thread_arena` variable through the debug
    # symbol, then read the pointer stored there.
    symbol_addr = pwndbg.gdblib.symbol.static_linkage_symbol_address("thread_arena")
    if not symbol_addr:
        symbol_addr = pwndbg.gdblib.symbol.address("thread_arena")
    assert symbol_addr is not None
    expected_value = pwndbg.gdblib.memory.u(symbol_addr)
    assert expected_value > 0

    # Level 1: with symbols available, `thread_arena` equals the value read above.
    assert pwndbg.heap.current.thread_arena is not None
    assert pwndbg.heap.current.thread_arena == expected_value

    pwndbg.heap.current = type(pwndbg.heap.current)()  # Start over with a fresh heap object

    # Level 2: the `thread_arena` symbol is hidden (original note: found by
    # parsing the assembly code of `__libc_calloc`).
    with mock_for_heuristic(["thread_arena"]):
        # Confirm the mock is in effect before trusting the result below.
        assert pwndbg.gdblib.symbol.address("thread_arena") is None
        assert pwndbg.heap.current.thread_arena == expected_value
def test_heuristic_fail_gracefully(start_binary):
    """Check that each heuristic raises only `SymbolUnresolvableError` when it cannot succeed."""
    # TODO: Support other architectures or different libc versions
    start_binary(HEAP_MALLOC_CHUNK)
    for setup_command in (
        "set resolve-heap-via-heuristic on",
        "break break_here",
        "continue",
    ):
        gdb.execute(setup_command)

    def _test_heuristic_fail_gracefully(name):
        # Reading the attribute must raise `SymbolUnresolvableError`; any other
        # exception propagates, and a successful read is reported as a failure.
        try:
            getattr(pwndbg.heap.current, name)
        except SymbolUnresolvableError:
            return
        else:
            raise AssertionError(
                "The heuristic for pwndbg.heap.current.%s should fail with SymbolUnresolvableError"
                % name
            )

    # Hide every symbol and corrupt the page of the symbols so that no
    # heuristic has anything left to work with.
    with mock_for_heuristic(mock_all=True, mess_up_memory=True):
        for attribute in (
            "main_arena",
            "mp",
            "global_max_fast",
            "thread_cache",
            "thread_arena",
        ):
            _test_heuristic_fail_gracefully(attribute)