From eccfd91d869598496fa9f10c10f2429a656a5ff9 Mon Sep 17 00:00:00 2001 From: charif <72693057+chrf01@users.noreply.github.com> Date: Wed, 9 Oct 2024 12:13:44 +0200 Subject: [PATCH] add kallsyms + klookup (#2462) * Add kallsyms parser * Add klookup command * add klookup docs --- docs/commands/index.md | 1 + docs/commands/klookup/klookup.md | 27 ++ pwndbg/commands/__init__.py | 1 + pwndbg/commands/klookup.py | 32 ++ pwndbg/gdblib/kernel/kallsyms.py | 401 ++++++++++++++++++ .../tests/system/test_gdblib_kernel.py | 6 + 6 files changed, 468 insertions(+) create mode 100644 docs/commands/klookup/klookup.md create mode 100644 pwndbg/commands/klookup.py create mode 100644 pwndbg/gdblib/kernel/kallsyms.py diff --git a/docs/commands/index.md b/docs/commands/index.md index 1f37c982..e8f841e5 100644 --- a/docs/commands/index.md +++ b/docs/commands/index.md @@ -155,6 +155,7 @@ - [kchecksec](kchecksec/kchecksec.md) Checks for kernel hardening configuration options. - [kcmdline](kcmdline/kcmdline.md) Return the kernel commandline (/proc/cmdline). - [kconfig](kconfig/kconfig.md) Outputs the kernel config (requires CONFIG_IKCONFIG). +- [klookup](klookup/klookup.md) Lookup kernel symbols. - [kversion](kversion/kversion.md) Outputs the kernel version (/proc/version). - [slab](slab/slab.md) Prints information about the slab allocator diff --git a/docs/commands/klookup/klookup.md b/docs/commands/klookup/klookup.md new file mode 100644 index 00000000..a26520a9 --- /dev/null +++ b/docs/commands/klookup/klookup.md @@ -0,0 +1,27 @@ + + + + +# klookup + +## Description + + +Lookup kernel symbols. 
parser = argparse.ArgumentParser(description="Lookup kernel symbols")

parser.add_argument("symbol", type=str, help="Address or symbol name to lookup")


def _parse_address(text: str) -> int | None:
    """Return ``text`` as an integer address, or ``None`` if it is not numeric."""
    # Base 0 honours the 0x/0o/0b prefixes, so kernel addresses can be typed
    # in hex. Base 10 is retried afterwards so decimal input with leading
    # zeros, which base 0 rejects, keeps parsing the way plain int() did.
    for base in (0, 10):
        try:
            return int(text, base)
        except ValueError:
            continue
    return None


@pwndbg.commands.ArgparsedCommand(parser, category=CommandCategory.KERNEL)
@pwndbg.commands.OnlyWhenQemuKernel
@pwndbg.commands.OnlyWhenPagingEnabled
def klookup(symbol: str) -> None:
    """
    Print the kallsyms entry matching ``symbol``.

    ``symbol`` is first interpreted as an address (decimal, or prefixed
    hex/octal/binary); in that case the first symbol located at exactly that
    address is printed. Otherwise it is treated as a symbol name and its
    address is printed. A miss is reported as an error message in both modes.
    """
    ksyms = pwndbg.gdblib.kernel.kallsyms.get()

    symbol_addr = _parse_address(symbol)
    if symbol_addr is None:
        # Not a number: look the text up as a symbol name.
        entry = ksyms.get(symbol)
        if entry is None:
            print(message.error(f"No symbol found for {symbol}"))
        else:
            print(message.success(f"{symbol} = {entry[0]:#x}"))
        return

    # Each value is an (address, type) tuple; report the first exact match.
    for name, entry in ksyms.items():
        if entry[0] == symbol_addr:
            print(message.success(f"{name} = {symbol_addr:#x}"))
            return
    print(message.error(f"No symbol found at {symbol_addr:#x}"))
class Kallsyms:
    """
    Recover kernel symbols by parsing the kallsyms tables out of kernel memory.

    The constructor reads the first read-only kernel mapping, locates every
    kallsyms structure in it heuristically and fills ``self.kallsyms``, a dict
    mapping ``symbol name -> (address, type character)``.

    All ``find_*`` methods return offsets into ``self.kernel_ro_mem`` (not
    virtual addresses).

    Expected order of the structures in memory:

    - linux_banner >= 6.4
    - ... <= 6.4
    - kallsyms_offsets
    - kallsyms_relative_base
    - kallsyms_num_syms
    - kallsyms_names
    - kallsyms_markers
    - kallsyms_token_table
    - kallsyms_token_index
    - kallsyms_offsets >= 6.4
    - kallsyms_relative_base >= 6.4
    """

    def __init__(self):
        """Read the kernel's read-only mapping and parse the symbol table from it."""
        self.kallsyms = {}
        self.kbase = pwndbg.gdblib.kernel.kbase()

        mapping = pwndbg.gdblib.kernel.get_first_kernel_ro()
        self.r_base = mapping.vaddr
        self.kernel_ro_mem = pwndbg.gdblib.memory.read(mapping.vaddr, mapping.memsz)

        self.kernel_version = pwndbg.gdblib.kernel.krelease()
        # Set by find_offsets(): True when the table holds 32-bit offsets
        # (kallsyms_offsets), False when it holds 64-bit addresses.
        self.is_offsets = False

        self.rbase_offset = 0

        # The lookups below depend on each other, so their order matters.
        self.token_table = self.find_token_table()
        # TODO: if self.token_table is None its maybe an uncompressed kallsyms

        self.token_index = self.find_token_index()
        self.markers = self.find_markers()
        self.num_syms = self.find_num_syms()
        self.offsets = self.find_offsets()

        if self.is_offsets:
            self.rbase_offset = self.find_relative_base()

        self.names = self.find_names()
        self.kernel_addresses = self.get_kernel_addresses()
        self.parse_symbol_table()

    def _read_num_syms(self) -> int:
        """Return the symbol count stored at offset ``self.num_syms``."""
        # The count is a 4-byte value; reading 8 bytes would also pick up
        # whatever follows it in memory.
        return unpack_from("<I", self.kernel_ro_mem, self.num_syms)[0]

    def find_token_table(self) -> int | None:
        """
        This function searches for the kallsyms_token_table structure in the kernel memory.
        The kallsyms_token_table contains 256 zero-terminated tokens from which symbol names are built.

        The table is located through the tokens for the digits "0".."9", which
        appear consecutively, and is then walked backwards to its first token.

        Returns the offset of the table, or None (after printing an error) when
        no plausible table is found.

        Example structure:
        0xffffffff827b2f00: "mm"
        0xffffffff827b2f03: "tim"
        0xffffffff827b2f07: "bu"
        0xffffffff827b2f0a: "ode_"
        0xffffffff827b2f0f: "robestub"

        0xffffffff827b2fdb: "0"
        0xffffffff827b2fdd: "1"
        0xffffffff827b2fdf: "2"
        0xffffffff827b2fe1: "3"
        0xffffffff827b2fe3: "4"
        0xffffffff827b2fe5: "5"
        0xffffffff827b2fe7: "6"
        0xffffffff827b2fe9: "7"
        0xffffffff827b2feb: "8"
        0xffffffff827b2fed: "9"
        """
        mem = self.kernel_ro_mem
        sequence_to_find = b"".join(b"%c\0" % i for i in range(ord("0"), ord("9") + 1))
        sequences_to_avoid = [b":\0", b"\0\0", b"\0\1", b"\0\2", b"ASCII\0"]

        candidates = []
        ascii_candidates = []

        position = 0
        while True:
            position = mem.find(sequence_to_find, position + 1)
            if position == -1:
                break

            pos = position + len(sequence_to_find)
            if any(mem[pos : pos + len(seq)] == seq for seq in sequences_to_avoid):
                continue

            candidates.append(position)
            # A match ending exactly at the end of the buffer has no following
            # byte to inspect.
            if pos < len(mem) and 32 <= mem[pos] < 126:
                ascii_candidates.append(position)

        if len(candidates) != 1:
            if len(ascii_candidates) == 1:
                candidates = ascii_candidates
            elif not candidates:
                print(M.error("No candidates for token_table"))
                return None

        # The candidate points at the token for "0", i.e. token number 0x30;
        # step back over that many tokens to reach the start of the table.
        tokens_before_digits = 0x30
        max_token_length = 50

        position = candidates[0] - 1
        for _ in range(tokens_before_digits):
            for chars_in_token in range(max_token_length):
                position -= 1
                if position < 0:
                    # Ran off the start of the mapping before the table began.
                    print(M.error("This structure is not a kallsyms_token_table"))
                    return None

                if mem[position] == 0 or mem[position] > ord("z"):
                    break

                if chars_in_token >= max_token_length - 1:
                    print(M.error("This structure is not a kallsyms_token_table"))
                    return None

        position += 1
        position += -position % 4  # round up to a 4-byte boundary

        return position

    def find_token_index(self) -> int | None:
        """
        This function searches for the kallsyms_token_index structure in the kernel memory
        starting at kallsyms_token_table. The token index table provides offsets into the
        kallsyms_token_table for each of the 256 tokens.
        The kallsyms_token_index is typically located immediately after
        the kallsyms_token_table in the kernel's read-only data section.

        The expected beginning of the index is rebuilt from the first 256 bytes
        of the token table and then searched for in memory.

        Returns the offset of the index, or None (after printing an error).

        Example structure:
        0xffffffff827b3288: 0x0000 0x0003 0x0007 0x000a 0x000f 0x0018 0x001f 0x0023
        0xffffffff827b3298: 0x0027 0x0031 0x0035 0x0038 0x003b 0x0043 0x0047 0x004a
        0xffffffff827b32a8: 0x004f 0x0053 0x0056 0x0059 0x005d 0x0061 0x0067 0x006b
        0xffffffff827b32b8: 0x006e 0x0071 0x0076 0x007c 0x0080 0x0088 0x008b 0x008f
        0xffffffff827b32c8: 0x0094 0x0098 0x009b 0x009f 0x00a3 0x00a8 0x00ab 0x00b0
        """
        position = self.token_table

        token_table_head = self.kernel_ro_mem[position : position + 256]

        # Every token starts right after the NUL terminating the previous one.
        token_offsets = [p16(0)]

        pos = 0
        while True:
            pos = token_table_head.find(b"\0", pos + 1)
            if pos == -1:
                break
            token_offsets.append(p16(pos + 1))

        seq_to_find = b"".join(token_offsets)

        position = self.kernel_ro_mem.find(seq_to_find, self.token_table)
        if position == -1:
            print(M.error("Unable to find the kallsyms_token_index"))
            return None

        return position

    def find_markers(self) -> int | None:
        """
        This function searches for the kallsyms_markers structure in the kernel memory
        starting at kallsyms_token_table and searching backwards. The markers table contains
        offsets to the corresponding symbol name for each kernel symbol.
        The kallsyms_markers table is typically located immediately before the kallsyms_token_table
        in the kernel's read-only data section.

        A position is accepted when it holds a zero entry followed by three
        entries that each grow by 0x200..0x4000 over their predecessor.

        Returns the offset of the table, or None when none of the (at most 32)
        inspected positions qualifies.

        Example structure:
        0xffffffff827b2430: 0x00000000 0x00000b2a 0x00001762 0x000023f6
        0xffffffff827b2440: 0x00002fe4 0x00003c9d 0x0000487c 0x000056fd
        0xffffffff827b2450: 0x00006597 0x000073b9 0x000081be 0x00008f21
        0xffffffff827b2460: 0x00009c94 0x0000a958 0x0000b632 0x0000c193
        0xffffffff827b2470: 0x0000ce0b 0x0000db98 0x0000ea3e 0x0000f80a
        0xffffffff827b2480: 0x000105be 0x000112d3 0x00011f8c 0x00012d75
        0xffffffff827b2490: 0x0001384d 0x0001446e 0x00015138 0x00015d8c
        """
        if self.kernel_version < (4, 20):
            elem_size = pwndbg.aglib.arch.ptrsize
        else:
            elem_size = 4

        seq_to_find = b"\0" * elem_size
        size_marker = {4: "I", 8: "Q"}[elem_size]

        position = self.token_table - 1

        # Skip the zero padding in front of the token table.
        while position > 0 and self.kernel_ro_mem[position] == 0:
            position -= 1

        for _ in range(32):
            position = self.kernel_ro_mem.rfind(seq_to_find, 0, position)

            if position == -1:
                print(M.error("Failed to find kallsyms_markers"))
                return None

            position -= position % elem_size  # aligning

            entries = unpack_from(f"<4{size_marker}", self.kernel_ro_mem, position)

            if entries[0] != 0:
                continue

            for previous, current in zip(entries, entries[1:]):
                if previous + 0x200 > current or previous + 0x4000 < current:
                    break
            else:
                return position

        return None

    def find_num_syms(self):
        """
        This function searches for the kallsyms_num_syms variable in the kernel memory
        starting at kallsyms_markers. The kallsyms_num_syms holds the number of kernel symbols
        in the symbol table.
        The kallsyms_num_syms variable is typically located before the kallsyms_names table in the kernel's
        read-only data section.

        In newer kernel versions the kallsyms_num_syms is immediately behind the linux_banner and in older
        versions it is behind kallsyms_relative_base or kallsyms_addresses (it depends on
        CONFIG_KALLSYMS_BASE_RELATIVE y/n).

        Returns the offset of the variable, or None when it cannot be located.
        """
        mem = self.kernel_ro_mem

        if self.kernel_version < (6, 4):
            # try to find num_syms by walking backwards and looking
            # for data like this: a kernel address followed by num_syms
            # 0xffffffff823f8000 0x000000000001417c
            position = self.markers - 8

            # Stop once there is no room left for the preceding qword instead
            # of walking past the start of the buffer.
            while position >= 8:
                qword = u64(mem[position : position + 8])
                if (qword >> 32) & 0xFFFFFFFF == 0 and qword > 0:
                    before_qword = u64(mem[position - 8 : position])
                    if (before_qword >> 48) & 0xFFFF == 0xFFFF and (before_qword & 0xFFF) == 0:
                        # should be kallsyms_num_syms
                        return position

                position -= 8

            return None

        # search from kallsyms_markers backwards and look for the linux_banner symbol
        # the kallsyms_num_syms should be behind the linux_banner string
        banner = mem.rfind(b"Linux version", 0, self.markers - 8)
        if banner == -1:
            return None

        terminator = mem.find(b"\0", banner)
        if terminator == -1:
            return None

        # Align from the byte *after* the terminator: when the terminator
        # itself is 8-byte aligned, aligning from it would return the
        # terminator rather than the slot that follows the banner.
        return (terminator + 1 + 7) & ~7

    def find_offsets(self):
        """
        This function searches for the kallsyms_offsets/kallsyms_addresses table in the kernel memory.
        The offsets/addresses table contains offsets / addresses of each symbol in the kernel.

        For kernels >= 6.4 the table is searched forwards starting at kallsyms_token_index;
        for older kernels it is located before the kallsyms_num_syms variable and its start
        is computed from the symbol count.

        Sets ``self.is_offsets`` when the table holds 32-bit offsets rather than
        64-bit addresses. Returns the offset of the table.

        Example structure:
        0xffffffff827b3488: 0x00000000 0x00000000 0x00001000 0x00002000
        0xffffffff827b3498: 0x00006000 0x0000b000 0x0000c000 0x0000d000
        0xffffffff827b34a8: 0x00015000 0x00015008 0x00015010 0x00015018
        0xffffffff827b34b8: 0x00015020 0x00015022 0x00015030 0x00015050
        0xffffffff827b34c8: 0x00015450 0x00015460 0x00015860 0x00015888
        0xffffffff827b34d8: 0x00015890 0x00015898 0x000158a0 0x000159c0
        """
        mem = self.kernel_ro_mem

        if self.kernel_version >= (6, 4):
            self.is_offsets = True
            position = self.token_index

            # Walk forward to the first qword whose low dword is zero.
            while True:
                qword = u64(mem[position : position + 8])
                if qword & 0xFFFFFFFF == 0:
                    return position

                position += 8

        # kallsyms_offsets is at the top
        position = self.num_syms
        nsyms = self._read_num_syms()

        if self.kbase - 0x20000 < u64(mem[position - 8 : position]) <= self.kbase:
            # The qword in front of num_syms is close to the kernel base:
            # it should be kallsyms_relative_base, preceded by kallsyms_offsets
            self.is_offsets = True
            position -= 8

            # Skip a zero dword directly in front of the relative base.
            if u32(mem[position - 4 : position]) == 0x0:
                position -= 4

            return position - (nsyms * 4)

        return position - (nsyms * 8)

    def find_relative_base(self):
        """
        This function searches for the kallsyms_relative_base variable in the kernel memory.
        The relative base is used to calculate the actual virtual addresses of symbols from
        their offsets in the kallsyms_offsets table.
        The kallsyms_relative_base variable is typically located after the kallsyms_offsets table
        in the kernel's read-only data section.

        Returns the first 8-byte aligned offset behind the offsets table.
        """
        position = self.offsets + self._read_num_syms() * 4
        return (position + 7) & ~7

    def find_names(self):
        """Return the offset of kallsyms_names, taken to start 8 bytes after kallsyms_num_syms."""
        return self.num_syms + 8

    def get_kernel_addresses(self):
        """
        Decode the offsets/addresses table into a list of symbol addresses.

        When the table holds 64-bit addresses they are returned unchanged.
        When it holds 32-bit offsets they are resolved against the relative
        base read from ``self.rbase_offset``.
        """
        nsyms = self._read_num_syms()
        size_marker = "i" if self.is_offsets else "Q"
        kernel_addresses = list(
            unpack_from(f"<{nsyms}{size_marker}", self.kernel_ro_mem, self.offsets)
        )

        # An empty table would otherwise divide by zero below.
        if not self.is_offsets or not kernel_addresses:
            return kernel_addresses

        rbase = unpack_from("<Q", self.kernel_ro_mem, self.rbase_offset)[0]

        # When per-cpu symbols are stored as absolute values, every other
        # symbol is encoded as a negative offset, so a majority of negative
        # entries identifies that encoding.
        number_of_negative_items = sum(1 for offset in kernel_addresses if offset < 0)
        abs_percpu = number_of_negative_items / len(kernel_addresses) >= 0.5

        if abs_percpu:
            # Positive offsets are absolute values; negative offsets are
            # relative to (relative base - 1). This follows the kernel's
            # kallsyms_sym_address().
            return [
                rbase - 1 - offset if offset < 0 else offset for offset in kernel_addresses
            ]

        # Otherwise the offsets are unsigned 32-bit distances from the base.
        return [rbase + (offset & 0xFFFFFFFF) for offset in kernel_addresses]

    def parse_symbol_table(self):
        """
        Decompress kallsyms_names and fill ``self.kallsyms``.

        Each entry is a length followed by that many token indices. The expanded
        name starts with the symbol type character, which is split off and
        stored next to the address: ``kallsyms[name] = (address, type)``.
        """
        tokens = self.get_token_table()
        mem = self.kernel_ro_mem
        # Kernels >= 6.1 flag a two-byte length with the top bit of the first
        # length byte ("big" symbols).
        big_symbols = self.kernel_version >= (6, 1)

        symbol_names = []
        position = self.names

        for _ in range(self._read_num_syms()):
            length = mem[position]
            position += 1

            if big_symbols and length & 0x80:
                length = (length & 0x7F) | (mem[position] << 7)
                position += 1

            symbol_names.append("".join(tokens[index] for index in mem[position : position + length]))
            position += length

        for addr, name in zip(self.kernel_addresses, symbol_names):
            if not name:
                # A zero-length entry carries neither a type nor a name.
                continue
            self.kallsyms[name[1:]] = (addr, name[0])

    def get_token_table(self):
        """Return the 256 tokens of kallsyms_token_table as a list of strings."""
        mem = self.kernel_ro_mem
        tokens = []
        position = self.token_table

        for _ in range(256):
            end = mem.find(b"\0", position)
            if end == -1:
                end = len(mem)
            # latin-1 maps every byte to the code point of the same value.
            tokens.append(bytes(mem[position:end]).decode("latin-1"))
            position = end + 1

        return tokens
@pytest.mark.skipif(not pwndbg.gdblib.kernel.has_debug_syms(), reason="test requires debug symbols")
def test_gdblib_kernel_kallsyms():
    """The parsed kallsyms address of commit_creds must match the debug-symbol address."""
    parsed_symbols = pwndbg.gdblib.kernel.kallsyms.get()
    # Each entry is a tuple whose first element is the symbol address.
    parsed_address = parsed_symbols["commit_creds"][0]
    assert parsed_address == pwndbg.gdblib.symbol.address("commit_creds")