Improve RISCV support (#1770)

* Improve RISCV support

This is a resurrection of #829

Co-authored-by: Tobias Faller <faller@endiio.com>

* Silence bogus vermin warning

* Fix relative backwards jump calculations

The target address wouldn't be truncated to the pointer size.

* Add basic qemu-user test

* Run qemu-user tests in CI

* Make shfmt happy

* Fix pwntools < 4.11.0 support

* Support RISCV32 for pwntools < 4.11.0 as well

---------

Co-authored-by: Tobias Faller <faller@endiio.com>
This commit is contained in:
peace-maker 2023-07-04 14:05:35 +02:00 committed by GitHub
parent 52d729f574
commit a6cc19aa5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 306 additions and 36 deletions

View File

@ -85,6 +85,11 @@ jobs:
sudo sysctl -w kernel.yama.ptrace_scope=0 sudo sysctl -w kernel.yama.ptrace_scope=0
./tests.sh --cov ./tests.sh --cov
- name: Run qemu-user tests
working-directory: ./tests/qemu-tests
run: |
./test_qemu.sh
- name: Process coverage data - name: Process coverage data
run: | run: |
coverage combine coverage combine

View File

@ -19,6 +19,7 @@ try:
import pwndbg.disasm.jump import pwndbg.disasm.jump
import pwndbg.disasm.mips import pwndbg.disasm.mips
import pwndbg.disasm.ppc import pwndbg.disasm.ppc
import pwndbg.disasm.riscv
import pwndbg.disasm.sparc import pwndbg.disasm.sparc
import pwndbg.disasm.x86 import pwndbg.disasm.x86
import pwndbg.heap import pwndbg.heap

View File

@ -105,6 +105,10 @@ def get(instruction):
if not target: if not target:
return [] return []
if pwndbg.gdblib.arch.current in ["rv32", "rv64"]:
target += instruction.address
target &= pwndbg.gdblib.arch.ptrmask
name = pwndbg.gdblib.symbol.get(target) name = pwndbg.gdblib.symbol.get(target)
if not name: if not name:
return [] return []

View File

@ -15,7 +15,8 @@ arches = {
"mips": mips, "mips": mips,
"x86-64": amd64, "x86-64": amd64,
"aarch64": aarch64, "aarch64": aarch64,
"riscv:rv64": riscv64, "rv32": riscv64,
"rv64": riscv64,
} }

View File

@ -35,6 +35,8 @@ CapstoneArch = {
"powerpc": CS_ARCH_PPC, "powerpc": CS_ARCH_PPC,
"mips": CS_ARCH_MIPS, "mips": CS_ARCH_MIPS,
"sparc": CS_ARCH_SPARC, "sparc": CS_ARCH_SPARC,
"rv32": CS_ARCH_RISCV,
"rv64": CS_ARCH_RISCV,
} }
CapstoneEndian = { CapstoneEndian = {
@ -56,6 +58,8 @@ VariableInstructionSizeMax = {
"x86-64": 16, "x86-64": 16,
"i8086": 16, "i8086": 16,
"mips": 8, "mips": 8,
"rv32": 22,
"rv64": 22,
} }
backward_cache: DefaultDict = collections.defaultdict(lambda: None) backward_cache: DefaultDict = collections.defaultdict(lambda: None)
@ -117,6 +121,11 @@ def get_disassembler(pc):
): ):
extra = CS_MODE_MIPS32R6 extra = CS_MODE_MIPS32R6
elif pwndbg.gdblib.arch.current == "rv32":
extra = CS_MODE_RISCV32 | CS_MODE_RISCVC # novermin
elif pwndbg.gdblib.arch.current == "rv64":
extra = CS_MODE_RISCV64 | CS_MODE_RISCVC # novermin
else: else:
extra = None extra = None

95
pwndbg/disasm/riscv.py Normal file
View File

@ -0,0 +1,95 @@
from capstone import * # noqa: F403
from capstone.riscv import * # noqa: F403
import pwndbg.gdblib.arch
import pwndbg.gdblib.regs
class DisassemblyAssistant(pwndbg.disasm.arch.DisassemblyAssistant):
    """RISC-V disassembly assistant: decides branch outcomes and jump targets.

    The architecture name given to the constructor ("rv32" or "rv64") selects
    the register width used when register values are reinterpreted as signed.
    """

    def __init__(self, architecture):
        super().__init__(architecture)
        self.architecture = architecture

    def _is_condition_taken(self, instruction):
        """Evaluate a conditional branch against the current register values.

        Returns the boolean outcome for the branch instructions handled below
        and None for every other instruction id. Raises NotImplementedError
        when the assistant was created for an unknown architecture name.
        """
        # B-type instructions compare two source registers.
        lhs = self.register(instruction, instruction.op_find(CS_OP_REG, 1))
        # The compressed c.beqz / c.bnez forms only carry one register operand,
        # so the second comparand is the constant zero.
        if instruction.op_count(CS_OP_REG) > 1:
            rhs = self.register(instruction, instruction.op_find(CS_OP_REG, 2))
        else:
            rhs = 0

        # Pick the sign bit matching the register width of the architecture.
        if self.architecture == "rv32":
            sign_bit = 0x80000000
        elif self.architecture == "rv64":
            sign_bit = 0x80000000_00000000
        else:
            raise NotImplementedError("architecture '{}' not implemented".format(self.architecture))

        # Two's-complement reinterpretation: subtract 2**width when the sign
        # bit is set, leave the value untouched otherwise.
        lhs_signed = lhs - ((lhs & sign_bit) << 1)
        rhs_signed = rhs - ((rhs & sign_bit) << 1)

        branch = instruction.id
        if branch == RISCV_INS_BEQ:
            return lhs_signed == rhs_signed
        if branch == RISCV_INS_BNE:
            return lhs_signed != rhs_signed
        if branch == RISCV_INS_BLT:
            return lhs_signed < rhs_signed
        if branch == RISCV_INS_BGE:
            return lhs_signed >= rhs_signed
        if branch == RISCV_INS_BLTU:
            return lhs < rhs
        if branch == RISCV_INS_BGEU:
            return lhs >= rhs
        if branch == RISCV_INS_C_BEQZ:
            return lhs_signed == 0
        if branch == RISCV_INS_C_BNEZ:
            return lhs_signed != 0
        return None

    def condition(self, instruction):
        """Tell whether the given instruction is a branch that will be taken.

        Returns None for instructions in the call group (JAL / JALR) and for
        anything that is not a relative branch, False for instructions that
        are not at the current pc, and otherwise the evaluated branch outcome.
        """
        # JAL / JALR never depend on a condition.
        if RISCV_GRP_CALL in instruction.groups:
            return None

        # Register contents are only meaningful for the instruction that is
        # about to execute, so nothing can be said about any other address.
        if instruction.address != pwndbg.gdblib.regs.pc:
            return False

        if RISCV_GRP_BRANCH_RELATIVE not in instruction.groups:
            return None

        return self._is_condition_taken(instruction)

    def next(self, instruction, call=False):
        """Compute the address control flow continues at after a jump/branch.

        Direct jumps (JAL) are resolved for any instruction. Taken relative
        branches and indirect jumps (JALR) are resolved only for the
        instruction at the current pc; for any other address None is returned.
        Everything else is delegated to the base class implementation.
        """
        ptrmask = pwndbg.gdblib.arch.ptrmask
        insn_id = instruction.id

        # JAL: pc-relative and independent of any register state.
        if insn_id in (RISCV_INS_JAL, RISCV_INS_C_JAL):
            offset = instruction.op_find(CS_OP_IMM, 1).imm
            return (instruction.address + offset) & ptrmask

        # Everything below needs live register values, which are only valid
        # for the instruction at the current pc.
        if instruction.address != pwndbg.gdblib.regs.pc:
            return None

        # Relative branch whose condition currently holds.
        if RISCV_GRP_BRANCH_RELATIVE in instruction.groups and self._is_condition_taken(
            instruction
        ):
            offset = instruction.op_find(CS_OP_IMM, 1).imm
            return (instruction.address + offset) & ptrmask

        # JALR: register plus immediate, truncated to the pointer size.
        if insn_id in (RISCV_INS_JALR, RISCV_INS_C_JALR):
            base = self.register(instruction, instruction.op_find(CS_OP_REG, 1))
            offset = instruction.op_find(CS_OP_IMM, 1).imm
            target = (base + offset) & ptrmask
            # The lowest bit of the computed address is always cleared.
            return target & ~1

        return super().next(instruction, call)
# One assistant instance per RISC-V architecture name handled by this module.
# NOTE(review): presumably the base-class constructor registers each instance
# under its architecture name - confirm in pwndbg.disasm.arch.
assistant_rv32 = DisassemblyAssistant("rv32")
assistant_rv64 = DisassemblyAssistant("rv64")

View File

@ -8,6 +8,7 @@ import re
import capstone as C import capstone as C
import gdb import gdb
import unicorn as U import unicorn as U
import unicorn.riscv_const
import pwndbg.disasm import pwndbg.disasm
import pwndbg.gdblib.arch import pwndbg.gdblib.arch
@ -36,6 +37,8 @@ arch_to_UC = {
"arm": U.UC_ARCH_ARM, "arm": U.UC_ARCH_ARM,
"aarch64": U.UC_ARCH_ARM64, "aarch64": U.UC_ARCH_ARM64,
# 'powerpc': U.UC_ARCH_PPC, # 'powerpc': U.UC_ARCH_PPC,
"rv32": U.UC_ARCH_RISCV,
"rv64": U.UC_ARCH_RISCV,
} }
arch_to_UC_consts = { arch_to_UC_consts = {
@ -45,6 +48,8 @@ arch_to_UC_consts = {
"sparc": parse_consts(U.sparc_const), "sparc": parse_consts(U.sparc_const),
"arm": parse_consts(U.arm_const), "arm": parse_consts(U.arm_const),
"aarch64": parse_consts(U.arm64_const), "aarch64": parse_consts(U.arm64_const),
"rv32": parse_consts(U.riscv_const),
"rv64": parse_consts(U.riscv_const),
} }
# Map our internal architecture names onto Unicorn Engine's architecture types. # Map our internal architecture names onto Unicorn Engine's architecture types.
@ -56,6 +61,8 @@ arch_to_CS = {
"arm": C.CS_ARCH_ARM, "arm": C.CS_ARCH_ARM,
"aarch64": C.CS_ARCH_ARM64, "aarch64": C.CS_ARCH_ARM64,
# 'powerpc': C.CS_ARCH_PPC, # 'powerpc': C.CS_ARCH_PPC,
"rv32": C.CS_ARCH_RISCV,
"rv64": C.CS_ARCH_RISCV,
} }
DEBUG = False DEBUG = False
@ -87,6 +94,7 @@ arch_to_SYSCALL = {
U.UC_ARCH_ARM: [C.arm_const.ARM_INS_SVC], U.UC_ARCH_ARM: [C.arm_const.ARM_INS_SVC],
U.UC_ARCH_ARM64: [C.arm64_const.ARM64_INS_SVC], U.UC_ARCH_ARM64: [C.arm64_const.ARM64_INS_SVC],
U.UC_ARCH_PPC: [C.ppc_const.PPC_INS_SC], U.UC_ARCH_PPC: [C.ppc_const.PPC_INS_SC],
U.UC_ARCH_RISCV: [C.riscv_const.RISCV_INS_ECALL],
} }
blacklisted_regs = ["ip", "cs", "ds", "es", "fs", "gs", "ss", "fsbase", "gsbase"] blacklisted_regs = ["ip", "cs", "ds", "es", "fs", "gs", "ss", "fsbase", "gsbase"]

View File

@ -7,7 +7,7 @@ from pwndbg.lib.arch import Arch
# TODO: x86-64 needs to come before i386 in the current implementation, make # TODO: x86-64 needs to come before i386 in the current implementation, make
# this order-independent # this order-independent
ARCHS = ("x86-64", "i386", "aarch64", "mips", "powerpc", "sparc", "arm", "armcm", "riscv:rv64") ARCHS = ("x86-64", "i386", "aarch64", "mips", "powerpc", "sparc", "arm", "armcm", "rv32", "rv64")
# mapping between gdb and pwntools arch names # mapping between gdb and pwntools arch names
pwnlib_archs_mapping = { pwnlib_archs_mapping = {
@ -19,9 +19,26 @@ pwnlib_archs_mapping = {
"sparc": "sparc", "sparc": "sparc",
"arm": "arm", "arm": "arm",
"armcm": "thumb", "armcm": "thumb",
"riscv:rv64": "riscv",
} }
# pwntools releases before 4.11 only know a single "riscv" architecture name;
# newer ones use "riscv32" / "riscv64" instead.
# https://github.com/Gallopsled/pwntools/pull/2177
#
# (major, minor) is compared as a whole so that *every* release older than
# 4.11 gets the legacy name, not just the 4.x ones.
pwnlib_version = list(map(int, pwnlib.__version__.split(".")[:2]))
if pwnlib_version < [4, 11]:
    pwnlib_archs_mapping.update(
        {
            "rv32": "riscv",
            "rv64": "riscv",
        }
    )
else:
    pwnlib_archs_mapping.update(
        {
            "rv32": "riscv32",
            "rv64": "riscv64",
        }
    )
arch = Arch("i386", typeinfo.ptrsize, "little") arch = Arch("i386", typeinfo.ptrsize, "little")

View File

@ -103,7 +103,8 @@ def get() -> Tuple[pwndbg.lib.memory.Page, ...]:
"i386", "i386",
"x86-64", "x86-64",
"aarch64", "aarch64",
"riscv:rv64", "rv32",
"rv64",
): ):
# If kernel_vmmap_via_pt is not set to the default value of "deprecated", # If kernel_vmmap_via_pt is not set to the default value of "deprecated",
# That means the user was explicitly setting it themselves and need to # That means the user was explicitly setting it themselves and need to

View File

@ -71,6 +71,7 @@ linux_aarch64 = ABI(["x0", "x1", "x2", "x3"], 16, 0)
linux_mips = ABI(["$a0", "$a1", "$a2", "$a3"], 4, 0) linux_mips = ABI(["$a0", "$a1", "$a2", "$a3"], 4, 0)
linux_ppc = ABI(["r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"], 4, 0) linux_ppc = ABI(["r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"], 4, 0)
linux_ppc64 = ABI(["r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"], 8, 0) linux_ppc64 = ABI(["r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"], 8, 0)
linux_riscv32 = ABI(["a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7"], 4, 0)
linux_riscv64 = ABI(["a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7"], 8, 0) linux_riscv64 = ABI(["a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7"], 8, 0)
linux_i386_syscall = SyscallABI(["eax", "ebx", "ecx", "edx", "esi", "edi", "ebp"], 4, 0) linux_i386_syscall = SyscallABI(["eax", "ebx", "ecx", "edx", "esi", "edi", "ebp"], 4, 0)
@ -80,6 +81,7 @@ linux_aarch64_syscall = SyscallABI(["x8", "x0", "x1", "x2", "x3", "x4", "x5"], 1
linux_mips_syscall = SyscallABI(["$v0", "$a0", "$a1", "$a2", "$a3"], 4, 0) linux_mips_syscall = SyscallABI(["$v0", "$a0", "$a1", "$a2", "$a3"], 4, 0)
linux_ppc_syscall = SyscallABI(["r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9"], 4, 0) linux_ppc_syscall = SyscallABI(["r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9"], 4, 0)
linux_ppc64_syscall = SyscallABI(["r0", "r3", "r4", "r5", "r6", "r7", "r8"], 8, 0) linux_ppc64_syscall = SyscallABI(["r0", "r3", "r4", "r5", "r6", "r7", "r8"], 8, 0)
linux_riscv32_syscall = SyscallABI(["a7", "a0", "a1", "a2", "a3", "a4", "a5", "a6"], 4, 0)
linux_riscv64_syscall = SyscallABI(["a7", "a0", "a1", "a2", "a3", "a4", "a5", "a6"], 8, 0) linux_riscv64_syscall = SyscallABI(["a7", "a0", "a1", "a2", "a3", "a4", "a5", "a6"], 8, 0)
linux_i386_sigreturn = SigreturnABI(["eax"], 4, 0) linux_i386_sigreturn = SigreturnABI(["eax"], 4, 0)
@ -100,7 +102,8 @@ DEFAULT_ABIS = {
(32, "mips", "linux"): linux_mips, (32, "mips", "linux"): linux_mips,
(32, "powerpc", "linux"): linux_ppc, (32, "powerpc", "linux"): linux_ppc,
(64, "powerpc", "linux"): linux_ppc64, (64, "powerpc", "linux"): linux_ppc64,
(64, "riscv64", "linux"): linux_riscv64, (32, "rv32", "linux"): linux_riscv32,
(64, "rv64", "linux"): linux_riscv64,
} }
SYSCALL_ABIS = { SYSCALL_ABIS = {
@ -112,7 +115,8 @@ SYSCALL_ABIS = {
(32, "mips", "linux"): linux_mips_syscall, (32, "mips", "linux"): linux_mips_syscall,
(32, "powerpc", "linux"): linux_ppc_syscall, (32, "powerpc", "linux"): linux_ppc_syscall,
(64, "powerpc", "linux"): linux_ppc64_syscall, (64, "powerpc", "linux"): linux_ppc64_syscall,
(64, "riscv64", "linux"): linux_riscv64_syscall, (32, "rv32", "linux"): linux_riscv32_syscall,
(64, "rv64", "linux"): linux_riscv64_syscall,
} }
SIGRETURN_ABIS = { SIGRETURN_ABIS = {

View File

@ -472,6 +472,28 @@ mips = RegisterSet(
retval="v0", retval="v0",
) )
# https://riscv.org/technical/specifications/
# Volume 1, Unprivileged Spec v. 20191213
# Chapter 25 - RISC-V Assembly Programmers Handbook
# x0 => zero (Hard-wired zero)
# x1 => ra (Return address)
# x2 => sp (Stack pointer)
# x3 => gp (Global pointer)
# x4 => tp (Thread pointer)
# x5 => t0 (Temporary/alternate link register)
# x6-7 => t1-2 (Temporaries)
# x8 => s0/fp (Saved register/frame pointer)
# x9 => s1 (Saved register)
# x10-11 => a0-1 (Function arguments/return values)
# x12-17 => a2-7 (Function arguments)
# x18-27 => s2-11 (Saved registers)
# x28-31 => t3-6 (Temporaries)
# f0-7 => ft0-7 (FP temporaries)
# f8-9 => fs0-1 (FP saved registers)
# f10-11 => fa0-1 (FP arguments/return values)
# f12-17 => fa2-7 (FP arguments)
# f18-27 => fs2-11 (FP saved registers)
# f28-31 => ft8-11 (FP temporaries)
riscv = RegisterSet( riscv = RegisterSet(
pc="pc", pc="pc",
stack="sp", stack="sp",
@ -518,7 +540,8 @@ reg_sets = {
"i386": i386, "i386": i386,
"i8086": i386, "i8086": i386,
"x86-64": amd64, "x86-64": amd64,
"riscv:rv64": riscv, "rv32": riscv,
"rv64": riscv,
"mips": mips, "mips": mips,
"sparc": sparc, "sparc": sparc,
"arm": arm, "arm": arm,

View File

@ -1,4 +1,4 @@
capstone==4.0.2 capstone==5.0.0rc4
psutil==5.9.5 psutil==5.9.5
pwntools==4.10.0 pwntools==4.10.0
pycparser==2.21 pycparser==2.21

View File

@ -95,7 +95,10 @@ install_apt() {
parallel \ parallel \
netcat-openbsd \ netcat-openbsd \
qemu-system-x86 \ qemu-system-x86 \
qemu-system-arm qemu-system-arm \
qemu-user \
gcc-aarch64-linux-gnu \
gcc-riscv64-linux-gnu
if [[ "$1" == "22.04" ]]; then if [[ "$1" == "22.04" ]]; then
sudo apt install shfmt sudo apt install shfmt

View File

@ -1,3 +1,15 @@
.PHONY: all
all: reference-binary.aarch64.out reference-binary.riscv64.out
reference-binary.aarch64.out : reference-binary.aarch64.c reference-binary.aarch64.out : reference-binary.aarch64.c
@echo "[+] Building '$@'" @echo "[+] Building '$@'"
@aarch64-linux-gnu-gcc $(CFLAGS) $(EXTRA_FLAGS) -w -o $@ $? $(LDFLAGS) @aarch64-linux-gnu-gcc $(CFLAGS) $(EXTRA_FLAGS) -w -o $@ $? $(LDFLAGS)
# apt install crossbuild-essential-riscv64
reference-binary.riscv64.out : reference-binary.riscv64.c
@echo "[+] Building '$@'"
@riscv64-linux-gnu-gcc -march=rv64gc -mabi=lp64d -g $(CFLAGS) $(EXTRA_FLAGS) -w -o $@ $? $(LDFLAGS)
# Remove the built reference binaries.
# `clean` is declared phony so a file named "clean" cannot shadow the rule, and
# `rm -f` keeps the rule from failing when a binary has not been built yet.
.PHONY: clean
clean:
	rm -f reference-binary.aarch64.out
	rm -f reference-binary.riscv64.out

View File

@ -0,0 +1,10 @@
#include <stdio.h>
int main(int argc, char const* argv[]) {
if (argc > 1) {
puts("Enough args");
} else {
puts("Not enough args");
}
return 0;
}

View File

@ -2,13 +2,59 @@
make -C binaries make -C binaries
qemu-aarch64 \ ROOT_DIR="$(readlink -f ../../)"
-g 1234 \ GDB_INIT_PATH="$ROOT_DIR/gdbinit.py"
-L /usr/aarch64-linux-gnu/ \ COVERAGERC_PATH="$ROOT_DIR/pyproject.toml"
./binaries/reference-binary.aarch64.out &
gdb-multiarch \ handle_sigint() {
-ex "file ./binaries/reference-binary.aarch64.out" \ echo "Exiting..." >&2
-ex "target remote :1234" \ pkill qemu-aarch64
-ex "source ./tests/user/test_aarch64.py" \ pkill qemu-riscv64
-ex "quit" exit 1
}
trap handle_sigint SIGINT
gdb_load_pwndbg=(--command "$GDB_INIT_PATH" -ex "set exception-verbose on")
# Run gdb-multiarch with pwndbg loaded and coverage collection configured.
# Arguments: extra gdb options; they are placed before the final `-ex quit`.
# Returns:   gdb's exit status. Everything gdb writes to stderr is discarded.
run_gdb() {
    local -a gdb_args=(--silent --nx --nh "${gdb_load_pwndbg[@]}" "$@" -ex "quit")

    COVERAGE_FILE="$ROOT_DIR/.cov/coverage" \
        COVERAGE_PROCESS_START="$COVERAGERC_PATH" \
        PWNDBG_DISABLE_COLORS=1 \
        gdb-multiarch "${gdb_args[@]}" 2> /dev/null
}
# Run the qemu-user test for a single architecture.
# Arguments: $1 - architecture name as it appears in the qemu binary, the
#                 sysroot directory, the reference binary and the test script
#                 (e.g. "aarch64").
# Returns:   the exit status of the gdb run.
test_arch() {
    local arch="$1"

    "qemu-${arch}" \
        -g 1234 \
        -L "/usr/${arch}-linux-gnu/" \
        "./binaries/reference-binary.${arch}.out" &
    # Remember the emulator's PID so exactly this process is cleaned up,
    # rather than every process whose name happens to match qemu-<arch>.
    local qemu_pid=$!

    run_gdb \
        -ex "set sysroot /usr/${arch}-linux-gnu/" \
        -ex "file ./binaries/reference-binary.${arch}.out" \
        -ex 'py import coverage;coverage.process_startup()' \
        -ex "target remote :1234" \
        -ex "source ./tests/user/test_${arch}.py"
    local result=$?

    # The emulator may already be gone once gdb quits; ignore that case.
    kill "$qemu_pid" 2> /dev/null || true
    # Reap it before returning so the gdbstub port is free for the next run.
    wait "$qemu_pid" 2> /dev/null || true

    return $result
}
# Architectures to exercise, and the ones whose test run reported a failure.
ARCHS=("aarch64" "riscv64")
FAILED_TESTS=()

for arch in "${ARCHS[@]}"; do
    test_arch "$arch" || FAILED_TESTS+=("$arch")
done

# Summarise failures and make the script exit non-zero if there were any.
if ((${#FAILED_TESTS[@]} > 0)); then
    printf '\nFailing tests: %s\n\n' "${FAILED_TESTS[*]}"
    exit 1
fi

View File

@ -1,28 +1,38 @@
import sys
import traceback
import gdb import gdb
import pwndbg import pwndbg
gdb.execute("break break_here") try:
assert pwndbg.gdblib.symbol.address("main") == 0x5500000A1C gdb.execute("break break_here")
gdb.execute("continue") assert pwndbg.gdblib.symbol.address("main") == 0x5500000A1C
gdb.execute("continue")
gdb.execute("argv", to_string=True) gdb.execute("argv", to_string=True)
assert gdb.execute("argc", to_string=True) == 1 assert gdb.execute("argc", to_string=True).strip() == "1"
gdb.execute("auxv", to_string=True) gdb.execute("auxv", to_string=True)
assert gdb.execute("cpsr", to_string=True) == "cpsr 0x60000000 [ n Z C v q pan il d a i f el sp ]" assert (
gdb.execute("context", to_string=True) gdb.execute("cpsr", to_string=True, from_tty=False).strip()
gdb.execute("hexdump", to_string=True) == "cpsr 0x60000000 [ n Z C v q pan il d a i f el sp ]"
gdb.execute("telescope", to_string=True) )
gdb.execute("context", to_string=True)
gdb.execute("hexdump", to_string=True)
gdb.execute("telescope", to_string=True)
# TODO: Broken # TODO: Broken
gdb.execute("retaddr", to_string=True) gdb.execute("retaddr", to_string=True)
# Broken # Broken
gdb.execute("procinfo", to_string=True) gdb.execute("procinfo", to_string=True)
# Broken # Broken
gdb.execute("vmmap", to_string=True) gdb.execute("vmmap", to_string=True)
gdb.execute("piebase", to_string=True) gdb.execute("piebase", to_string=True)
gdb.execute("nextret", to_string=True) gdb.execute("nextret", to_string=True)
except AssertionError:
traceback.print_exc(file=sys.stdout)
sys.exit(1)

View File

@ -0,0 +1,21 @@
import re
import sys
import traceback
import gdb
import pwndbg
try:
    gdb.execute("break 4")
    assert pwndbg.gdblib.symbol.address("main") == 0x4000000668
    gdb.execute("continue")

    gdb.execute("nextcall", to_string=True)

    # Verify that call arguments are enriched in the disassembly output.
    assembly = gdb.execute("nearpc", to_string=True)
    assert re.search(r"s.*'Not enough args'", assembly), assembly
except Exception:
    # Any failure must end in a non-zero exit, not only failed assertions: an
    # exception that escapes this script (e.g. from a failing gdb command)
    # would otherwise not reach sys.exit(1) and the run could look successful.
    # The traceback goes to stdout so it is visible in the test output.
    traceback.print_exc(file=sys.stdout)
    sys.exit(1)