// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
#include <linux/static_call.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe is optimized, the original bytes may have been
	 * overwritten by the jump destination address. In this case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
				     MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}
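
/*
 * Patch a CLAC instruction over the NOP3 placeholder in the copied template
 * so that AC is cleared on entry when the CPU supports SMAP.
 */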
static void synthesize_clac(kprobe_opcode_t *addr)
{
	/*
	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
	 * This isn't a fast path anyway.
	 */
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return;

	/* Replace the NOP3 with CLAC */
	addr[0] = 0x0f;
	addr[1] = 0x01;
	addr[2] = 0xca;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}
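
/*
 * Trampoline template used for every optimized probe: it builds a struct
 * pt_regs style frame on the stack, calls optimized_callback() with the
 * probed kprobe as the first argument, and then restores the registers.
 * The optprobe_template_* labels mark the spots that are patched per probe.
 */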
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			"	pushq $" __stringify(__KERNEL_DS) "\n"
			/* Save the 'sp - 8', this will be fixed later. */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 20*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addq $16, %rsp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %ss\n"
			/* Save the 'sp - 4', this will be fixed later. */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 16*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addl $8, %esp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
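
/* Byte offsets of the patch points within the trampoline template above. */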
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Adjust stack pointer */
		regs->sp += sizeof(long);
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
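
/*
 * Copy and decode instructions from the probed address until at least
 * JMP32_INSN_SIZE bytes are covered, rejecting anything that cannot be
 * safely executed from the out-of-line buffer.
 */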
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1) ||
	    static_call_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
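	/*
	 * Opcode 0xff with ModRM.reg of 4 or 5 encodes a near or far
	 * indirect JMP; masking with 6 matches both values at once.
	 */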
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
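	/* Relative jumps encode a signed displacement from the next instruction. */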
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to an x86_indirect_thunk_* retpoline is treated as an
	 * indirect jump. Note that even with CONFIG_RETPOLINE=y, a kernel
	 * compiled with an older gcc may still emit plain indirect jumps,
	 * so this check is added on top of the indirect-jump check above
	 * rather than replacing it.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}
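
/* Check that every byte in [addr, eaddr) is an INT3 padding byte. */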
static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
{
	unsigned char ops;

	for (; addr < eaddr; addr++) {
		if (get_kernel_nofault(ops, (void *)addr) < 0 ||
		    ops != INT3_INSN_OPCODE)
			return false;
	}

	return true;
}

/* Decode the whole function to ensure no instruction jumps into the probe target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		int ret;

		if (search_exception_tables(addr))
			/*
			 * Since some fixup code jumps into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;

		ret = insn_decode_kernel(&insn, (void *)recovered_insn);
		if (ret < 0)
			return 0;

		/*
		 * If an unknown breakpoint is detected, it could be a
		 * padding INT3 between functions. Check that all the
		 * remaining bytes are also INT3.
		 */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
			return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;

		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that no instruction jumps into the probe target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether an optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether the address is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 kprobe_opcode_t *addr)
{
	return (op->kp.addr <= addr &&
		op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	u8 *slot = op->optinsn.insn;

	if (slot) {
		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

		/* Record the perf event before freeing the slot */
		if (dirty)
			perf_event_text_poke(slot, slot, len, NULL, 0);

		free_optinsn_slot(slot, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the target instructions that will be replaced by the jump.
 * Target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
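	/*
	 * Out-of-line buffer layout: the register-saving template
	 * (TMPL_END_IDX bytes), the copied original instructions, and a
	 * jump back to the instruction following the probed region.
	 */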
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the 2GB range reachable by
	 * a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy arch-dep-instance from template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	synthesize_clac(buf + TMPL_CLAC_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += JMP32_INSN_SIZE;

	/*
	 * Note that len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE
	 * is also used in __arch_remove_optimized_kprobe().
	 */

	/* We have to use text_poke() for the instruction buffer because it is RO */
	perf_event_text_poke(slot, NULL, 0, buf, len);
	text_poke(slot, buf, len);

	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * Caller must hold kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[JMP32_INSN_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + JMP32_INSN_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
		       DISP32_SIZE);

		insn_buff[0] = JMP32_INSN_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

		list_del_init(&op->list);
	}
}

/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
	u8 old[JMP32_INSN_SIZE];
	u8 *addr = op->kp.addr;

	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
	memcpy(new + INT3_INSN_SIZE,
	       op->optinsn.copied_insn,
	       JMP32_INSN_SIZE - INT3_INSN_SIZE);
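
	/*
	 * Write the INT3 over the JMP opcode first and make sure every CPU
	 * observes it before the displacement bytes behind it are restored.
	 */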
	text_poke(addr, new, INT3_INSN_SIZE);
	text_poke_sync();
	text_poke(addr + INT3_INSN_SIZE,
		  new + INT3_INSN_SIZE,
		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
	text_poke_sync();

	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}
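
/*
 * If the breakpoint hit an optimized kprobe, resume execution in its
 * out-of-line copy of the original instructions instead of single-stepping.
 */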
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);