[PATCH] Kprobes: Track kprobe on a per_cpu basis - x86_64 changes

x86_64 changes to track kprobe execution on a per-cpu basis.  We now track the
kprobe state machine independently on each cpu using an arch-specific kprobe
control block.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Ananth N Mavinakayanahalli 2005-11-07 01:00:12 -08:00 committed by Linus Torvalds
parent f215d985e9
commit e7a510f92c
2 changed files with 89 additions and 59 deletions

View File

@ -44,17 +44,10 @@
#include <asm/kdebug.h> #include <asm/kdebug.h>
static DECLARE_MUTEX(kprobe_mutex); static DECLARE_MUTEX(kprobe_mutex);
static struct kprobe *current_kprobe;
static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags;
static struct kprobe *kprobe_prev;
static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
static struct pt_regs jprobe_saved_regs;
static long *jprobe_saved_rsp;
void jprobe_return_end(void); void jprobe_return_end(void);
/* copy of the kernel stack at the probe fire time */ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/* /*
* returns non-zero if opcode modifies the interrupt flag. * returns non-zero if opcode modifies the interrupt flag.
@ -236,29 +229,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
up(&kprobe_mutex); up(&kprobe_mutex);
} }
static inline void save_previous_kprobe(void) static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{ {
kprobe_prev = current_kprobe; kcb->prev_kprobe.kp = kprobe_running();
kprobe_status_prev = kprobe_status; kcb->prev_kprobe.status = kcb->kprobe_status;
kprobe_old_rflags_prev = kprobe_old_rflags; kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
kprobe_saved_rflags_prev = kprobe_saved_rflags; kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
} }
static inline void restore_previous_kprobe(void) static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{ {
current_kprobe = kprobe_prev; __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
kprobe_status = kprobe_status_prev; kcb->kprobe_status = kcb->prev_kprobe.status;
kprobe_old_rflags = kprobe_old_rflags_prev; kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
kprobe_saved_rflags = kprobe_saved_rflags_prev; kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
} }
static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{ {
current_kprobe = p; __get_cpu_var(current_kprobe) = p;
kprobe_saved_rflags = kprobe_old_rflags kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
= (regs->eflags & (TF_MASK | IF_MASK)); = (regs->eflags & (TF_MASK | IF_MASK));
if (is_IF_modifier(p->ainsn.insn)) if (is_IF_modifier(p->ainsn.insn))
kprobe_saved_rflags &= ~IF_MASK; kcb->kprobe_saved_rflags &= ~IF_MASK;
} }
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@ -301,6 +295,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
struct kprobe *p; struct kprobe *p;
int ret = 0; int ret = 0;
kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Check we're not actually recursing */ /* Check we're not actually recursing */
if (kprobe_running()) { if (kprobe_running()) {
@ -308,13 +303,13 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
Disarm the probe we just hit, and ignore it. */ Disarm the probe we just hit, and ignore it. */
p = get_kprobe(addr); p = get_kprobe(addr);
if (p) { if (p) {
if (kprobe_status == KPROBE_HIT_SS && if (kcb->kprobe_status == KPROBE_HIT_SS &&
*p->ainsn.insn == BREAKPOINT_INSTRUCTION) { *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
regs->eflags &= ~TF_MASK; regs->eflags &= ~TF_MASK;
regs->eflags |= kprobe_saved_rflags; regs->eflags |= kcb->kprobe_saved_rflags;
unlock_kprobes(); unlock_kprobes();
goto no_kprobe; goto no_kprobe;
} else if (kprobe_status == KPROBE_HIT_SSDONE) { } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/* TODO: Provide re-entrancy from /* TODO: Provide re-entrancy from
* post_kprobes_handler() and avoid exception * post_kprobes_handler() and avoid exception
* stack corruption while single-stepping on * stack corruption while single-stepping on
@ -322,6 +317,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
*/ */
arch_disarm_kprobe(p); arch_disarm_kprobe(p);
regs->rip = (unsigned long)p->addr; regs->rip = (unsigned long)p->addr;
reset_current_kprobe();
ret = 1; ret = 1;
} else { } else {
/* We have reentered the kprobe_handler(), since /* We have reentered the kprobe_handler(), since
@ -331,15 +327,15 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
* of the new probe without calling any user * of the new probe without calling any user
* handlers. * handlers.
*/ */
save_previous_kprobe(); save_previous_kprobe(kcb);
set_current_kprobe(p, regs); set_current_kprobe(p, regs, kcb);
p->nmissed++; p->nmissed++;
prepare_singlestep(p, regs); prepare_singlestep(p, regs);
kprobe_status = KPROBE_REENTER; kcb->kprobe_status = KPROBE_REENTER;
return 1; return 1;
} }
} else { } else {
p = current_kprobe; p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) { if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe; goto ss_probe;
} }
@ -374,8 +370,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
* in post_kprobe_handler() * in post_kprobe_handler()
*/ */
preempt_disable(); preempt_disable();
kprobe_status = KPROBE_HIT_ACTIVE; set_current_kprobe(p, regs, kcb);
set_current_kprobe(p, regs); kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (p->pre_handler && p->pre_handler(p, regs)) if (p->pre_handler && p->pre_handler(p, regs))
/* handler has already set things up, so skip ss setup */ /* handler has already set things up, so skip ss setup */
@ -383,7 +379,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
ss_probe: ss_probe:
prepare_singlestep(p, regs); prepare_singlestep(p, regs);
kprobe_status = KPROBE_HIT_SS; kcb->kprobe_status = KPROBE_HIT_SS;
return 1; return 1;
no_kprobe: no_kprobe:
@ -451,6 +447,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
regs->rip = orig_ret_address; regs->rip = orig_ret_address;
reset_current_kprobe();
unlock_kprobes(); unlock_kprobes();
preempt_enable_no_resched(); preempt_enable_no_resched();
@ -484,7 +481,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* that is atop the stack is the address following the copied instruction. * that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction. * We need to make it the address following the original instruction.
*/ */
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static void __kprobes resume_execution(struct kprobe *p,
struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{ {
unsigned long *tos = (unsigned long *)regs->rsp; unsigned long *tos = (unsigned long *)regs->rsp;
unsigned long next_rip = 0; unsigned long next_rip = 0;
@ -499,7 +497,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
switch (*insn) { switch (*insn) {
case 0x9c: /* pushfl */ case 0x9c: /* pushfl */
*tos &= ~(TF_MASK | IF_MASK); *tos &= ~(TF_MASK | IF_MASK);
*tos |= kprobe_old_rflags; *tos |= kcb->kprobe_old_rflags;
break; break;
case 0xc3: /* ret/lret */ case 0xc3: /* ret/lret */
case 0xcb: case 0xcb:
@ -544,24 +542,28 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
*/ */
int __kprobes post_kprobe_handler(struct pt_regs *regs) int __kprobes post_kprobe_handler(struct pt_regs *regs)
{ {
if (!kprobe_running()) struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (!cur)
return 0; return 0;
if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kprobe_status = KPROBE_HIT_SSDONE; kcb->kprobe_status = KPROBE_HIT_SSDONE;
current_kprobe->post_handler(current_kprobe, regs, 0); cur->post_handler(cur, regs, 0);
} }
resume_execution(current_kprobe, regs); resume_execution(cur, regs, kcb);
regs->eflags |= kprobe_saved_rflags; regs->eflags |= kcb->kprobe_saved_rflags;
/* Restore the original saved kprobes variables and continue. */ /* Restore the original saved kprobes variables and continue. */
if (kprobe_status == KPROBE_REENTER) { if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(); restore_previous_kprobe(kcb);
goto out; goto out;
} else { } else {
unlock_kprobes(); unlock_kprobes();
} }
reset_current_kprobe();
out: out:
preempt_enable_no_resched(); preempt_enable_no_resched();
@ -579,14 +581,17 @@ out:
/* Interrupts disabled, kprobe_lock held. */ /* Interrupts disabled, kprobe_lock held. */
int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{ {
if (current_kprobe->fault_handler struct kprobe *cur = kprobe_running();
&& current_kprobe->fault_handler(current_kprobe, regs, trapnr)) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1; return 1;
if (kprobe_status & KPROBE_HIT_SS) { if (kcb->kprobe_status & KPROBE_HIT_SS) {
resume_execution(current_kprobe, regs); resume_execution(cur, regs, kcb);
regs->eflags |= kprobe_old_rflags; regs->eflags |= kcb->kprobe_old_rflags;
reset_current_kprobe();
unlock_kprobes(); unlock_kprobes();
preempt_enable_no_resched(); preempt_enable_no_resched();
} }
@ -629,10 +634,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{ {
struct jprobe *jp = container_of(p, struct jprobe, kp); struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr; unsigned long addr;
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
jprobe_saved_regs = *regs; kcb->jprobe_saved_regs = *regs;
jprobe_saved_rsp = (long *) regs->rsp; kcb->jprobe_saved_rsp = (long *) regs->rsp;
addr = (unsigned long)jprobe_saved_rsp; addr = (unsigned long)(kcb->jprobe_saved_rsp);
/* /*
* As Linus pointed out, gcc assumes that the callee * As Linus pointed out, gcc assumes that the callee
* owns the argument space and could overwrite it, e.g. * owns the argument space and could overwrite it, e.g.
@ -640,7 +646,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* we also save and restore enough stack bytes to cover * we also save and restore enough stack bytes to cover
* the argument area. * the argument area.
*/ */
memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
MIN_STACK_SIZE(addr));
regs->eflags &= ~IF_MASK; regs->eflags &= ~IF_MASK;
regs->rip = (unsigned long)(jp->entry); regs->rip = (unsigned long)(jp->entry);
return 1; return 1;
@ -648,34 +655,38 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
void __kprobes jprobe_return(void) void __kprobes jprobe_return(void)
{ {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
asm volatile (" xchg %%rbx,%%rsp \n" asm volatile (" xchg %%rbx,%%rsp \n"
" int3 \n" " int3 \n"
" .globl jprobe_return_end \n" " .globl jprobe_return_end \n"
" jprobe_return_end: \n" " jprobe_return_end: \n"
" nop \n"::"b" " nop \n"::"b"
(jprobe_saved_rsp):"memory"); (kcb->jprobe_saved_rsp):"memory");
} }
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{ {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
u8 *addr = (u8 *) (regs->rip - 1); u8 *addr = (u8 *) (regs->rip - 1);
unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
struct jprobe *jp = container_of(p, struct jprobe, kp); struct jprobe *jp = container_of(p, struct jprobe, kp);
if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
if ((long *)regs->rsp != jprobe_saved_rsp) { if ((long *)regs->rsp != kcb->jprobe_saved_rsp) {
struct pt_regs *saved_regs = struct pt_regs *saved_regs =
container_of(jprobe_saved_rsp, struct pt_regs, rsp); container_of(kcb->jprobe_saved_rsp,
struct pt_regs, rsp);
printk("current rsp %p does not match saved rsp %p\n", printk("current rsp %p does not match saved rsp %p\n",
(long *)regs->rsp, jprobe_saved_rsp); (long *)regs->rsp, kcb->jprobe_saved_rsp);
printk("Saved registers for jprobe %p\n", jp); printk("Saved registers for jprobe %p\n", jp);
show_registers(saved_regs); show_registers(saved_regs);
printk("Current registers\n"); printk("Current registers\n");
show_registers(regs); show_registers(regs);
BUG(); BUG();
} }
*regs = jprobe_saved_regs; *regs = kcb->jprobe_saved_regs;
memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
MIN_STACK_SIZE(stack_addr)); MIN_STACK_SIZE(stack_addr));
return 1; return 1;
} }

View File

@ -25,6 +25,7 @@
*/ */
#include <linux/types.h> #include <linux/types.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/percpu.h>
struct pt_regs; struct pt_regs;
@ -48,6 +49,24 @@ struct arch_specific_insn {
kprobe_opcode_t *insn; kprobe_opcode_t *insn;
}; };
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
unsigned long old_rflags;
unsigned long saved_rflags;
};
/* per-cpu kprobe control block */
struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_old_rflags;
unsigned long kprobe_saved_rflags;
long *jprobe_saved_rsp;
struct pt_regs jprobe_saved_regs;
kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
struct prev_kprobe prev_kprobe;
};
/* trap3/1 are intr gates for kprobes. So, restore the status of IF, /* trap3/1 are intr gates for kprobes. So, restore the status of IF,
* if necessary, before executing the original int3/1 (trap) handler. * if necessary, before executing the original int3/1 (trap) handler.
*/ */