Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core

Pull uprobes fixes + cleanups from Oleg Nesterov.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2012-09-19 17:03:07 +02:00
commit d0616c1775
7 changed files with 171 additions and 43 deletions

View File

@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
} }
extern void set_task_blockstep(struct task_struct *task, bool on);
/* /*
* from system description table in BIOS. Mostly for MCA use, but * from system description table in BIOS. Mostly for MCA use, but
* others may find it useful: * others may find it useful:

View File

@ -42,10 +42,11 @@ struct arch_uprobe {
}; };
struct arch_uprobe_task { struct arch_uprobe_task {
unsigned long saved_trap_nr;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
unsigned long saved_scratch_register; unsigned long saved_scratch_register;
#endif #endif
unsigned int saved_trap_nr;
unsigned int saved_tf;
}; };
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);

View File

@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
return 1; return 1;
} }
/*
 * Switch block stepping on or off for @task: DEBUGCTLMSR_BTF is set or
 * cleared in the debugctl value and TIF_BLOCKSTEP is set or cleared in
 * the task's thread flags. The MSR is written back only if @task is
 * current.
 */
void set_task_blockstep(struct task_struct *task, bool on)
{
	unsigned long msr;

	/*
	 * Interrupts stay off so that irq/preemption cannot change
	 * debugctl between the read and the write below, and so that
	 * TIF_BLOCKSTEP and debugctl are changed atomically with respect
	 * to preemption.
	 *
	 * FIXME: this means that setting/clearing TIF_BLOCKSTEP is simply
	 * wrong if task != current: SIGKILL can wake up the stopped
	 * tracee, so the set/clear can play with a running task, and this
	 * can confuse the next __switch_to_xtra().
	 */
	local_irq_disable();

	msr = get_debugctlmsr();
	if (!on) {
		msr &= ~DEBUGCTLMSR_BTF;
		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
	} else {
		msr |= DEBUGCTLMSR_BTF;
		set_tsk_thread_flag(task, TIF_BLOCKSTEP);
	}

	/* Only the current task's setting is pushed to the hardware here. */
	if (task == current)
		update_debugctlmsr(msr);

	local_irq_enable();
}
/* /*
* Enable single or block step. * Enable single or block step.
*/ */
@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
* So no one should try to use debugger block stepping in a program * So no one should try to use debugger block stepping in a program
* that uses user-mode single stepping itself. * that uses user-mode single stepping itself.
*/ */
if (enable_single_step(child) && block) { if (enable_single_step(child) && block)
unsigned long debugctl = get_debugctlmsr(); set_task_blockstep(child, true);
else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
debugctl |= DEBUGCTLMSR_BTF; set_task_blockstep(child, false);
update_debugctlmsr(debugctl);
set_tsk_thread_flag(child, TIF_BLOCKSTEP);
} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
} }
void user_enable_single_step(struct task_struct *child) void user_enable_single_step(struct task_struct *child)
@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
/* /*
* Make sure block stepping (BTF) is disabled. * Make sure block stepping (BTF) is disabled.
*/ */
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
unsigned long debugctl = get_debugctlmsr(); set_task_blockstep(child, false);
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
/* Always clear TIF_SINGLESTEP... */ /* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP); clear_tsk_thread_flag(child, TIF_SINGLESTEP);

View File

@ -41,6 +41,9 @@
/* Adjust the return address of a call insn */ /* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2 #define UPROBE_FIX_CALL 0x2
/* Instruction will modify TF, don't change it */
#define UPROBE_FIX_SETF 0x4
#define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000 #define UPROBE_FIX_RIP_CX 0x4000
@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
insn_get_opcode(insn); /* should be a nop */ insn_get_opcode(insn); /* should be a nop */
switch (OPCODE1(insn)) { switch (OPCODE1(insn)) {
case 0x9d:
/* popf */
auprobe->fixups |= UPROBE_FIX_SETF;
break;
case 0xc3: /* ret/lret */ case 0xc3: /* ret/lret */
case 0xcb: case 0xcb:
case 0xc2: case 0xc2:
@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* Skip these instructions as per the currently known x86 ISA. * Skip these instructions as per the currently known x86 ISA.
* 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
*/ */
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{ {
int i; int i;
@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
} }
return false; return false;
} }
/*
 * Ask __skip_sstep() whether the probed instruction can be skipped
 * instead of single-stepped and return its answer. When the instruction
 * was skipped and X86_EFLAGS_TF is set in regs->flags, SIGTRAP is sent
 * to current.
 */
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	if (!__skip_sstep(auprobe, regs))
		return false;

	/* Skipped with TF set: deliver the trap by hand. */
	if (regs->flags & X86_EFLAGS_TF)
		send_sig(SIGTRAP, current, 0);

	return true;
}
/*
 * Arm single-stepping for current: remember in autask->saved_tf whether
 * X86_EFLAGS_TF was already set in the task's pt_regs, then set it, and
 * turn block stepping off if TIF_BLOCKSTEP is currently set.
 */
void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *tsk = current;
	struct pt_regs *regs = task_pt_regs(tsk);
	struct arch_uprobe_task *autask = &tsk->utask->autask;

	/* Sample TF before it is forced on below. */
	autask->saved_tf = (regs->flags & X86_EFLAGS_TF) ? 1 : 0;
	regs->flags |= X86_EFLAGS_TF;

	if (!test_tsk_thread_flag(tsk, TIF_BLOCKSTEP))
		return;

	set_task_blockstep(tsk, false);
}
/*
 * Undo arch_uprobe_enable_step() for current: decide whether
 * X86_EFLAGS_TF must be cleared from the task's pt_regs and whether a
 * SIGTRAP has to be sent, based on the TF value saved in
 * autask->saved_tf, the utask state and the UPROBE_FIX_SETF fixup.
 */
void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *tsk = current;
	struct pt_regs *regs = task_pt_regs(tsk);
	struct arch_uprobe_task *autask = &tsk->utask->autask;
	bool clear_tf;

	/*
	 * The state of TIF_BLOCKSTEP was not saved, so we can get an extra
	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
	 * make it right.
	 */
	if (unlikely(tsk->utask->state == UTASK_SSTEP_TRAPPED)) {
		/* Trapped: drop TF only if it was not set beforehand. */
		clear_tf = !autask->saved_tf;
	} else if (autask->saved_tf) {
		/* TF was set beforehand: keep it and send the trap. */
		send_sig(SIGTRAP, tsk, 0);
		clear_tf = false;
	} else {
		/* Leave TF alone if the instruction itself modifies it. */
		clear_tf = !(auprobe->fixups & UPROBE_FIX_SETF);
	}

	if (clear_tf)
		regs->flags &= ~X86_EFLAGS_TF;
}

View File

@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm);
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
#define MMF_HAS_UPROBES 19 /* might have uprobes */ #define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)

View File

@ -112,6 +112,8 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t);
extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch);
extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs);

View File

@ -411,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
{ {
struct uprobe *uprobe; struct uprobe *uprobe;
unsigned long flags;
spin_lock_irqsave(&uprobes_treelock, flags); spin_lock(&uprobes_treelock);
uprobe = __find_uprobe(inode, offset); uprobe = __find_uprobe(inode, offset);
spin_unlock_irqrestore(&uprobes_treelock, flags); spin_unlock(&uprobes_treelock);
return uprobe; return uprobe;
} }
@ -462,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
*/ */
static struct uprobe *insert_uprobe(struct uprobe *uprobe) static struct uprobe *insert_uprobe(struct uprobe *uprobe)
{ {
unsigned long flags;
struct uprobe *u; struct uprobe *u;
spin_lock_irqsave(&uprobes_treelock, flags); spin_lock(&uprobes_treelock);
u = __insert_uprobe(uprobe); u = __insert_uprobe(uprobe);
spin_unlock_irqrestore(&uprobes_treelock, flags); spin_unlock(&uprobes_treelock);
/* For now assume that the instruction need not be single-stepped */ /* For now assume that the instruction need not be single-stepped */
uprobe->flags |= UPROBE_SKIP_SSTEP; uprobe->flags |= UPROBE_SKIP_SSTEP;
@ -686,7 +684,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
set_bit(MMF_HAS_UPROBES, &mm->flags); set_bit(MMF_HAS_UPROBES, &mm->flags);
ret = set_swbp(&uprobe->arch, mm, vaddr); ret = set_swbp(&uprobe->arch, mm, vaddr);
if (ret && first_uprobe) if (!ret)
clear_bit(MMF_RECALC_UPROBES, &mm->flags);
else if (first_uprobe)
clear_bit(MMF_HAS_UPROBES, &mm->flags); clear_bit(MMF_HAS_UPROBES, &mm->flags);
return ret; return ret;
@ -695,6 +695,11 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
static void static void
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{ {
/* can happen if uprobe_register() fails */
if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
return;
set_bit(MMF_RECALC_UPROBES, &mm->flags);
set_orig_insn(&uprobe->arch, mm, vaddr); set_orig_insn(&uprobe->arch, mm, vaddr);
} }
@ -705,11 +710,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
*/ */
static void delete_uprobe(struct uprobe *uprobe) static void delete_uprobe(struct uprobe *uprobe)
{ {
unsigned long flags; spin_lock(&uprobes_treelock);
spin_lock_irqsave(&uprobes_treelock, flags);
rb_erase(&uprobe->rb_node, &uprobes_tree); rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock_irqrestore(&uprobes_treelock, flags); spin_unlock(&uprobes_treelock);
iput(uprobe->inode); iput(uprobe->inode);
put_uprobe(uprobe); put_uprobe(uprobe);
atomic_dec(&uprobe_events); atomic_dec(&uprobe_events);
@ -897,7 +900,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
} }
mutex_unlock(uprobes_hash(inode)); mutex_unlock(uprobes_hash(inode));
put_uprobe(uprobe); if (uprobe)
put_uprobe(uprobe);
return ret; return ret;
} }
@ -967,7 +971,6 @@ static void build_probe_list(struct inode *inode,
struct list_head *head) struct list_head *head)
{ {
loff_t min, max; loff_t min, max;
unsigned long flags;
struct rb_node *n, *t; struct rb_node *n, *t;
struct uprobe *u; struct uprobe *u;
@ -975,7 +978,7 @@ static void build_probe_list(struct inode *inode,
min = vaddr_to_offset(vma, start); min = vaddr_to_offset(vma, start);
max = min + (end - start) - 1; max = min + (end - start) - 1;
spin_lock_irqsave(&uprobes_treelock, flags); spin_lock(&uprobes_treelock);
n = find_node_in_range(inode, min, max); n = find_node_in_range(inode, min, max);
if (n) { if (n) {
for (t = n; t; t = rb_prev(t)) { for (t = n; t; t = rb_prev(t)) {
@ -993,7 +996,7 @@ static void build_probe_list(struct inode *inode,
atomic_inc(&u->ref); atomic_inc(&u->ref);
} }
} }
spin_unlock_irqrestore(&uprobes_treelock, flags); spin_unlock(&uprobes_treelock);
} }
/* /*
@ -1030,6 +1033,25 @@ int uprobe_mmap(struct vm_area_struct *vma)
return 0; return 0;
} }
/*
 * Report whether find_node_in_range() finds a node for the inode backing
 * @vma between the file offsets that correspond to @start and @end - 1.
 * The lookup runs under uprobes_treelock.
 */
static bool
vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	struct inode *inode = vma->vm_file->f_mapping->host;
	loff_t first = vaddr_to_offset(vma, start);
	loff_t last = first + (end - start) - 1;	/* inclusive upper bound */
	struct rb_node *node;

	spin_lock(&uprobes_treelock);
	node = find_node_in_range(inode, first, last);
	spin_unlock(&uprobes_treelock);

	return node != NULL;
}
/* /*
* Called in context of a munmap of a vma. * Called in context of a munmap of a vma.
*/ */
@ -1041,10 +1063,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
return; return;
if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
return; return;
/* TODO: unmapping uprobe(s) will need more work */ if (vma_has_uprobes(vma, start, end))
set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
} }
/* Slot allocation for XOL */ /* Slot allocation for XOL */
@ -1150,8 +1174,11 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{ {
newmm->uprobes_state.xol_area = NULL; newmm->uprobes_state.xol_area = NULL;
if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
set_bit(MMF_HAS_UPROBES, &newmm->flags); set_bit(MMF_HAS_UPROBES, &newmm->flags);
/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
set_bit(MMF_RECALC_UPROBES, &newmm->flags);
}
} }
/* /*
@ -1369,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
return false; return false;
} }
/*
 * Walk all vmas of @mm and clear MMF_HAS_UPROBES unless some vma that
 * passes valid_vma() still has a uprobe in its range.
 */
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		/*
		 * The uprobe check is not strictly accurate: we can race
		 * with uprobe_unregister() and see an already removed
		 * uprobe if delete_uprobe() was not yet called.
		 */
		if (valid_vma(vma, false) &&
		    vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
			return;

		vma = vma->vm_next;
	}

	clear_bit(MMF_HAS_UPROBES, &mm->flags);
}
static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
@ -1390,11 +1436,24 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
} else { } else {
*is_swbp = -EFAULT; *is_swbp = -EFAULT;
} }
if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
mmf_recalc_uprobes(mm);
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
return uprobe; return uprobe;
} }
/*
 * Default (__weak) implementation: enables single-stepping for current
 * through user_enable_single_step(). @arch is not used here.
 */
void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
{
user_enable_single_step(current);
}
/*
 * Default (__weak) implementation: disables single-stepping for current
 * through user_disable_single_step(). @arch is not used here.
 */
void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
{
user_disable_single_step(current);
}
/* /*
* Run handler and ask thread to singlestep. * Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@ -1441,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs)
utask->state = UTASK_SSTEP; utask->state = UTASK_SSTEP;
if (!pre_ssout(uprobe, regs, bp_vaddr)) { if (!pre_ssout(uprobe, regs, bp_vaddr)) {
user_enable_single_step(current); arch_uprobe_enable_step(&uprobe->arch);
return; return;
} }
@ -1477,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
else else
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
arch_uprobe_disable_step(&uprobe->arch);
put_uprobe(uprobe); put_uprobe(uprobe);
utask->active_uprobe = NULL; utask->active_uprobe = NULL;
utask->state = UTASK_RUNNING; utask->state = UTASK_RUNNING;
user_disable_single_step(current);
xol_free_insn_slot(current); xol_free_insn_slot(current);
spin_lock_irq(&current->sighand->siglock); spin_lock_irq(&current->sighand->siglock);