Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core

Pull uprobes fixes + cleanups from Oleg Nesterov.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2012-09-19 17:03:07 +02:00
commit d0616c1775
7 changed files with 171 additions and 43 deletions

View File

@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
extern void set_task_blockstep(struct task_struct *task, bool on);
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:

View File

@ -42,10 +42,11 @@ struct arch_uprobe {
};
struct arch_uprobe_task {
unsigned long saved_trap_nr;
#ifdef CONFIG_X86_64
unsigned long saved_scratch_register;
#endif
unsigned int saved_trap_nr;
unsigned int saved_tf;
};
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);

View File

@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
return 1;
}
void set_task_blockstep(struct task_struct *task, bool on)
{
unsigned long debugctl;
/*
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
* FIXME: this means that set/clear TIF_BLOCKSTEP is simply
* wrong if task != current, SIGKILL can wakeup the stopped
* tracee and set/clear can play with the running task, this
* can confuse the next __switch_to_xtra().
*/
local_irq_disable();
debugctl = get_debugctlmsr();
if (on) {
debugctl |= DEBUGCTLMSR_BTF;
set_tsk_thread_flag(task, TIF_BLOCKSTEP);
} else {
debugctl &= ~DEBUGCTLMSR_BTF;
clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
}
if (task == current)
update_debugctlmsr(debugctl);
local_irq_enable();
}
/*
* Enable single or block step.
*/
@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
* So no one should try to use debugger block stepping in a program
* that uses user-mode single stepping itself.
*/
if (enable_single_step(child) && block) {
unsigned long debugctl = get_debugctlmsr();
debugctl |= DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
set_tsk_thread_flag(child, TIF_BLOCKSTEP);
} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (enable_single_step(child) && block)
set_task_blockstep(child, true);
else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);
}
void user_enable_single_step(struct task_struct *child)
@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
/*
* Make sure block stepping (BTF) is disabled.
*/
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);
/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);

View File

@ -41,6 +41,9 @@
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2
/* Instruction will modify TF, don't change it */
#define UPROBE_FIX_SETF 0x4
#define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000
@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
insn_get_opcode(insn); /* should be a nop */
switch (OPCODE1(insn)) {
case 0x9d:
/* popf */
auprobe->fixups |= UPROBE_FIX_SETF;
break;
case 0xc3: /* ret/lret */
case 0xcb:
case 0xc2:
@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* Skip these instructions as per the currently known x86 ISA.
* 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
*/
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
return false;
}
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
bool ret = __skip_sstep(auprobe, regs);
if (ret && (regs->flags & X86_EFLAGS_TF))
send_sig(SIGTRAP, current, 0);
return ret;
}
void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
struct pt_regs *regs = task_pt_regs(task);
autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
regs->flags |= X86_EFLAGS_TF;
if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
set_task_blockstep(task, false);
}
void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
struct pt_regs *regs = task_pt_regs(task);
/*
* The state of TIF_BLOCKSTEP was not saved so we can get an extra
* SIGTRAP if we do not clear TF. We need to examine the opcode to
* make it right.
*/
if (unlikely(trapped)) {
if (!autask->saved_tf)
regs->flags &= ~X86_EFLAGS_TF;
} else {
if (autask->saved_tf)
send_sig(SIGTRAP, task, 0);
else if (!(auprobe->fixups & UPROBE_FIX_SETF))
regs->flags &= ~X86_EFLAGS_TF;
}
}

View File

@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm);
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
#define MMF_HAS_UPROBES 19 /* might have uprobes */
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)

View File

@ -112,6 +112,8 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t);
extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch);
extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);

View File

@ -411,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
{
struct uprobe *uprobe;
unsigned long flags;
spin_lock_irqsave(&uprobes_treelock, flags);
spin_lock(&uprobes_treelock);
uprobe = __find_uprobe(inode, offset);
spin_unlock_irqrestore(&uprobes_treelock, flags);
spin_unlock(&uprobes_treelock);
return uprobe;
}
@ -462,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
*/
static struct uprobe *insert_uprobe(struct uprobe *uprobe)
{
unsigned long flags;
struct uprobe *u;
spin_lock_irqsave(&uprobes_treelock, flags);
spin_lock(&uprobes_treelock);
u = __insert_uprobe(uprobe);
spin_unlock_irqrestore(&uprobes_treelock, flags);
spin_unlock(&uprobes_treelock);
/* For now assume that the instruction need not be single-stepped */
uprobe->flags |= UPROBE_SKIP_SSTEP;
@ -686,7 +684,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
set_bit(MMF_HAS_UPROBES, &mm->flags);
ret = set_swbp(&uprobe->arch, mm, vaddr);
if (ret && first_uprobe)
if (!ret)
clear_bit(MMF_RECALC_UPROBES, &mm->flags);
else if (first_uprobe)
clear_bit(MMF_HAS_UPROBES, &mm->flags);
return ret;
@ -695,6 +695,11 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
static void
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{
/* can happen if uprobe_register() fails */
if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
return;
set_bit(MMF_RECALC_UPROBES, &mm->flags);
set_orig_insn(&uprobe->arch, mm, vaddr);
}
@ -705,11 +710,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
*/
static void delete_uprobe(struct uprobe *uprobe)
{
unsigned long flags;
spin_lock_irqsave(&uprobes_treelock, flags);
spin_lock(&uprobes_treelock);
rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock_irqrestore(&uprobes_treelock, flags);
spin_unlock(&uprobes_treelock);
iput(uprobe->inode);
put_uprobe(uprobe);
atomic_dec(&uprobe_events);
@ -897,6 +900,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
}
mutex_unlock(uprobes_hash(inode));
if (uprobe)
put_uprobe(uprobe);
return ret;
@ -967,7 +971,6 @@ static void build_probe_list(struct inode *inode,
struct list_head *head)
{
loff_t min, max;
unsigned long flags;
struct rb_node *n, *t;
struct uprobe *u;
@ -975,7 +978,7 @@ static void build_probe_list(struct inode *inode,
min = vaddr_to_offset(vma, start);
max = min + (end - start) - 1;
spin_lock_irqsave(&uprobes_treelock, flags);
spin_lock(&uprobes_treelock);
n = find_node_in_range(inode, min, max);
if (n) {
for (t = n; t; t = rb_prev(t)) {
@ -993,7 +996,7 @@ static void build_probe_list(struct inode *inode,
atomic_inc(&u->ref);
}
}
spin_unlock_irqrestore(&uprobes_treelock, flags);
spin_unlock(&uprobes_treelock);
}
/*
@ -1030,6 +1033,25 @@ int uprobe_mmap(struct vm_area_struct *vma)
return 0;
}
static bool
vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
loff_t min, max;
struct inode *inode;
struct rb_node *n;
inode = vma->vm_file->f_mapping->host;
min = vaddr_to_offset(vma, start);
max = min + (end - start) - 1;
spin_lock(&uprobes_treelock);
n = find_node_in_range(inode, min, max);
spin_unlock(&uprobes_treelock);
return !!n;
}
/*
* Called in context of a munmap of a vma.
*/
@ -1041,10 +1063,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
return;
if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
return;
/* TODO: unmapping uprobe(s) will need more work */
if (vma_has_uprobes(vma, start, end))
set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
}
/* Slot allocation for XOL */
@ -1150,8 +1174,11 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
newmm->uprobes_state.xol_area = NULL;
if (test_bit(MMF_HAS_UPROBES, &oldmm->flags))
if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
set_bit(MMF_HAS_UPROBES, &newmm->flags);
/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
set_bit(MMF_RECALC_UPROBES, &newmm->flags);
}
}
/*
@ -1369,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
return false;
}
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
struct vm_area_struct *vma;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!valid_vma(vma, false))
continue;
/*
* This is not strictly accurate, we can race with
* uprobe_unregister() and see the already removed
* uprobe if delete_uprobe() was not yet called.
*/
if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
return;
}
clear_bit(MMF_HAS_UPROBES, &mm->flags);
}
static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
{
struct mm_struct *mm = current->mm;
@ -1390,11 +1436,24 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
} else {
*is_swbp = -EFAULT;
}
if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
mmf_recalc_uprobes(mm);
up_read(&mm->mmap_sem);
return uprobe;
}
void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
{
user_enable_single_step(current);
}
void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
{
user_disable_single_step(current);
}
/*
* Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@ -1441,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs)
utask->state = UTASK_SSTEP;
if (!pre_ssout(uprobe, regs, bp_vaddr)) {
user_enable_single_step(current);
arch_uprobe_enable_step(&uprobe->arch);
return;
}
@ -1477,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
else
WARN_ON_ONCE(1);
arch_uprobe_disable_step(&uprobe->arch);
put_uprobe(uprobe);
utask->active_uprobe = NULL;
utask->state = UTASK_RUNNING;
user_disable_single_step(current);
xol_free_insn_slot(current);
spin_lock_irq(&current->sighand->siglock);