Merge branch 'tracing/hw-breakpoints' into perf/core
Conflicts: arch/x86/kernel/kprobes.c kernel/trace/Makefile Merge reason: hw-breakpoints perf integration is looking good in testing and in reviews, plus conflicts are mounting up - so merge & resolve. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
commit
96200591a3
|
@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
|
|||
config HAVE_DEFAULT_NO_SPIN_MUTEXES
|
||||
bool
|
||||
|
||||
config HAVE_HW_BREAKPOINT
|
||||
bool
|
||||
depends on HAVE_PERF_EVENTS
|
||||
select ANON_INODES
|
||||
select PERF_EVENTS
|
||||
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
|
|
@ -49,6 +49,7 @@ config X86
|
|||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_BZIP2
|
||||
select HAVE_KERNEL_LZMA
|
||||
select HAVE_HW_BREAKPOINT
|
||||
select HAVE_ARCH_KMEMCHECK
|
||||
|
||||
config OUTPUT_FORMAT
|
||||
|
|
|
@ -10,6 +10,7 @@ header-y += ptrace-abi.h
|
|||
header-y += sigcontext32.h
|
||||
header-y += ucontext.h
|
||||
header-y += processor-flags.h
|
||||
header-y += hw_breakpoint.h
|
||||
|
||||
unifdef-y += e820.h
|
||||
unifdef-y += ist.h
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include <linux/user.h>
|
||||
#include <linux/elfcore.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
/*
|
||||
* fill in the user structure for an a.out core dump
|
||||
|
@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
|
|||
>> PAGE_SHIFT;
|
||||
dump->u_dsize -= dump->u_tsize;
|
||||
dump->u_ssize = 0;
|
||||
dump->u_debugreg[0] = current->thread.debugreg0;
|
||||
dump->u_debugreg[1] = current->thread.debugreg1;
|
||||
dump->u_debugreg[2] = current->thread.debugreg2;
|
||||
dump->u_debugreg[3] = current->thread.debugreg3;
|
||||
dump->u_debugreg[4] = 0;
|
||||
dump->u_debugreg[5] = 0;
|
||||
dump->u_debugreg[6] = current->thread.debugreg6;
|
||||
dump->u_debugreg[7] = current->thread.debugreg7;
|
||||
aout_dump_debugregs(dump);
|
||||
|
||||
if (dump->start_stack < TASK_SIZE)
|
||||
dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define DR_TRAP1 (0x2) /* db1 */
|
||||
#define DR_TRAP2 (0x4) /* db2 */
|
||||
#define DR_TRAP3 (0x8) /* db3 */
|
||||
#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
|
||||
|
||||
#define DR_STEP (0x4000) /* single-step */
|
||||
#define DR_SWITCH (0x8000) /* task switch */
|
||||
|
@ -49,6 +50,8 @@
|
|||
|
||||
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
|
||||
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
|
||||
#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
|
||||
#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
|
||||
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
|
||||
|
||||
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
|
||||
|
@ -67,4 +70,34 @@
|
|||
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
|
||||
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
|
||||
|
||||
/*
|
||||
* HW breakpoint additions
|
||||
*/
|
||||
#ifdef __KERNEL__
|
||||
|
||||
DECLARE_PER_CPU(unsigned long, dr7);
|
||||
|
||||
/*
 * Write zero to the debug registers: the control register (DR7) first,
 * then each of the four breakpoint address registers (DR0-DR3).
 */
static inline void hw_breakpoint_disable(void)
{
	int reg;

	/* Clear the control register before touching the addresses */
	set_debugreg(0UL, 7);

	/* Wipe the individual breakpoint address registers */
	for (reg = 0; reg < 4; reg++)
		set_debugreg(0UL, reg);
}
|
||||
|
||||
static inline int hw_breakpoint_active(void)
|
||||
{
|
||||
return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
|
||||
}
|
||||
|
||||
extern void aout_dump_debugregs(struct user *dump);
|
||||
|
||||
extern void hw_breakpoint_restore(void);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _ASM_X86_DEBUGREG_H */
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
#ifndef _I386_HW_BREAKPOINT_H
|
||||
#define _I386_HW_BREAKPOINT_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#define __ARCH_HW_BREAKPOINT_H
|
||||
|
||||
/*
|
||||
* The name should probably be something dealt in
|
||||
* a higher level. While dealing with the user
|
||||
* (display/resolving)
|
||||
*/
|
||||
struct arch_hw_breakpoint {
|
||||
char *name; /* Contains name of the symbol to set bkpt */
|
||||
unsigned long address;
|
||||
u8 len;
|
||||
u8 type;
|
||||
};
|
||||
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
/* Available HW breakpoint length encodings */
|
||||
#define X86_BREAKPOINT_LEN_1 0x40
|
||||
#define X86_BREAKPOINT_LEN_2 0x44
|
||||
#define X86_BREAKPOINT_LEN_4 0x4c
|
||||
#define X86_BREAKPOINT_LEN_EXECUTE 0x40
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define X86_BREAKPOINT_LEN_8 0x48
|
||||
#endif
|
||||
|
||||
/* Available HW breakpoint type encodings */
|
||||
|
||||
/* trigger on instruction execute */
|
||||
#define X86_BREAKPOINT_EXECUTE 0x80
|
||||
/* trigger on memory write */
|
||||
#define X86_BREAKPOINT_WRITE 0x81
|
||||
/* trigger on memory read or write */
|
||||
#define X86_BREAKPOINT_RW 0x83
|
||||
|
||||
/* Total number of available HW breakpoint registers */
|
||||
#define HBP_NUM 4
|
||||
|
||||
struct perf_event;
|
||||
struct pmu;
|
||||
|
||||
extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
|
||||
extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
|
||||
struct task_struct *tsk);
|
||||
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
|
||||
unsigned long val, void *data);
|
||||
|
||||
|
||||
int arch_install_hw_breakpoint(struct perf_event *bp);
|
||||
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
|
||||
void hw_breakpoint_pmu_read(struct perf_event *bp);
|
||||
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
|
||||
|
||||
extern void
|
||||
arch_fill_perf_breakpoint(struct perf_event *bp);
|
||||
|
||||
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
|
||||
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
|
||||
|
||||
extern int arch_bp_generic_fields(int x86_len, int x86_type,
|
||||
int *gen_len, int *gen_type);
|
||||
|
||||
extern struct pmu perf_ops_bp;
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _I386_HW_BREAKPOINT_H */
|
||||
|
|
@ -30,6 +30,7 @@ struct mm_struct;
|
|||
#include <linux/math64.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#define HBP_NUM 4
|
||||
/*
|
||||
* Default implementation of macro that returns current
|
||||
* instruction pointer ("program counter").
|
||||
|
@ -422,6 +423,8 @@ extern unsigned int xstate_size;
|
|||
extern void free_thread_xstate(struct task_struct *);
|
||||
extern struct kmem_cache *task_xstate_cachep;
|
||||
|
||||
struct perf_event;
|
||||
|
||||
struct thread_struct {
|
||||
/* Cached TLS descriptors: */
|
||||
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
||||
|
@ -443,13 +446,10 @@ struct thread_struct {
|
|||
unsigned long fs;
|
||||
#endif
|
||||
unsigned long gs;
|
||||
/* Hardware debugging registers: */
|
||||
unsigned long debugreg0;
|
||||
unsigned long debugreg1;
|
||||
unsigned long debugreg2;
|
||||
unsigned long debugreg3;
|
||||
unsigned long debugreg6;
|
||||
unsigned long debugreg7;
|
||||
/* Save middle states of ptrace breakpoints */
|
||||
struct perf_event *ptrace_bps[HBP_NUM];
|
||||
/* Debug status used for traps, single steps, etc... */
|
||||
unsigned long debugreg6;
|
||||
/* Fault info: */
|
||||
unsigned long cr2;
|
||||
unsigned long trap_no;
|
||||
|
|
|
@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
|||
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
|
||||
obj-y += bootflag.o e820.o
|
||||
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
|
||||
obj-y += alternative.o i8253.o pci-nommu.o
|
||||
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
|
||||
obj-y += tsc.o io_delay.o rtc.o
|
||||
|
||||
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
|
||||
|
|
|
@ -0,0 +1,545 @@
|
|||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) 2007 Alan Stern
|
||||
* Copyright (C) 2009 IBM Corporation
|
||||
* Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
|
||||
*/
|
||||
|
||||
/*
|
||||
* HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
|
||||
* using the CPU's debug registers.
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/hw_breakpoint.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
/* Per cpu debug control register value */
|
||||
DEFINE_PER_CPU(unsigned long, dr7);
|
||||
EXPORT_PER_CPU_SYMBOL(dr7);
|
||||
|
||||
/* Per cpu debug address registers values */
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
|
||||
|
||||
/*
|
||||
* Stores the breakpoints currently in use on each breakpoint address
|
||||
* register on each CPU
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
|
||||
|
||||
|
||||
/*
|
||||
* Encode the length, type, Exact, and Enable bits for a particular breakpoint
|
||||
* as stored in debug register 7.
|
||||
*/
|
||||
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
|
||||
{
|
||||
unsigned long bp_info;
|
||||
|
||||
bp_info = (len | type) & 0xf;
|
||||
bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
|
||||
bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
|
||||
DR_GLOBAL_SLOWDOWN;
|
||||
return bp_info;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode the length and type bits for a particular breakpoint as
|
||||
* stored in debug register 7. Return the "enabled" status.
|
||||
*/
|
||||
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
|
||||
{
|
||||
int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
|
||||
|
||||
*len = (bp_info & 0xc) | 0x40;
|
||||
*type = (bp_info & 0x3) | 0x80;
|
||||
|
||||
return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
|
||||
}
|
||||
|
||||
/*
|
||||
* Install a perf counter breakpoint.
|
||||
*
|
||||
* We seek a free debug address register and use it for this
|
||||
* breakpoint. Eventually we enable it in the debug control register.
|
||||
*
|
||||
* Atomic: we hold the counter->ctx->lock and we only handle variables
|
||||
* and registers local to this cpu.
|
||||
*/
|
||||
int arch_install_hw_breakpoint(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
unsigned long *dr7;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
|
||||
|
||||
if (!*slot) {
|
||||
*slot = bp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
|
||||
return -EBUSY;
|
||||
|
||||
set_debugreg(info->address, i);
|
||||
__get_cpu_var(cpu_debugreg[i]) = info->address;
|
||||
|
||||
dr7 = &__get_cpu_var(dr7);
|
||||
*dr7 |= encode_dr7(i, info->len, info->type);
|
||||
|
||||
set_debugreg(*dr7, 7);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Uninstall the breakpoint contained in the given counter.
|
||||
*
|
||||
* First we search the debug address register it uses and then we disable
|
||||
* it.
|
||||
*
|
||||
* Atomic: we hold the counter->ctx->lock and we only handle variables
|
||||
* and registers local to this cpu.
|
||||
*/
|
||||
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
unsigned long *dr7;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
|
||||
|
||||
if (*slot == bp) {
|
||||
*slot = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
|
||||
return;
|
||||
|
||||
dr7 = &__get_cpu_var(dr7);
|
||||
*dr7 &= ~encode_dr7(i, info->len, info->type);
|
||||
|
||||
set_debugreg(*dr7, 7);
|
||||
}
|
||||
|
||||
/*
 * Convert an X86_BREAKPOINT_LEN_* encoding into a length in bytes.
 * Returns 0 for an encoding that is not recognised.
 */
static int get_hbp_len(u8 hbp_len)
{
	switch (hbp_len) {
	case X86_BREAKPOINT_LEN_1:
		return 1;
	case X86_BREAKPOINT_LEN_2:
		return 2;
	case X86_BREAKPOINT_LEN_4:
		return 4;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		return 8;
#endif
	default:
		return 0;
	}
}
|
||||
|
||||
/*
|
||||
* Check for virtual address in user space.
|
||||
*/
|
||||
int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
|
||||
{
|
||||
unsigned int len;
|
||||
|
||||
len = get_hbp_len(hbp_len);
|
||||
|
||||
return (va <= TASK_SIZE - len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for virtual address in kernel space.
|
||||
*/
|
||||
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
|
||||
{
|
||||
unsigned int len;
|
||||
|
||||
len = get_hbp_len(hbp_len);
|
||||
|
||||
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Store a breakpoint's encoded address, length, and type.
|
||||
*/
|
||||
static int arch_store_info(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
/*
|
||||
* For kernel-addresses, either the address or symbol name can be
|
||||
* specified.
|
||||
*/
|
||||
if (info->name)
|
||||
info->address = (unsigned long)
|
||||
kallsyms_lookup_name(info->name);
|
||||
if (info->address)
|
||||
return 0;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int arch_bp_generic_fields(int x86_len, int x86_type,
|
||||
int *gen_len, int *gen_type)
|
||||
{
|
||||
/* Len */
|
||||
switch (x86_len) {
|
||||
case X86_BREAKPOINT_LEN_1:
|
||||
*gen_len = HW_BREAKPOINT_LEN_1;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_2:
|
||||
*gen_len = HW_BREAKPOINT_LEN_2;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_4:
|
||||
*gen_len = HW_BREAKPOINT_LEN_4;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case X86_BREAKPOINT_LEN_8:
|
||||
*gen_len = HW_BREAKPOINT_LEN_8;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Type */
|
||||
switch (x86_type) {
|
||||
case X86_BREAKPOINT_EXECUTE:
|
||||
*gen_type = HW_BREAKPOINT_X;
|
||||
break;
|
||||
case X86_BREAKPOINT_WRITE:
|
||||
*gen_type = HW_BREAKPOINT_W;
|
||||
break;
|
||||
case X86_BREAKPOINT_RW:
|
||||
*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int arch_build_bp_info(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
|
||||
info->address = bp->attr.bp_addr;
|
||||
|
||||
/* Len */
|
||||
switch (bp->attr.bp_len) {
|
||||
case HW_BREAKPOINT_LEN_1:
|
||||
info->len = X86_BREAKPOINT_LEN_1;
|
||||
break;
|
||||
case HW_BREAKPOINT_LEN_2:
|
||||
info->len = X86_BREAKPOINT_LEN_2;
|
||||
break;
|
||||
case HW_BREAKPOINT_LEN_4:
|
||||
info->len = X86_BREAKPOINT_LEN_4;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case HW_BREAKPOINT_LEN_8:
|
||||
info->len = X86_BREAKPOINT_LEN_8;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Type */
|
||||
switch (bp->attr.bp_type) {
|
||||
case HW_BREAKPOINT_W:
|
||||
info->type = X86_BREAKPOINT_WRITE;
|
||||
break;
|
||||
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
|
||||
info->type = X86_BREAKPOINT_RW;
|
||||
break;
|
||||
case HW_BREAKPOINT_X:
|
||||
info->type = X86_BREAKPOINT_EXECUTE;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Validate the arch-specific HW Breakpoint register settings
|
||||
*/
|
||||
int arch_validate_hwbkpt_settings(struct perf_event *bp,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
unsigned int align;
|
||||
int ret;
|
||||
|
||||
|
||||
ret = arch_build_bp_info(bp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -EINVAL;
|
||||
|
||||
if (info->type == X86_BREAKPOINT_EXECUTE)
|
||||
/*
|
||||
* Ptrace-refactoring code
|
||||
* For now, we'll allow instruction breakpoint only for user-space
|
||||
* addresses
|
||||
*/
|
||||
if ((!arch_check_va_in_userspace(info->address, info->len)) &&
|
||||
info->len != X86_BREAKPOINT_EXECUTE)
|
||||
return ret;
|
||||
|
||||
switch (info->len) {
|
||||
case X86_BREAKPOINT_LEN_1:
|
||||
align = 0;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_2:
|
||||
align = 1;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_4:
|
||||
align = 3;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case X86_BREAKPOINT_LEN_8:
|
||||
align = 7;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (bp->callback)
|
||||
ret = arch_store_info(bp);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/*
|
||||
* Check that the low-order bits of the address are appropriate
|
||||
* for the alignment implied by len.
|
||||
*/
|
||||
if (info->address & align)
|
||||
return -EINVAL;
|
||||
|
||||
/* Check that the virtual address is in the proper range */
|
||||
if (tsk) {
|
||||
if (!arch_check_va_in_userspace(info->address, info->len))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
if (!arch_check_va_in_kernelspace(info->address, info->len))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump the debug register contents to the user.
|
||||
* We can't dump our per cpu values because it
|
||||
* may contain cpu wide breakpoint, something that
|
||||
* doesn't belong to the current task.
|
||||
*
|
||||
* TODO: include non-ptrace user breakpoints (perf)
|
||||
*/
|
||||
void aout_dump_debugregs(struct user *dump)
|
||||
{
|
||||
int i;
|
||||
int dr7 = 0;
|
||||
struct perf_event *bp;
|
||||
struct arch_hw_breakpoint *info;
|
||||
struct thread_struct *thread = ¤t->thread;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
bp = thread->ptrace_bps[i];
|
||||
|
||||
if (bp && !bp->attr.disabled) {
|
||||
dump->u_debugreg[i] = bp->attr.bp_addr;
|
||||
info = counter_arch_bp(bp);
|
||||
dr7 |= encode_dr7(i, info->len, info->type);
|
||||
} else {
|
||||
dump->u_debugreg[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
dump->u_debugreg[4] = 0;
|
||||
dump->u_debugreg[5] = 0;
|
||||
dump->u_debugreg[6] = current->thread.debugreg6;
|
||||
|
||||
dump->u_debugreg[7] = dr7;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(aout_dump_debugregs);
|
||||
|
||||
/*
|
||||
* Release the user breakpoints used by ptrace
|
||||
*/
|
||||
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
|
||||
{
|
||||
int i;
|
||||
struct thread_struct *t = &tsk->thread;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
unregister_hw_breakpoint(t->ptrace_bps[i]);
|
||||
t->ptrace_bps[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void hw_breakpoint_restore(void)
|
||||
{
|
||||
set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
|
||||
set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
|
||||
set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
|
||||
set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
|
||||
set_debugreg(current->thread.debugreg6, 6);
|
||||
set_debugreg(__get_cpu_var(dr7), 7);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
|
||||
|
||||
/*
|
||||
* Handle debug exception notifications.
|
||||
*
|
||||
* Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
|
||||
*
|
||||
* NOTIFY_DONE returned if one of the following conditions is true.
|
||||
* i) When the causative address is from user-space and the exception
|
||||
* is a valid one, i.e. not triggered as a result of lazy debug register
|
||||
* switching
|
||||
* ii) When there are more bits than trap<n> set in DR6 register (such
|
||||
* as BD, BS or BT) indicating that more than one debug condition is
|
||||
* met and requires some more action in do_debug().
|
||||
*
|
||||
* NOTIFY_STOP returned for all other cases
|
||||
*
|
||||
*/
|
||||
static int __kprobes hw_breakpoint_handler(struct die_args *args)
|
||||
{
|
||||
int i, cpu, rc = NOTIFY_STOP;
|
||||
struct perf_event *bp;
|
||||
unsigned long dr7, dr6;
|
||||
unsigned long *dr6_p;
|
||||
|
||||
/* The DR6 value is pointed by args->err */
|
||||
dr6_p = (unsigned long *)ERR_PTR(args->err);
|
||||
dr6 = *dr6_p;
|
||||
|
||||
/* Do an early return if no trap bits are set in DR6 */
|
||||
if ((dr6 & DR_TRAP_BITS) == 0)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
get_debugreg(dr7, 7);
|
||||
/* Disable breakpoints during exception handling */
|
||||
set_debugreg(0UL, 7);
|
||||
/*
|
||||
* Assert that local interrupts are disabled
|
||||
* Reset the DRn bits in the virtualized register value.
|
||||
* The ptrace trigger routine will add in whatever is needed.
|
||||
*/
|
||||
current->thread.debugreg6 &= ~DR_TRAP_BITS;
|
||||
cpu = get_cpu();
|
||||
|
||||
/* Handle all the breakpoints that were triggered */
|
||||
for (i = 0; i < HBP_NUM; ++i) {
|
||||
if (likely(!(dr6 & (DR_TRAP0 << i))))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The counter may be concurrently released but that can only
|
||||
* occur from a call_rcu() path. We can then safely fetch
|
||||
* the breakpoint, use its callback, touch its counter
|
||||
* while we are in an rcu_read_lock() path.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
|
||||
bp = per_cpu(bp_per_reg[i], cpu);
|
||||
if (bp)
|
||||
rc = NOTIFY_DONE;
|
||||
/*
|
||||
* Reset the 'i'th TRAP bit in dr6 to denote completion of
|
||||
* exception handling
|
||||
*/
|
||||
(*dr6_p) &= ~(DR_TRAP0 << i);
|
||||
/*
|
||||
* bp can be NULL due to lazy debug register switching
|
||||
* or due to concurrent perf counter removing.
|
||||
*/
|
||||
if (!bp) {
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
}
|
||||
|
||||
(bp->callback)(bp, args->regs);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
if (dr6 & (~DR_TRAP_BITS))
|
||||
rc = NOTIFY_DONE;
|
||||
|
||||
set_debugreg(dr7, 7);
|
||||
put_cpu();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle debug exception notifications.
|
||||
*/
|
||||
int __kprobes hw_breakpoint_exceptions_notify(
|
||||
struct notifier_block *unused, unsigned long val, void *data)
|
||||
{
|
||||
if (val != DIE_DEBUG)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
return hw_breakpoint_handler(data);
|
||||
}
|
||||
|
||||
/* PMU read hook for breakpoint events; currently does nothing. */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO: not implemented yet */
}
|
||||
|
||||
/* PMU unthrottle hook for breakpoint events; currently does nothing. */
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
{
	/* TODO: not implemented yet */
}
|
|
@ -43,6 +43,7 @@
|
|||
#include <linux/smp.h>
|
||||
#include <linux/nmi.h>
|
||||
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/system.h>
|
||||
|
||||
|
@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
|
|||
"resuming...\n");
|
||||
kgdb_arch_handle_exception(args->trapnr, args->signr,
|
||||
args->err, "c", "", regs);
|
||||
/*
|
||||
* Reset the BS bit in dr6 (pointed by args->err) to
|
||||
* denote completion of processing
|
||||
*/
|
||||
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
|
||||
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@
|
|||
#include <asm/uaccess.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/insn.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
void jprobe_return_end(void);
|
||||
|
||||
|
@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
|||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_DEBUG:
|
||||
if (post_kprobe_handler(args->regs))
|
||||
if (post_kprobe_handler(args->regs)) {
|
||||
/*
|
||||
* Reset the BS bit in dr6 (pointed by args->err) to
|
||||
* denote completion of processing
|
||||
*/
|
||||
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
|
||||
ret = NOTIFY_STOP;
|
||||
}
|
||||
break;
|
||||
case DIE_GPF:
|
||||
/*
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <asm/desc.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
static void set_idt(void *newidt, __u16 limit)
|
||||
{
|
||||
|
@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
|
|||
|
||||
/* Interrupts aren't acceptable while we reboot */
|
||||
local_irq_disable();
|
||||
hw_breakpoint_disable();
|
||||
|
||||
if (image->preserve_context) {
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
|
||||
unsigned long addr)
|
||||
|
@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
|
|||
|
||||
/* Interrupts aren't acceptable while we reboot */
|
||||
local_irq_disable();
|
||||
hw_breakpoint_disable();
|
||||
|
||||
if (image->preserve_context) {
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <linux/clockchips.h>
|
||||
#include <linux/random.h>
|
||||
#include <trace/events/power.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/syscalls.h>
|
||||
|
@ -17,6 +18,7 @@
|
|||
#include <asm/uaccess.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/ds.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
unsigned long idle_halt;
|
||||
EXPORT_SYMBOL(idle_halt);
|
||||
|
@ -103,14 +105,7 @@ void flush_thread(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
||||
|
||||
tsk->thread.debugreg0 = 0;
|
||||
tsk->thread.debugreg1 = 0;
|
||||
tsk->thread.debugreg2 = 0;
|
||||
tsk->thread.debugreg3 = 0;
|
||||
tsk->thread.debugreg6 = 0;
|
||||
tsk->thread.debugreg7 = 0;
|
||||
flush_ptrace_hw_breakpoint(tsk);
|
||||
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
||||
/*
|
||||
* Forget coprocessor state..
|
||||
|
@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|||
else if (next->debugctlmsr != prev->debugctlmsr)
|
||||
update_debugctlmsr(next->debugctlmsr);
|
||||
|
||||
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
||||
set_debugreg(next->debugreg0, 0);
|
||||
set_debugreg(next->debugreg1, 1);
|
||||
set_debugreg(next->debugreg2, 2);
|
||||
set_debugreg(next->debugreg3, 3);
|
||||
/* no 4 and 5 */
|
||||
set_debugreg(next->debugreg6, 6);
|
||||
set_debugreg(next->debugreg7, 7);
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
||||
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
||||
/* prev and next are different */
|
||||
|
|
|
@ -58,6 +58,7 @@
|
|||
#include <asm/idle.h>
|
||||
#include <asm/syscalls.h>
|
||||
#include <asm/ds.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
||||
|
||||
|
@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
|
|||
|
||||
task_user_gs(p) = get_user_gs(regs);
|
||||
|
||||
p->thread.io_bitmap_ptr = NULL;
|
||||
tsk = current;
|
||||
err = -ENOMEM;
|
||||
|
||||
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
||||
|
||||
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
|
||||
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
|
||||
IO_BITMAP_BYTES, GFP_KERNEL);
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include <asm/idle.h>
|
||||
#include <asm/syscalls.h>
|
||||
#include <asm/ds.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
asmlinkage extern void ret_from_fork(void);
|
||||
|
||||
|
@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
|
|||
|
||||
p->thread.fs = me->thread.fs;
|
||||
p->thread.gs = me->thread.gs;
|
||||
p->thread.io_bitmap_ptr = NULL;
|
||||
|
||||
savesegment(gs, p->thread.gsindex);
|
||||
savesegment(fs, p->thread.fsindex);
|
||||
savesegment(es, p->thread.es);
|
||||
savesegment(ds, p->thread.ds);
|
||||
|
||||
err = -ENOMEM;
|
||||
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
||||
|
||||
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
|
||||
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
||||
if (!p->thread.io_bitmap_ptr) {
|
||||
|
@ -341,6 +346,7 @@ out:
|
|||
kfree(p->thread.io_bitmap_ptr);
|
||||
p->thread.io_bitmap_max = 0;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
*/
|
||||
if (preload_fpu)
|
||||
__math_state_restore();
|
||||
|
||||
return prev_p;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include <linux/seccomp.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
@ -34,6 +36,7 @@
|
|||
#include <asm/prctl.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/ds.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
|
||||
#include "tls.h"
|
||||
|
||||
|
@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long debugreg_addr_limit(struct task_struct *task)
|
||||
{
|
||||
return TASK_SIZE - 3;
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
|
||||
|
@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long debugreg_addr_limit(struct task_struct *task)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (test_tsk_thread_flag(task, TIF_IA32))
|
||||
return IA32_PAGE_OFFSET - 3;
|
||||
#endif
|
||||
return TASK_SIZE_MAX - 7;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
static unsigned long get_flags(struct task_struct *task)
|
||||
|
@ -566,96 +555,226 @@ static int genregs_set(struct task_struct *target,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is trivial and will be inlined by the compiler.
|
||||
* Having it separates the implementation details of debug
|
||||
* registers from the interface details of ptrace.
|
||||
*/
|
||||
static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
|
||||
static void ptrace_triggered(struct perf_event *bp, void *data)
|
||||
{
|
||||
switch (n) {
|
||||
case 0: return child->thread.debugreg0;
|
||||
case 1: return child->thread.debugreg1;
|
||||
case 2: return child->thread.debugreg2;
|
||||
case 3: return child->thread.debugreg3;
|
||||
case 6: return child->thread.debugreg6;
|
||||
case 7: return child->thread.debugreg7;
|
||||
int i;
|
||||
struct thread_struct *thread = &(current->thread);
|
||||
|
||||
/*
|
||||
* Store in the virtual DR6 register the fact that the breakpoint
|
||||
* was hit so the thread's debugger will see it.
|
||||
*/
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
if (thread->ptrace_bps[i] == bp)
|
||||
break;
|
||||
}
|
||||
|
||||
thread->debugreg6 |= (DR_TRAP0 << i);
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk through every ptrace breakpoints for this thread and
|
||||
* build the dr7 value on top of their attributes.
|
||||
*
|
||||
*/
|
||||
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
|
||||
{
|
||||
int i;
|
||||
int dr7 = 0;
|
||||
struct arch_hw_breakpoint *info;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
if (bp[i] && !bp[i]->attr.disabled) {
|
||||
info = counter_arch_bp(bp[i]);
|
||||
dr7 |= encode_dr7(i, info->len, info->type);
|
||||
}
|
||||
}
|
||||
|
||||
return dr7;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle ptrace writes to debug register 7.
|
||||
*/
|
||||
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
|
||||
{
|
||||
struct thread_struct *thread = &(tsk->thread);
|
||||
unsigned long old_dr7;
|
||||
int i, orig_ret = 0, rc = 0;
|
||||
int enabled, second_pass = 0;
|
||||
unsigned len, type;
|
||||
int gen_len, gen_type;
|
||||
struct perf_event *bp;
|
||||
|
||||
data &= ~DR_CONTROL_RESERVED;
|
||||
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
|
||||
restore:
|
||||
/*
|
||||
* Loop through all the hardware breakpoints, making the
|
||||
* appropriate changes to each.
|
||||
*/
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
enabled = decode_dr7(data, i, &len, &type);
|
||||
bp = thread->ptrace_bps[i];
|
||||
|
||||
if (!enabled) {
|
||||
if (bp) {
|
||||
/*
|
||||
* Don't unregister the breakpoints right-away,
|
||||
* unless all register_user_hw_breakpoint()
|
||||
* requests have succeeded. This prevents
|
||||
* any window of opportunity for debug
|
||||
* register grabbing by other users.
|
||||
*/
|
||||
if (!second_pass)
|
||||
continue;
|
||||
thread->ptrace_bps[i] = NULL;
|
||||
unregister_hw_breakpoint(bp);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We shoud have at least an inactive breakpoint at this
|
||||
* slot. It means the user is writing dr7 without having
|
||||
* written the address register first
|
||||
*/
|
||||
if (!bp) {
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
|
||||
if (rc)
|
||||
break;
|
||||
|
||||
/*
|
||||
* This is a temporary thing as bp is unregistered/registered
|
||||
* to simulate modification
|
||||
*/
|
||||
bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
|
||||
gen_type, bp->callback,
|
||||
tsk, true);
|
||||
thread->ptrace_bps[i] = NULL;
|
||||
|
||||
if (!bp) { /* incorrect bp, or we have a bug in bp API */
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (IS_ERR(bp)) {
|
||||
rc = PTR_ERR(bp);
|
||||
bp = NULL;
|
||||
break;
|
||||
}
|
||||
thread->ptrace_bps[i] = bp;
|
||||
}
|
||||
/*
|
||||
* Make a second pass to free the remaining unused breakpoints
|
||||
* or to restore the original breakpoints if an error occurred.
|
||||
*/
|
||||
if (!second_pass) {
|
||||
second_pass = 1;
|
||||
if (rc < 0) {
|
||||
orig_ret = rc;
|
||||
data = old_dr7;
|
||||
}
|
||||
goto restore;
|
||||
}
|
||||
return ((orig_ret < 0) ? orig_ret : rc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle PTRACE_PEEKUSR calls for the debug register area.
|
||||
*/
|
||||
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
|
||||
{
|
||||
struct thread_struct *thread = &(tsk->thread);
|
||||
unsigned long val = 0;
|
||||
|
||||
if (n < HBP_NUM) {
|
||||
struct perf_event *bp;
|
||||
bp = thread->ptrace_bps[n];
|
||||
if (!bp)
|
||||
return 0;
|
||||
val = bp->hw.info.address;
|
||||
} else if (n == 6) {
|
||||
val = thread->debugreg6;
|
||||
} else if (n == 7) {
|
||||
val = ptrace_get_dr7(thread->ptrace_bps);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
|
||||
unsigned long addr)
|
||||
{
|
||||
struct perf_event *bp;
|
||||
struct thread_struct *t = &tsk->thread;
|
||||
|
||||
if (!t->ptrace_bps[nr]) {
|
||||
/*
|
||||
* Put stub len and type to register (reserve) an inactive but
|
||||
* correct bp
|
||||
*/
|
||||
bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
|
||||
HW_BREAKPOINT_W,
|
||||
ptrace_triggered, tsk,
|
||||
false);
|
||||
} else {
|
||||
bp = t->ptrace_bps[nr];
|
||||
t->ptrace_bps[nr] = NULL;
|
||||
bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
|
||||
bp->attr.bp_type,
|
||||
bp->callback,
|
||||
tsk,
|
||||
bp->attr.disabled);
|
||||
}
|
||||
|
||||
if (!bp)
|
||||
return -EIO;
|
||||
/*
|
||||
* CHECKME: the previous code returned -EIO if the addr wasn't a
|
||||
* valid task virtual addr. The new one will return -EINVAL in this
|
||||
* case.
|
||||
* -EINVAL may be what we want for in-kernel breakpoints users, but
|
||||
* -EIO looks better for ptrace, since we refuse a register writing
|
||||
* for the user. And anyway this is the previous behaviour.
|
||||
*/
|
||||
if (IS_ERR(bp))
|
||||
return PTR_ERR(bp);
|
||||
|
||||
t->ptrace_bps[nr] = bp;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptrace_set_debugreg(struct task_struct *child,
|
||||
int n, unsigned long data)
|
||||
/*
|
||||
* Handle PTRACE_POKEUSR calls for the debug register area.
|
||||
*/
|
||||
int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
|
||||
{
|
||||
int i;
|
||||
struct thread_struct *thread = &(tsk->thread);
|
||||
int rc = 0;
|
||||
|
||||
if (unlikely(n == 4 || n == 5))
|
||||
/* There are no DR4 or DR5 registers */
|
||||
if (n == 4 || n == 5)
|
||||
return -EIO;
|
||||
|
||||
if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
|
||||
return -EIO;
|
||||
|
||||
switch (n) {
|
||||
case 0: child->thread.debugreg0 = data; break;
|
||||
case 1: child->thread.debugreg1 = data; break;
|
||||
case 2: child->thread.debugreg2 = data; break;
|
||||
case 3: child->thread.debugreg3 = data; break;
|
||||
|
||||
case 6:
|
||||
if ((data & ~0xffffffffUL) != 0)
|
||||
return -EIO;
|
||||
child->thread.debugreg6 = data;
|
||||
break;
|
||||
|
||||
case 7:
|
||||
/*
|
||||
* Sanity-check data. Take one half-byte at once with
|
||||
* check = (val >> (16 + 4*i)) & 0xf. It contains the
|
||||
* R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
|
||||
* 2 and 3 are LENi. Given a list of invalid values,
|
||||
* we do mask |= 1 << invalid_value, so that
|
||||
* (mask >> check) & 1 is a correct test for invalid
|
||||
* values.
|
||||
*
|
||||
* R/Wi contains the type of the breakpoint /
|
||||
* watchpoint, LENi contains the length of the watched
|
||||
* data in the watchpoint case.
|
||||
*
|
||||
* The invalid values are:
|
||||
* - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
|
||||
* - R/Wi == 0x10 (break on I/O reads or writes), so
|
||||
* mask |= 0x4444.
|
||||
* - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
|
||||
* 0x1110.
|
||||
*
|
||||
* Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
|
||||
*
|
||||
* See the Intel Manual "System Programming Guide",
|
||||
* 15.2.4
|
||||
*
|
||||
* Note that LENi == 0x10 is defined on x86_64 in long
|
||||
* mode (i.e. even for 32-bit userspace software, but
|
||||
* 64-bit kernel), so the x86_64 mask value is 0x5454.
|
||||
* See the AMD manual no. 24593 (AMD64 System Programming)
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#define DR7_MASK 0x5f54
|
||||
#else
|
||||
#define DR7_MASK 0x5554
|
||||
#endif
|
||||
data &= ~DR_CONTROL_RESERVED;
|
||||
for (i = 0; i < 4; i++)
|
||||
if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
|
||||
return -EIO;
|
||||
child->thread.debugreg7 = data;
|
||||
if (data)
|
||||
set_tsk_thread_flag(child, TIF_DEBUG);
|
||||
else
|
||||
clear_tsk_thread_flag(child, TIF_DEBUG);
|
||||
break;
|
||||
if (n == 6) {
|
||||
thread->debugreg6 = val;
|
||||
goto ret_path;
|
||||
}
|
||||
if (n < HBP_NUM) {
|
||||
rc = ptrace_set_breakpoint_addr(tsk, n, val);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
/* All that's left is DR7 */
|
||||
if (n == 7)
|
||||
rc = ptrace_write_dr7(tsk, val);
|
||||
|
||||
return 0;
|
||||
ret_path:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
|
|||
|
||||
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
|
||||
if (signr > 0) {
|
||||
/*
|
||||
* Re-enable any watchpoints before delivering the
|
||||
* signal to user space. The processor register will
|
||||
* have been cleared if the watchpoint triggered
|
||||
* inside the kernel.
|
||||
*/
|
||||
if (current->thread.debugreg7)
|
||||
set_debugreg(current->thread.debugreg7, 7);
|
||||
|
||||
/* Whee! Actually deliver the signal. */
|
||||
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
|
||||
/*
|
||||
|
|
|
@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
|
|||
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
unsigned long condition;
|
||||
unsigned long dr6;
|
||||
int si_code;
|
||||
|
||||
get_debugreg(condition, 6);
|
||||
get_debugreg(dr6, 6);
|
||||
|
||||
/* Catch kmemcheck conditions first of all! */
|
||||
if (condition & DR_STEP && kmemcheck_trap(regs))
|
||||
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
|
||||
return;
|
||||
|
||||
/* DR6 may or may not be cleared by the CPU */
|
||||
set_debugreg(0, 6);
|
||||
/*
|
||||
* The processor cleared BTF, so don't mark that we need it set.
|
||||
*/
|
||||
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
|
||||
tsk->thread.debugctlmsr = 0;
|
||||
|
||||
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
/* Store the virtualized DR6 value */
|
||||
tsk->thread.debugreg6 = dr6;
|
||||
|
||||
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
/* It's safe to allow irq's after DR6 has been saved */
|
||||
preempt_conditional_sti(regs);
|
||||
|
||||
/* Mask out spurious debug traps due to lazy DR7 setting */
|
||||
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
|
||||
if (!tsk->thread.debugreg7)
|
||||
goto clear_dr7;
|
||||
if (regs->flags & X86_VM_MASK) {
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs,
|
||||
error_code, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
goto debug_vm86;
|
||||
#endif
|
||||
|
||||
/* Save debug status register where ptrace can see it */
|
||||
tsk->thread.debugreg6 = condition;
|
||||
|
||||
/*
|
||||
* Single-stepping through TF: make sure we ignore any events in
|
||||
* kernel space (but re-enable TF when returning to user mode).
|
||||
* Single-stepping through system calls: ignore any exceptions in
|
||||
* kernel space, but re-enable TF when returning to user mode.
|
||||
*
|
||||
* We already checked v86 mode above, so we can check for kernel mode
|
||||
* by just checking the CPL of CS.
|
||||
*/
|
||||
if (condition & DR_STEP) {
|
||||
if (!user_mode(regs))
|
||||
goto clear_TF_reenable;
|
||||
if ((dr6 & DR_STEP) && !user_mode(regs)) {
|
||||
tsk->thread.debugreg6 &= ~DR_STEP;
|
||||
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
||||
regs->flags &= ~X86_EFLAGS_TF;
|
||||
}
|
||||
|
||||
si_code = get_si_code(condition);
|
||||
/* Ok, finally something we can handle */
|
||||
send_sigtrap(tsk, regs, error_code, si_code);
|
||||
|
||||
/*
|
||||
* Disable additional traps. They'll be re-enabled when
|
||||
* the signal is delivered.
|
||||
*/
|
||||
clear_dr7:
|
||||
set_debugreg(0, 7);
|
||||
si_code = get_si_code(tsk->thread.debugreg6);
|
||||
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
|
||||
send_sigtrap(tsk, regs, error_code, si_code);
|
||||
preempt_conditional_cli(regs);
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
debug_vm86:
|
||||
/* reenable preemption: handle_vm86_trap() might sleep */
|
||||
dec_preempt_count();
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
|
||||
conditional_cli(regs);
|
||||
return;
|
||||
#endif
|
||||
|
||||
clear_TF_reenable:
|
||||
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
||||
regs->flags &= ~X86_EFLAGS_TF;
|
||||
preempt_conditional_cli(regs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/desc.h>
|
||||
|
@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
trace_kvm_entry(vcpu->vcpu_id);
|
||||
kvm_x86_ops->run(vcpu, kvm_run);
|
||||
|
||||
if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
|
||||
set_debugreg(current->thread.debugreg0, 0);
|
||||
set_debugreg(current->thread.debugreg1, 1);
|
||||
set_debugreg(current->thread.debugreg2, 2);
|
||||
set_debugreg(current->thread.debugreg3, 3);
|
||||
set_debugreg(current->thread.debugreg6, 6);
|
||||
set_debugreg(current->thread.debugreg7, 7);
|
||||
}
|
||||
/*
|
||||
* If the guest has used debug registers, at least dr7
|
||||
* will be disabled while returning to the host.
|
||||
* If we don't have active breakpoints in the host, we don't
|
||||
* care about the messed up debug address registers. But if
|
||||
* we have some of them active, restore the old state.
|
||||
*/
|
||||
if (hw_breakpoint_active())
|
||||
hw_breakpoint_restore();
|
||||
|
||||
set_bit(KVM_REQ_KICK, &vcpu->requests);
|
||||
local_irq_enable();
|
||||
|
|
|
@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
|
|||
struct die_args *arg = args;
|
||||
|
||||
if (val == DIE_DEBUG && (arg->err & DR_STEP))
|
||||
if (post_kmmio_handler(arg->err, arg->regs) == 1)
|
||||
if (post_kmmio_handler(arg->err, arg->regs) == 1) {
|
||||
/*
|
||||
* Reset the BS bit in dr6 (pointed by args->err) to
|
||||
* denote completion of processing
|
||||
*/
|
||||
(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/mce.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/suspend.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static struct saved_context saved_context;
|
||||
|
@ -142,31 +143,6 @@ static void fix_processor_context(void)
|
|||
#endif
|
||||
load_TR_desc(); /* This does ltr */
|
||||
load_LDT(¤t->active_mm->context); /* This does lldt */
|
||||
|
||||
/*
|
||||
* Now maybe reload the debug registers
|
||||
*/
|
||||
if (current->thread.debugreg7) {
|
||||
#ifdef CONFIG_X86_32
|
||||
set_debugreg(current->thread.debugreg0, 0);
|
||||
set_debugreg(current->thread.debugreg1, 1);
|
||||
set_debugreg(current->thread.debugreg2, 2);
|
||||
set_debugreg(current->thread.debugreg3, 3);
|
||||
/* no 4 and 5 */
|
||||
set_debugreg(current->thread.debugreg6, 6);
|
||||
set_debugreg(current->thread.debugreg7, 7);
|
||||
#else
|
||||
/* CONFIG_X86_64 */
|
||||
loaddebug(¤t->thread, 0);
|
||||
loaddebug(¤t->thread, 1);
|
||||
loaddebug(¤t->thread, 2);
|
||||
loaddebug(¤t->thread, 3);
|
||||
/* no 4 and 5 */
|
||||
loaddebug(¤t->thread, 6);
|
||||
loaddebug(¤t->thread, 7);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
#ifndef _LINUX_HW_BREAKPOINT_H
|
||||
#define _LINUX_HW_BREAKPOINT_H
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
enum {
|
||||
HW_BREAKPOINT_LEN_1 = 1,
|
||||
HW_BREAKPOINT_LEN_2 = 2,
|
||||
HW_BREAKPOINT_LEN_4 = 4,
|
||||
HW_BREAKPOINT_LEN_8 = 8,
|
||||
};
|
||||
|
||||
enum {
|
||||
HW_BREAKPOINT_R = 1,
|
||||
HW_BREAKPOINT_W = 2,
|
||||
HW_BREAKPOINT_X = 4,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
|
||||
/* Return the address stored in the breakpoint's perf attributes. */
static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
|
||||
{
|
||||
return bp->attr.bp_addr;
|
||||
}
|
||||
|
||||
/* Return the breakpoint type stored in the breakpoint's perf attributes. */
static inline int hw_breakpoint_type(struct perf_event *bp)
|
||||
{
|
||||
return bp->attr.bp_type;
|
||||
}
|
||||
|
||||
/* Return the breakpoint length stored in the breakpoint's perf attributes. */
static inline int hw_breakpoint_len(struct perf_event *bp)
|
||||
{
|
||||
return bp->attr.bp_len;
|
||||
}
|
||||
|
||||
extern struct perf_event *
|
||||
register_user_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active);
|
||||
|
||||
/* FIXME: only change from the attr, and don't unregister */
|
||||
extern struct perf_event *
|
||||
modify_user_hw_breakpoint(struct perf_event *bp,
|
||||
unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active);
|
||||
|
||||
/*
|
||||
* Kernel breakpoints are not associated with any particular thread.
|
||||
*/
|
||||
extern struct perf_event *
|
||||
register_wide_hw_breakpoint_cpu(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
int cpu,
|
||||
bool active);
|
||||
|
||||
extern struct perf_event **
|
||||
register_wide_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
bool active);
|
||||
|
||||
extern int register_perf_hw_breakpoint(struct perf_event *bp);
|
||||
extern int __register_perf_hw_breakpoint(struct perf_event *bp);
|
||||
extern void unregister_hw_breakpoint(struct perf_event *bp);
|
||||
extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
|
||||
|
||||
extern int reserve_bp_slot(struct perf_event *bp);
|
||||
extern void release_bp_slot(struct perf_event *bp);
|
||||
|
||||
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
|
||||
|
||||
/* Return a pointer to the arch-specific breakpoint info embedded in bp->hw. */
static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
|
||||
{
|
||||
return &bp->hw.info;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_HAVE_HW_BREAKPOINT */
|
||||
|
||||
static inline struct perf_event *
|
||||
register_user_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active) { return NULL; }
|
||||
static inline struct perf_event *
|
||||
modify_user_hw_breakpoint(struct perf_event *bp,
|
||||
unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active) { return NULL; }
|
||||
static inline struct perf_event *
|
||||
register_wide_hw_breakpoint_cpu(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
int cpu,
|
||||
bool active) { return NULL; }
|
||||
static inline struct perf_event **
|
||||
register_wide_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
bool active) { return NULL; }
|
||||
static inline int
|
||||
register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
|
||||
static inline int
|
||||
__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
|
||||
static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
|
||||
static inline void
|
||||
unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
|
||||
static inline int
|
||||
reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
|
||||
static inline void release_bp_slot(struct perf_event *bp) { }
|
||||
|
||||
static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
|
||||
|
||||
static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
|
||||
|
||||
#endif /* _LINUX_HW_BREAKPOINT_H */
|
|
@ -18,6 +18,10 @@
|
|||
#include <linux/ioctl.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
#include <asm/hw_breakpoint.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* User-space ABI bits:
|
||||
*/
|
||||
|
@ -31,6 +35,7 @@ enum perf_type_id {
|
|||
PERF_TYPE_TRACEPOINT = 2,
|
||||
PERF_TYPE_HW_CACHE = 3,
|
||||
PERF_TYPE_RAW = 4,
|
||||
PERF_TYPE_BREAKPOINT = 5,
|
||||
|
||||
PERF_TYPE_MAX, /* non-ABI */
|
||||
};
|
||||
|
@ -209,6 +214,15 @@ struct perf_event_attr {
|
|||
__u32 wakeup_events; /* wakeup every n events */
|
||||
__u32 wakeup_watermark; /* bytes before wakeup */
|
||||
};
|
||||
|
||||
union {
|
||||
struct { /* Hardware breakpoint info */
|
||||
__u64 bp_addr;
|
||||
__u32 bp_type;
|
||||
__u32 bp_len;
|
||||
};
|
||||
};
|
||||
|
||||
__u32 __reserved_2;
|
||||
|
||||
__u64 __reserved_3;
|
||||
|
@ -478,6 +492,11 @@ struct hw_perf_event {
|
|||
s64 remaining;
|
||||
struct hrtimer hrtimer;
|
||||
};
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
union { /* breakpoint */
|
||||
struct arch_hw_breakpoint info;
|
||||
};
|
||||
#endif
|
||||
};
|
||||
atomic64_t prev_count;
|
||||
u64 sample_period;
|
||||
|
@ -546,6 +565,8 @@ struct perf_pending_entry {
|
|||
void (*func)(struct perf_pending_entry *);
|
||||
};
|
||||
|
||||
typedef void (*perf_callback_t)(struct perf_event *, void *);
|
||||
|
||||
/**
|
||||
* struct perf_event - performance event kernel representation:
|
||||
*/
|
||||
|
@ -588,7 +609,7 @@ struct perf_event {
|
|||
u64 tstamp_running;
|
||||
u64 tstamp_stopped;
|
||||
|
||||
struct perf_event_attr attr;
|
||||
struct perf_event_attr attr;
|
||||
struct hw_perf_event hw;
|
||||
|
||||
struct perf_event_context *ctx;
|
||||
|
@ -641,6 +662,10 @@ struct perf_event {
|
|||
struct event_filter *filter;
|
||||
#endif
|
||||
|
||||
perf_callback_t callback;
|
||||
|
||||
perf_callback_t event_callback;
|
||||
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
};
|
||||
|
||||
|
@ -745,6 +770,13 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
|
|||
struct perf_cpu_context *cpuctx,
|
||||
struct perf_event_context *ctx, int cpu);
|
||||
extern void perf_event_update_userpage(struct perf_event *event);
|
||||
extern int perf_event_release_kernel(struct perf_event *event);
|
||||
extern struct perf_event *
|
||||
perf_event_create_kernel_counter(struct perf_event_attr *attr,
|
||||
int cpu,
|
||||
pid_t pid,
|
||||
perf_callback_t callback);
|
||||
extern u64 perf_event_read_value(struct perf_event *event);
|
||||
|
||||
struct perf_sample_data {
|
||||
u64 type;
|
||||
|
@ -821,6 +853,7 @@ extern int sysctl_perf_event_sample_rate;
|
|||
extern void perf_event_init(void);
|
||||
extern void perf_tp_event(int event_id, u64 addr, u64 count,
|
||||
void *record, int entry_size);
|
||||
extern void perf_bp_event(struct perf_event *event, void *data);
|
||||
|
||||
#ifndef perf_misc_flags
|
||||
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
|
||||
|
@ -855,6 +888,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
|
|||
static inline void
|
||||
perf_sw_event(u32 event_id, u64 nr, int nmi,
|
||||
struct pt_regs *regs, u64 addr) { }
|
||||
static inline void
|
||||
perf_bp_event(struct perf_event *event, void *data) { }
|
||||
|
||||
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
|
||||
static inline void perf_event_comm(struct task_struct *tsk) { }
|
||||
|
|
|
@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
|
|||
obj-$(CONFIG_SMP) += sched_cpupri.o
|
||||
obj-$(CONFIG_SLOW_WORK) += slow-work.o
|
||||
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
|
||||
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
|
||||
|
||||
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
|
||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include <linux/init_task.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <trace/events/sched.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
@ -977,6 +978,10 @@ NORET_TYPE void do_exit(long code)
|
|||
|
||||
proc_exit_connector(tsk);
|
||||
|
||||
/*
|
||||
* FIXME: do that only when needed, using sched_exit tracepoint
|
||||
*/
|
||||
flush_ptrace_hw_breakpoint(tsk);
|
||||
/*
|
||||
* Flush inherited counters to the parent - before the parent
|
||||
* gets woken up by child-exit notifications.
|
||||
|
|
|
@ -0,0 +1,494 @@
|
|||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) 2007 Alan Stern
|
||||
* Copyright (C) IBM Corporation, 2009
|
||||
* Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
|
||||
*
|
||||
* Thanks to Ingo Molnar for his many suggestions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
|
||||
* using the CPU's debug registers.
|
||||
* This file contains the arch-independent routines.
|
||||
*/
|
||||
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/debugreg.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Constraints data
|
||||
*/
|
||||
|
||||
/* Number of pinned cpu breakpoints in a cpu */
|
||||
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
|
||||
|
||||
/* Number of pinned task breakpoints in a cpu */
|
||||
static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
|
||||
|
||||
/* Number of non-pinned cpu/task breakpoints in a cpu */
|
||||
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
|
||||
|
||||
/* Gather the number of total pinned and un-pinned bp in a cpuset */
|
||||
struct bp_busy_slots {
|
||||
unsigned int pinned;
|
||||
unsigned int flexible;
|
||||
};
|
||||
|
||||
/* Serialize accesses to the above constraints */
|
||||
static DEFINE_MUTEX(nr_bp_mutex);
|
||||
|
||||
/*
|
||||
* Report the maximum number of pinned breakpoints a task
|
||||
* have in this cpu
|
||||
*/
|
||||
static unsigned int max_task_bp_pinned(int cpu)
|
||||
{
|
||||
int i;
|
||||
unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
|
||||
|
||||
for (i = HBP_NUM -1; i >= 0; i--) {
|
||||
if (tsk_pinned[i] > 0)
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Report the number of pinned/un-pinned breakpoints we have in
|
||||
* a given cpu (cpu > -1) or in all of them (cpu = -1).
|
||||
*/
|
||||
static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
|
||||
{
|
||||
if (cpu >= 0) {
|
||||
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
|
||||
slots->pinned += max_task_bp_pinned(cpu);
|
||||
slots->flexible = per_cpu(nr_bp_flexible, cpu);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
unsigned int nr;
|
||||
|
||||
nr = per_cpu(nr_cpu_bp_pinned, cpu);
|
||||
nr += max_task_bp_pinned(cpu);
|
||||
|
||||
if (nr > slots->pinned)
|
||||
slots->pinned = nr;
|
||||
|
||||
nr = per_cpu(nr_bp_flexible, cpu);
|
||||
|
||||
if (nr > slots->flexible)
|
||||
slots->flexible = nr;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a pinned breakpoint for the given task in our constraint table
|
||||
*/
|
||||
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
|
||||
{
|
||||
int count = 0;
|
||||
struct perf_event *bp;
|
||||
struct perf_event_context *ctx = tsk->perf_event_ctxp;
|
||||
unsigned int *task_bp_pinned;
|
||||
struct list_head *list;
|
||||
unsigned long flags;
|
||||
|
||||
if (WARN_ONCE(!ctx, "No perf context for this task"))
|
||||
return;
|
||||
|
||||
list = &ctx->event_list;
|
||||
|
||||
spin_lock_irqsave(&ctx->lock, flags);
|
||||
|
||||
/*
|
||||
* The current breakpoint counter is not included in the list
|
||||
* at the open() callback time
|
||||
*/
|
||||
list_for_each_entry(bp, list, event_entry) {
|
||||
if (bp->attr.type == PERF_TYPE_BREAKPOINT)
|
||||
count++;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&ctx->lock, flags);
|
||||
|
||||
if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
|
||||
return;
|
||||
|
||||
task_bp_pinned = per_cpu(task_bp_pinned, cpu);
|
||||
if (enable) {
|
||||
task_bp_pinned[count]++;
|
||||
if (count > 0)
|
||||
task_bp_pinned[count-1]--;
|
||||
} else {
|
||||
task_bp_pinned[count]--;
|
||||
if (count > 0)
|
||||
task_bp_pinned[count-1]++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Add/remove the given breakpoint in our constraint table
|
||||
*/
|
||||
static void toggle_bp_slot(struct perf_event *bp, bool enable)
|
||||
{
|
||||
int cpu = bp->cpu;
|
||||
struct task_struct *tsk = bp->ctx->task;
|
||||
|
||||
/* Pinned counter task profiling */
|
||||
if (tsk) {
|
||||
if (cpu >= 0) {
|
||||
toggle_bp_task_slot(tsk, cpu, enable);
|
||||
return;
|
||||
}
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
toggle_bp_task_slot(tsk, cpu, enable);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Pinned counter cpu profiling */
|
||||
if (enable)
|
||||
per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
|
||||
else
|
||||
per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Constraints to check before allowing this new breakpoint counter:
|
||||
*
|
||||
* == Non-pinned counter == (Considered as pinned for now)
|
||||
*
|
||||
* - If attached to a single cpu, check:
|
||||
*
|
||||
* (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
|
||||
* + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
|
||||
*
|
||||
* -> If there are already non-pinned counters in this cpu, it means
|
||||
* there is already a free slot for them.
|
||||
* Otherwise, we check that the maximum number of per task
|
||||
* breakpoints (for this cpu) plus the number of per cpu breakpoint
|
||||
* (for this cpu) doesn't cover every registers.
|
||||
*
|
||||
* - If attached to every cpus, check:
|
||||
*
|
||||
* (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
|
||||
* + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
|
||||
*
|
||||
* -> This is roughly the same, except we check the number of per cpu
|
||||
* bp for every cpu and we keep the max one. Same for the per tasks
|
||||
* breakpoints.
|
||||
*
|
||||
*
|
||||
* == Pinned counter ==
|
||||
*
|
||||
* - If attached to a single cpu, check:
|
||||
*
|
||||
* ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
|
||||
* + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
|
||||
*
|
||||
* -> Same checks as before. But now the nr_bp_flexible, if any, must keep
|
||||
* one register at least (or they will never be fed).
|
||||
*
|
||||
* - If attached to every cpus, check:
|
||||
*
|
||||
* ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
|
||||
* + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
|
||||
*/
|
||||
int reserve_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
struct bp_busy_slots slots = {0};
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&nr_bp_mutex);
|
||||
|
||||
fetch_bp_busy_slots(&slots, bp->cpu);
|
||||
|
||||
/* Flexible counters need to keep at least one slot */
|
||||
if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
|
||||
ret = -ENOSPC;
|
||||
goto end;
|
||||
}
|
||||
|
||||
toggle_bp_slot(bp, true);
|
||||
|
||||
end:
|
||||
mutex_unlock(&nr_bp_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void release_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
mutex_lock(&nr_bp_mutex);
|
||||
|
||||
toggle_bp_slot(bp, false);
|
||||
|
||||
mutex_unlock(&nr_bp_mutex);
|
||||
}
|
||||
|
||||
|
||||
int __register_perf_hw_breakpoint(struct perf_event *bp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = reserve_bp_slot(bp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!bp->attr.disabled)
|
||||
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int register_perf_hw_breakpoint(struct perf_event *bp)
|
||||
{
|
||||
bp->callback = perf_bp_event;
|
||||
|
||||
return __register_perf_hw_breakpoint(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a breakpoint bound to a task and a given cpu.
|
||||
* If cpu is -1, the breakpoint is active for the task in every cpu
|
||||
* If the task is -1, the breakpoint is active for every tasks in the given
|
||||
* cpu.
|
||||
*/
|
||||
static struct perf_event *
|
||||
register_user_hw_breakpoint_cpu(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
pid_t pid,
|
||||
int cpu,
|
||||
bool active)
|
||||
{
|
||||
struct perf_event_attr *attr;
|
||||
struct perf_event *bp;
|
||||
|
||||
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
|
||||
if (!attr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
attr->type = PERF_TYPE_BREAKPOINT;
|
||||
attr->size = sizeof(*attr);
|
||||
attr->bp_addr = addr;
|
||||
attr->bp_len = len;
|
||||
attr->bp_type = type;
|
||||
/*
|
||||
* Such breakpoints are used by debuggers to trigger signals when
|
||||
* we hit the excepted memory op. We can't miss such events, they
|
||||
* must be pinned.
|
||||
*/
|
||||
attr->pinned = 1;
|
||||
|
||||
if (!active)
|
||||
attr->disabled = 1;
|
||||
|
||||
bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
|
||||
kfree(attr);
|
||||
|
||||
return bp;
|
||||
}
|
||||
|
||||
/**
|
||||
* register_user_hw_breakpoint - register a hardware breakpoint for user space
|
||||
* @addr: is the memory address that triggers the breakpoint
|
||||
* @len: the length of the access to the memory (1 byte, 2 bytes etc...)
|
||||
* @type: the type of the access to the memory (read/write/exec)
|
||||
* @triggered: callback to trigger when we hit the breakpoint
|
||||
* @tsk: pointer to 'task_struct' of the process to which the address belongs
|
||||
* @active: should we activate it while registering it
|
||||
*
|
||||
*/
|
||||
struct perf_event *
|
||||
register_user_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active)
|
||||
{
|
||||
return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
|
||||
tsk->pid, -1, active);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
|
||||
|
||||
/**
|
||||
* modify_user_hw_breakpoint - modify a user-space hardware breakpoint
|
||||
* @bp: the breakpoint structure to modify
|
||||
* @addr: is the memory address that triggers the breakpoint
|
||||
* @len: the length of the access to the memory (1 byte, 2 bytes etc...)
|
||||
* @type: the type of the access to the memory (read/write/exec)
|
||||
* @triggered: callback to trigger when we hit the breakpoint
|
||||
* @tsk: pointer to 'task_struct' of the process to which the address belongs
|
||||
* @active: should we activate it while registering it
|
||||
*/
|
||||
struct perf_event *
|
||||
modify_user_hw_breakpoint(struct perf_event *bp,
|
||||
unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
struct task_struct *tsk,
|
||||
bool active)
|
||||
{
|
||||
/*
|
||||
* FIXME: do it without unregistering
|
||||
* - We don't want to lose our slot
|
||||
* - If the new bp is incorrect, don't lose the older one
|
||||
*/
|
||||
unregister_hw_breakpoint(bp);
|
||||
|
||||
return register_user_hw_breakpoint(addr, len, type, triggered,
|
||||
tsk, active);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
|
||||
|
||||
/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 *
 * A NULL @bp is accepted and ignored.
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (bp)
		perf_event_release_kernel(bp);
}
|
||||
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
|
||||
|
||||
static struct perf_event *
|
||||
register_kernel_hw_breakpoint_cpu(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
int cpu,
|
||||
bool active)
|
||||
{
|
||||
return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
|
||||
-1, cpu, active);
|
||||
}
|
||||
|
||||
/**
|
||||
* register_wide_hw_breakpoint - register a wide breakpoint in the kernel
|
||||
* @addr: is the memory address that triggers the breakpoint
|
||||
* @len: the length of the access to the memory (1 byte, 2 bytes etc...)
|
||||
* @type: the type of the access to the memory (read/write/exec)
|
||||
* @triggered: callback to trigger when we hit the breakpoint
|
||||
* @active: should we activate it while registering it
|
||||
*
|
||||
* @return a set of per_cpu pointers to perf events
|
||||
*/
|
||||
struct perf_event **
|
||||
register_wide_hw_breakpoint(unsigned long addr,
|
||||
int len,
|
||||
int type,
|
||||
perf_callback_t triggered,
|
||||
bool active)
|
||||
{
|
||||
struct perf_event **cpu_events, **pevent, *bp;
|
||||
long err;
|
||||
int cpu;
|
||||
|
||||
cpu_events = alloc_percpu(typeof(*cpu_events));
|
||||
if (!cpu_events)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
pevent = per_cpu_ptr(cpu_events, cpu);
|
||||
bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
|
||||
triggered, cpu, active);
|
||||
|
||||
*pevent = bp;
|
||||
|
||||
if (IS_ERR(bp) || !bp) {
|
||||
err = PTR_ERR(bp);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
return cpu_events;
|
||||
|
||||
fail:
|
||||
for_each_possible_cpu(cpu) {
|
||||
pevent = per_cpu_ptr(cpu_events, cpu);
|
||||
if (IS_ERR(*pevent) || !*pevent)
|
||||
break;
|
||||
unregister_hw_breakpoint(*pevent);
|
||||
}
|
||||
free_percpu(cpu_events);
|
||||
/* return the error if any */
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
|
||||
|
||||
/**
|
||||
* unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
|
||||
* @cpu_events: the per cpu set of events to unregister
|
||||
*/
|
||||
void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
|
||||
{
|
||||
int cpu;
|
||||
struct perf_event **pevent;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
pevent = per_cpu_ptr(cpu_events, cpu);
|
||||
unregister_hw_breakpoint(*pevent);
|
||||
}
|
||||
free_percpu(cpu_events);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
|
||||
|
||||
static struct notifier_block hw_breakpoint_exceptions_nb = {
|
||||
.notifier_call = hw_breakpoint_exceptions_notify,
|
||||
/* we need to be notified first */
|
||||
.priority = 0x7fffffff
|
||||
};
|
||||
|
||||
/* Hook the breakpoint exception notifier into the die-notifier chain */
static int __init init_hw_breakpoint(void)
{
	int ret = register_die_notifier(&hw_breakpoint_exceptions_nb);

	return ret;
}
|
||||
core_initcall(init_hw_breakpoint);
|
||||
|
||||
|
||||
struct pmu perf_ops_bp = {
|
||||
.enable = arch_install_hw_breakpoint,
|
||||
.disable = arch_uninstall_hw_breakpoint,
|
||||
.read = hw_breakpoint_pmu_read,
|
||||
.unthrottle = hw_breakpoint_pmu_unthrottle
|
||||
};
|
|
@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
|
|||
}
|
||||
return module_kallsyms_lookup_name(name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
|
||||
|
||||
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
|
||||
unsigned long),
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <linux/kernel_stat.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/ftrace_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
|
@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int perf_event_release_kernel(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
|
||||
WARN_ON_ONCE(ctx->parent_ctx);
|
||||
mutex_lock(&ctx->mutex);
|
||||
perf_event_remove_from_context(event);
|
||||
mutex_unlock(&ctx->mutex);
|
||||
|
||||
mutex_lock(&event->owner->perf_event_mutex);
|
||||
list_del_init(&event->owner_entry);
|
||||
mutex_unlock(&event->owner->perf_event_mutex);
|
||||
put_task_struct(event->owner);
|
||||
|
||||
free_event(event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
|
||||
|
||||
static int perf_event_read_size(struct perf_event *event)
|
||||
{
|
||||
int entry = sizeof(u64); /* value */
|
||||
|
@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event)
|
|||
return size;
|
||||
}
|
||||
|
||||
static u64 perf_event_read_value(struct perf_event *event)
|
||||
u64 perf_event_read_value(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *child;
|
||||
u64 total = 0;
|
||||
|
@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event)
|
|||
|
||||
return total;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_event_read_value);
|
||||
|
||||
static int perf_event_read_entry(struct perf_event *event,
|
||||
u64 read_format, char __user *buf)
|
||||
|
@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event)
|
|||
|
||||
#endif /* CONFIG_EVENT_PROFILE */
|
||||
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
static void bp_perf_event_destroy(struct perf_event *event)
|
||||
{
|
||||
release_bp_slot(event);
|
||||
}
|
||||
|
||||
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
|
||||
{
|
||||
int err;
|
||||
/*
|
||||
* The breakpoint is already filled if we haven't created the counter
|
||||
* through perf syscall
|
||||
* FIXME: manage to get trigerred to NULL if it comes from syscalls
|
||||
*/
|
||||
if (!bp->callback)
|
||||
err = register_perf_hw_breakpoint(bp);
|
||||
else
|
||||
err = __register_perf_hw_breakpoint(bp);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
bp->destroy = bp_perf_event_destroy;
|
||||
|
||||
return &perf_ops_bp;
|
||||
}
|
||||
|
||||
void perf_bp_event(struct perf_event *bp, void *regs)
|
||||
{
|
||||
/* TODO */
|
||||
}
|
||||
#else
|
||||
static void bp_perf_event_destroy(struct perf_event *event)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void perf_bp_event(struct perf_event *bp, void *regs)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
|
||||
|
||||
static void sw_perf_event_destroy(struct perf_event *event)
|
||||
|
@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr,
|
|||
struct perf_event_context *ctx,
|
||||
struct perf_event *group_leader,
|
||||
struct perf_event *parent_event,
|
||||
perf_callback_t callback,
|
||||
gfp_t gfpflags)
|
||||
{
|
||||
const struct pmu *pmu;
|
||||
|
@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr,
|
|||
|
||||
event->state = PERF_EVENT_STATE_INACTIVE;
|
||||
|
||||
if (!callback && parent_event)
|
||||
callback = parent_event->callback;
|
||||
|
||||
event->callback = callback;
|
||||
|
||||
if (attr->disabled)
|
||||
event->state = PERF_EVENT_STATE_OFF;
|
||||
|
||||
|
@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr,
|
|||
pmu = tp_perf_event_init(event);
|
||||
break;
|
||||
|
||||
case PERF_TYPE_BREAKPOINT:
|
||||
pmu = bp_perf_event_init(event);
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||
}
|
||||
|
||||
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
|
||||
NULL, GFP_KERNEL);
|
||||
NULL, NULL, GFP_KERNEL);
|
||||
err = PTR_ERR(event);
|
||||
if (IS_ERR(event))
|
||||
goto err_put_context;
|
||||
|
@ -4663,6 +4741,58 @@ err_put_context:
|
|||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* perf_event_create_kernel_counter
|
||||
*
|
||||
* @attr: attributes of the counter to create
|
||||
* @cpu: cpu in which the counter is bound
|
||||
* @pid: task to profile
|
||||
*/
|
||||
struct perf_event *
|
||||
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
||||
pid_t pid, perf_callback_t callback)
|
||||
{
|
||||
struct perf_event *event;
|
||||
struct perf_event_context *ctx;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Get the target context (task or percpu):
|
||||
*/
|
||||
|
||||
ctx = find_get_context(pid, cpu);
|
||||
if (IS_ERR(ctx))
|
||||
return NULL;
|
||||
|
||||
event = perf_event_alloc(attr, cpu, ctx, NULL,
|
||||
NULL, callback, GFP_KERNEL);
|
||||
err = PTR_ERR(event);
|
||||
if (IS_ERR(event))
|
||||
goto err_put_context;
|
||||
|
||||
event->filp = NULL;
|
||||
WARN_ON_ONCE(ctx->parent_ctx);
|
||||
mutex_lock(&ctx->mutex);
|
||||
perf_install_in_context(ctx, event, cpu);
|
||||
++ctx->generation;
|
||||
mutex_unlock(&ctx->mutex);
|
||||
|
||||
event->owner = current;
|
||||
get_task_struct(current);
|
||||
mutex_lock(¤t->perf_event_mutex);
|
||||
list_add_tail(&event->owner_entry, ¤t->perf_event_list);
|
||||
mutex_unlock(¤t->perf_event_mutex);
|
||||
|
||||
return event;
|
||||
|
||||
err_put_context:
|
||||
if (err < 0)
|
||||
put_ctx(ctx);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
|
||||
|
||||
/*
|
||||
* inherit a event from parent task to child task:
|
||||
*/
|
||||
|
@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event,
|
|||
child_event = perf_event_alloc(&parent_event->attr,
|
||||
parent_event->cpu, child_ctx,
|
||||
group_leader, parent_event,
|
||||
GFP_KERNEL);
|
||||
NULL, GFP_KERNEL);
|
||||
if (IS_ERR(child_event))
|
||||
return child_event;
|
||||
get_ctx(child_ctx);
|
||||
|
|
|
@ -339,6 +339,27 @@ config POWER_TRACER
|
|||
power management decisions, specifically the C-state and P-state
|
||||
behavior.
|
||||
|
||||
config KSYM_TRACER
|
||||
bool "Trace read and write access on kernel memory locations"
|
||||
depends on HAVE_HW_BREAKPOINT
|
||||
select TRACING
|
||||
help
|
||||
This tracer helps find read and write operations on any given kernel
|
||||
symbol i.e. /proc/kallsyms.
|
||||
|
||||
config PROFILE_KSYM_TRACER
|
||||
bool "Profile all kernel memory accesses on 'watched' variables"
|
||||
depends on KSYM_TRACER
|
||||
help
|
||||
This tracer profiles kernel accesses on variables watched through the
|
||||
ksym tracer ftrace plugin. Depending upon the hardware, all read
|
||||
and write operations on kernel variables can be monitored for
|
||||
accesses.
|
||||
|
||||
The results will be displayed in:
|
||||
/debugfs/tracing/profile_ksym
|
||||
|
||||
Say N if unsure.
|
||||
|
||||
config STACK_TRACER
|
||||
bool "Trace max stack"
|
||||
|
|
|
@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
|
|||
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
|
||||
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
|
||||
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
|
||||
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
|
||||
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
|
||||
|
||||
libftrace-y := ftrace.o
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/ftrace.h>
|
||||
#include <trace/boot.h>
|
||||
#include <linux/kmemtrace.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <linux/trace_seq.h>
|
||||
#include <linux/ftrace_event.h>
|
||||
|
@ -37,6 +38,7 @@ enum trace_type {
|
|||
TRACE_KMEM_ALLOC,
|
||||
TRACE_KMEM_FREE,
|
||||
TRACE_BLK,
|
||||
TRACE_KSYM,
|
||||
|
||||
__TRACE_LAST_TYPE,
|
||||
};
|
||||
|
@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
|
|||
TRACE_KMEM_ALLOC); \
|
||||
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
|
||||
TRACE_KMEM_FREE); \
|
||||
IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
|
||||
__ftrace_bad_type(); \
|
||||
} while (0)
|
||||
|
||||
|
@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
|
|||
void unregister_tracer(struct tracer *type);
|
||||
int is_tracing_stopped(void);
|
||||
|
||||
extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
|
||||
|
||||
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
|
@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
|
|||
struct trace_array *tr);
|
||||
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
|
||||
struct trace_array *tr);
|
||||
extern int trace_selftest_startup_ksym(struct tracer *trace,
|
||||
struct trace_array *tr);
|
||||
#endif /* CONFIG_FTRACE_STARTUP_TEST */
|
||||
|
||||
extern void *head_page(struct trace_array_cpu *data);
|
||||
|
|
|
@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
|
|||
F_printk("type:%u call_site:%lx ptr:%p",
|
||||
__entry->type_id, __entry->call_site, __entry->ptr)
|
||||
);
|
||||
|
||||
FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
|
||||
|
||||
TRACE_KSYM,
|
||||
|
||||
F_STRUCT(
|
||||
__field( unsigned long, ip )
|
||||
__field( unsigned char, type )
|
||||
__array( char , cmd, TASK_COMM_LEN )
|
||||
__field( unsigned long, addr )
|
||||
),
|
||||
|
||||
F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
|
||||
(void *)__entry->ip, (unsigned int)__entry->type,
|
||||
(void *)__entry->addr, __entry->cmd)
|
||||
);
|
||||
|
|
|
@ -0,0 +1,554 @@
|
|||
/*
|
||||
* trace_ksym.c - Kernel Symbol Tracer
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2009
|
||||
*/
|
||||
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "trace_output.h"
|
||||
#include "trace_stat.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
|
||||
/*
|
||||
* For now, let us restrict the no. of symbols traced simultaneously to number
|
||||
* of available hardware breakpoint registers.
|
||||
*/
|
||||
#define KSYM_TRACER_MAX HBP_NUM
|
||||
|
||||
#define KSYM_TRACER_OP_LEN 3 /* rw- */
|
||||
|
||||
struct trace_ksym {
|
||||
struct perf_event **ksym_hbp;
|
||||
unsigned long ksym_addr;
|
||||
int type;
|
||||
int len;
|
||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||
unsigned long counter;
|
||||
#endif
|
||||
struct hlist_node ksym_hlist;
|
||||
};
|
||||
|
||||
static struct trace_array *ksym_trace_array;
|
||||
|
||||
static unsigned int ksym_filter_entry_count;
|
||||
static unsigned int ksym_tracing_enabled;
|
||||
|
||||
static HLIST_HEAD(ksym_filter_head);
|
||||
|
||||
static DEFINE_MUTEX(ksym_tracer_mutex);
|
||||
|
||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||
|
||||
#define MAX_UL_INT 0xffffffff
|
||||
|
||||
void ksym_collect_stats(unsigned long hbp_hit_addr)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct trace_ksym *entry;
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
|
||||
if ((entry->ksym_addr == hbp_hit_addr) &&
|
||||
(entry->counter <= MAX_UL_INT)) {
|
||||
entry->counter++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif /* CONFIG_PROFILE_KSYM_TRACER */
|
||||
|
||||
void ksym_hbp_handler(struct perf_event *hbp, void *data)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
struct ksym_trace_entry *entry;
|
||||
struct pt_regs *regs = data;
|
||||
struct ring_buffer *buffer;
|
||||
int pc;
|
||||
|
||||
if (!ksym_tracing_enabled)
|
||||
return;
|
||||
|
||||
buffer = ksym_trace_array->buffer;
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
|
||||
sizeof(*entry), 0, pc);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
entry = ring_buffer_event_data(event);
|
||||
entry->ip = instruction_pointer(regs);
|
||||
entry->type = hw_breakpoint_type(hbp);
|
||||
entry->addr = hw_breakpoint_addr(hbp);
|
||||
strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
|
||||
|
||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||
ksym_collect_stats(hw_breakpoint_addr(hbp));
|
||||
#endif /* CONFIG_PROFILE_KSYM_TRACER */
|
||||
|
||||
trace_buffer_unlock_commit(buffer, event, 0, pc);
|
||||
}
|
||||
|
||||
/* Valid access types are represented as
|
||||
*
|
||||
* rw- : Set Read/Write Access Breakpoint
|
||||
* -w- : Set Write Access Breakpoint
|
||||
* --- : Clear Breakpoints
|
||||
* --x : Set Execution Break points (Not available yet)
|
||||
*
|
||||
*/
|
||||
static int ksym_trace_get_access_type(char *str)
|
||||
{
|
||||
int access = 0;
|
||||
|
||||
if (str[0] == 'r')
|
||||
access |= HW_BREAKPOINT_R;
|
||||
|
||||
if (str[1] == 'w')
|
||||
access |= HW_BREAKPOINT_W;
|
||||
|
||||
if (str[2] == 'x')
|
||||
access |= HW_BREAKPOINT_X;
|
||||
|
||||
switch (access) {
|
||||
case HW_BREAKPOINT_R:
|
||||
case HW_BREAKPOINT_W:
|
||||
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
|
||||
return access;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There can be several possible malformed requests and we attempt to capture
|
||||
* all of them. We enumerate some of the rules
|
||||
* 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
|
||||
* i.e. multiple ':' symbols disallowed. Possible uses are of the form
|
||||
* <module>:<ksym_name>:<op>.
|
||||
* 2. No delimiter symbol ':' in the input string
|
||||
* 3. Spurious operator symbols or symbols not in their respective positions
|
||||
* 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
|
||||
* 5. Kernel symbol not a part of /proc/kallsyms
|
||||
* 6. Duplicate requests
|
||||
*/
|
||||
static int parse_ksym_trace_str(char *input_string, char **ksymname,
|
||||
unsigned long *addr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
*ksymname = strsep(&input_string, ":");
|
||||
*addr = kallsyms_lookup_name(*ksymname);
|
||||
|
||||
/* Check for malformed request: (2), (1) and (5) */
|
||||
if ((!input_string) ||
|
||||
(strlen(input_string) != KSYM_TRACER_OP_LEN) ||
|
||||
(*addr == 0))
|
||||
return -EINVAL;;
|
||||
|
||||
ret = ksym_trace_get_access_type(input_string);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
|
||||
{
|
||||
struct trace_ksym *entry;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
|
||||
printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
|
||||
" new requests for tracing can be accepted now.\n",
|
||||
KSYM_TRACER_MAX);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
|
||||
entry->type = op;
|
||||
entry->ksym_addr = addr;
|
||||
entry->len = HW_BREAKPOINT_LEN_4;
|
||||
|
||||
ret = -EAGAIN;
|
||||
entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
|
||||
entry->len, entry->type,
|
||||
ksym_hbp_handler, true);
|
||||
if (IS_ERR(entry->ksym_hbp)) {
|
||||
entry->ksym_hbp = NULL;
|
||||
ret = PTR_ERR(entry->ksym_hbp);
|
||||
}
|
||||
|
||||
if (!entry->ksym_hbp) {
|
||||
printk(KERN_INFO "ksym_tracer request failed. Try again"
|
||||
" later!!\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
|
||||
ksym_filter_entry_count++;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kfree(entry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * read() handler of the ksym_trace_filter file.
 *
 * Formats one "<symbol>:<op>" line per traced symbol into a temporary
 * trace_seq while holding ksym_tracer_mutex, then copies the requested
 * window of it to user space. Returns the number of bytes copied or a
 * negative error.
 */
static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
						size_t count, loff_t *ppos)
{
	struct trace_ksym *sym;
	struct hlist_node *pos;
	struct trace_seq *seq;
	ssize_t copied = 0;
	int ret;

	seq = kmalloc(sizeof(*seq), GFP_KERNEL);
	if (!seq)
		return -ENOMEM;
	trace_seq_init(seq);

	mutex_lock(&ksym_tracer_mutex);

	hlist_for_each_entry(sym, pos, &ksym_filter_head, ksym_hlist) {
		ret = trace_seq_printf(seq, "%pS:", (void *)sym->ksym_addr);

		switch (sym->type) {
		case HW_BREAKPOINT_R:
			ret = trace_seq_puts(seq, "r--\n");
			break;
		case HW_BREAKPOINT_W:
			ret = trace_seq_puts(seq, "-w-\n");
			break;
		case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
			ret = trace_seq_puts(seq, "rw-\n");
			break;
		default:
			/* unknown type: only the symbol part is emitted */
			break;
		}
		WARN_ON_ONCE(!ret);
	}

	copied = simple_read_from_buffer(ubuf, count, ppos,
					 seq->buffer, seq->len);

	mutex_unlock(&ksym_tracer_mutex);

	kfree(seq);

	return copied;
}
|
||||
|
||||
static void __ksym_trace_reset(void)
|
||||
{
|
||||
struct trace_ksym *entry;
|
||||
struct hlist_node *node, *node1;
|
||||
|
||||
mutex_lock(&ksym_tracer_mutex);
|
||||
hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
|
||||
ksym_hlist) {
|
||||
unregister_wide_hw_breakpoint(entry->ksym_hbp);
|
||||
ksym_filter_entry_count--;
|
||||
hlist_del_rcu(&(entry->ksym_hlist));
|
||||
synchronize_rcu();
|
||||
kfree(entry);
|
||||
}
|
||||
mutex_unlock(&ksym_tracer_mutex);
|
||||
}
|
||||
|
||||
static ssize_t ksym_trace_filter_write(struct file *file,
|
||||
const char __user *buffer,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct trace_ksym *entry;
|
||||
struct hlist_node *node;
|
||||
char *input_string, *ksymname = NULL;
|
||||
unsigned long ksym_addr = 0;
|
||||
int ret, op, changed = 0;
|
||||
|
||||
input_string = kzalloc(count + 1, GFP_KERNEL);
|
||||
if (!input_string)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(input_string, buffer, count)) {
|
||||
kfree(input_string);
|
||||
return -EFAULT;
|
||||
}
|
||||
input_string[count] = '\0';
|
||||
|
||||
strstrip(input_string);
|
||||
|
||||
/*
|
||||
* Clear all breakpoints if:
|
||||
* 1: echo > ksym_trace_filter
|
||||
* 2: echo 0 > ksym_trace_filter
|
||||
* 3: echo "*:---" > ksym_trace_filter
|
||||
*/
|
||||
if (!input_string[0] || !strcmp(input_string, "0") ||
|
||||
!strcmp(input_string, "*:---")) {
|
||||
__ksym_trace_reset();
|
||||
kfree(input_string);
|
||||
return count;
|
||||
}
|
||||
|
||||
ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
|
||||
if (ret < 0) {
|
||||
kfree(input_string);
|
||||
return ret;
|
||||
}
|
||||
|
||||
mutex_lock(&ksym_tracer_mutex);
|
||||
|
||||
ret = -EINVAL;
|
||||
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
|
||||
if (entry->ksym_addr == ksym_addr) {
|
||||
/* Check for malformed request: (6) */
|
||||
if (entry->type != op)
|
||||
changed = 1;
|
||||
else
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (changed) {
|
||||
unregister_wide_hw_breakpoint(entry->ksym_hbp);
|
||||
entry->type = op;
|
||||
if (op > 0) {
|
||||
entry->ksym_hbp =
|
||||
register_wide_hw_breakpoint(entry->ksym_addr,
|
||||
entry->len, entry->type,
|
||||
ksym_hbp_handler, true);
|
||||
if (IS_ERR(entry->ksym_hbp))
|
||||
entry->ksym_hbp = NULL;
|
||||
if (!entry->ksym_hbp)
|
||||
goto out;
|
||||
}
|
||||
ksym_filter_entry_count--;
|
||||
hlist_del_rcu(&(entry->ksym_hlist));
|
||||
synchronize_rcu();
|
||||
kfree(entry);
|
||||
ret = 0;
|
||||
goto out;
|
||||
} else {
|
||||
/* Check for malformed request: (4) */
|
||||
if (op == 0)
|
||||
goto out;
|
||||
ret = process_new_ksym_entry(ksymname, op, ksym_addr);
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&ksym_tracer_mutex);
|
||||
|
||||
kfree(input_string);
|
||||
|
||||
if (!ret)
|
||||
ret = count;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations ksym_tracing_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.read = ksym_trace_filter_read,
|
||||
.write = ksym_trace_filter_write,
|
||||
};
|
||||
|
||||
static void ksym_trace_reset(struct trace_array *tr)
|
||||
{
|
||||
ksym_tracing_enabled = 0;
|
||||
__ksym_trace_reset();
|
||||
}
|
||||
|
||||
static int ksym_trace_init(struct trace_array *tr)
|
||||
{
|
||||
int cpu, ret = 0;
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
tracing_reset(tr, cpu);
|
||||
ksym_tracing_enabled = 1;
|
||||
ksym_trace_array = tr;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Emit the two header lines shown on top of the ksym tracer output */
static void ksym_trace_print_header(struct seq_file *m)
{
	/* column titles */
	seq_puts(m,
		 "# TASK-PID CPU# Symbol "
		 "Type Function\n");

	/* column markers */
	seq_puts(m,
		 "# | | | "
		 " | |\n");
}
|
||||
|
||||
static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
|
||||
{
|
||||
struct trace_entry *entry = iter->ent;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct ksym_trace_entry *field;
|
||||
char str[KSYM_SYMBOL_LEN];
|
||||
int ret;
|
||||
|
||||
if (entry->type != TRACE_KSYM)
|
||||
return TRACE_TYPE_UNHANDLED;
|
||||
|
||||
trace_assign_type(field, entry);
|
||||
|
||||
ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
|
||||
entry->pid, iter->cpu, (char *)field->addr);
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
switch (field->type) {
|
||||
case HW_BREAKPOINT_R:
|
||||
ret = trace_seq_printf(s, " R ");
|
||||
break;
|
||||
case HW_BREAKPOINT_W:
|
||||
ret = trace_seq_printf(s, " W ");
|
||||
break;
|
||||
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
|
||||
ret = trace_seq_printf(s, " RW ");
|
||||
break;
|
||||
default:
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
sprint_symbol(str, field->ip);
|
||||
ret = trace_seq_printf(s, "%s\n", str);
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
return TRACE_TYPE_HANDLED;
|
||||
}
|
||||
|
||||
/* The "ksym_tracer" ftrace plugin */
struct tracer ksym_tracer __read_mostly = {
	.name		= "ksym_tracer",
	.init		= ksym_trace_init,
	.reset		= ksym_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_ksym,
#endif
	.print_header	= ksym_trace_print_header,
	.print_line	= ksym_trace_output,
};
|
||||
|
||||
__init static int init_ksym_trace(void)
|
||||
{
|
||||
struct dentry *d_tracer;
|
||||
struct dentry *entry;
|
||||
|
||||
d_tracer = tracing_init_dentry();
|
||||
ksym_filter_entry_count = 0;
|
||||
|
||||
entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
|
||||
NULL, &ksym_tracing_fops);
|
||||
if (!entry)
|
||||
pr_warning("Could not create debugfs "
|
||||
"'ksym_trace_filter' file\n");
|
||||
|
||||
return register_tracer(&ksym_tracer);
|
||||
}
|
||||
device_initcall(init_ksym_trace);
|
||||
|
||||
|
||||
#ifdef CONFIG_PROFILE_KSYM_TRACER
|
||||
/*
 * Write the column titles and the dashed underline of the ksym tracer
 * statistics table into @m.  Always returns 0.
 */
static int ksym_tracer_stat_headers(struct seq_file *m)
{
	/* Emitted in order, one seq_puts() call per piece. */
	static const char * const header_parts[] = {
		" Access Type ",
		" Symbol Counter\n",
		" ----------- ",
		" ------ -------\n",
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(header_parts); i++)
		seq_puts(m, header_parts[i]);

	return 0;
}
|
||||
|
||||
static int ksym_tracer_stat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct hlist_node *stat = v;
|
||||
struct trace_ksym *entry;
|
||||
int access_type = 0;
|
||||
char fn_name[KSYM_NAME_LEN];
|
||||
|
||||
entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
|
||||
|
||||
access_type = entry->type;
|
||||
|
||||
switch (access_type) {
|
||||
case HW_BREAKPOINT_R:
|
||||
seq_puts(m, " R ");
|
||||
break;
|
||||
case HW_BREAKPOINT_W:
|
||||
seq_puts(m, " W ");
|
||||
break;
|
||||
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
|
||||
seq_puts(m, " RW ");
|
||||
break;
|
||||
default:
|
||||
seq_puts(m, " NA ");
|
||||
}
|
||||
|
||||
if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
|
||||
seq_printf(m, " %-36s", fn_name);
|
||||
else
|
||||
seq_printf(m, " %-36s", "<NA>");
|
||||
seq_printf(m, " %15lu\n", entry->counter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *ksym_tracer_stat_start(struct tracer_stat *trace)
|
||||
{
|
||||
return ksym_filter_head.first;
|
||||
}
|
||||
|
||||
static void *
|
||||
ksym_tracer_stat_next(void *v, int idx)
|
||||
{
|
||||
struct hlist_node *stat = v;
|
||||
|
||||
return stat->next;
|
||||
}
|
||||
|
||||
static struct tracer_stat ksym_tracer_stats = {
|
||||
.name = "ksym_tracer",
|
||||
.stat_start = ksym_tracer_stat_start,
|
||||
.stat_next = ksym_tracer_stat_next,
|
||||
.stat_headers = ksym_tracer_stat_headers,
|
||||
.stat_show = ksym_tracer_stat_show
|
||||
};
|
||||
|
||||
__init static int ksym_tracer_stat_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_stat_tracer(&ksym_tracer_stats);
|
||||
if (ret) {
|
||||
printk(KERN_WARNING "Warning: could not register "
|
||||
"ksym tracer stats\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
fs_initcall(ksym_tracer_stat_init);
|
||||
#endif /* CONFIG_PROFILE_KSYM_TRACER */
|
|
@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
|
|||
case TRACE_GRAPH_ENT:
|
||||
case TRACE_GRAPH_RET:
|
||||
case TRACE_HW_BRANCHES:
|
||||
case TRACE_KSYM:
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
|
|||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_HW_BRANCH_TRACER */
|
||||
|
||||
#ifdef CONFIG_KSYM_TRACER
|
||||
static int ksym_selftest_dummy;
|
||||
|
||||
int
|
||||
trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
unsigned long count;
|
||||
int ret;
|
||||
|
||||
/* start the tracing */
|
||||
ret = tracer_init(trace, tr);
|
||||
if (ret) {
|
||||
warn_failed_init_tracer(trace, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ksym_selftest_dummy = 0;
|
||||
/* Register the read-write tracing request */
|
||||
|
||||
ret = process_new_ksym_entry("ksym_selftest_dummy",
|
||||
HW_BREAKPOINT_R | HW_BREAKPOINT_W,
|
||||
(unsigned long)(&ksym_selftest_dummy));
|
||||
|
||||
if (ret < 0) {
|
||||
printk(KERN_CONT "ksym_trace read-write startup test failed\n");
|
||||
goto ret_path;
|
||||
}
|
||||
/* Perform a read and a write operation over the dummy variable to
|
||||
* trigger the tracer
|
||||
*/
|
||||
if (ksym_selftest_dummy == 0)
|
||||
ksym_selftest_dummy++;
|
||||
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check the trace buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
/* read & write operations - one each is performed on the dummy variable
|
||||
* triggering two entries in the trace buffer
|
||||
*/
|
||||
if (!ret && count != 2) {
|
||||
printk(KERN_CONT "Ksym tracer startup test failed");
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
ret_path:
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_KSYM_TRACER */
|
||||
|
||||
|
|
|
@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
|
|||
default m
|
||||
depends on SAMPLE_KPROBES && KRETPROBES
|
||||
|
||||
config SAMPLE_HW_BREAKPOINT
|
||||
tristate "Build kernel hardware breakpoint examples -- loadable module only"
|
||||
depends on HAVE_HW_BREAKPOINT && m
|
||||
help
|
||||
This builds kernel hardware breakpoint example modules.
|
||||
|
||||
endif # SAMPLES
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# Makefile for Linux samples code
|
||||
|
||||
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/
|
||||
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
|
||||
hw_breakpoint/
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* usage: insmod data_breakpoint.ko ksym=<ksym_name>
|
||||
*
|
||||
* This file is a kernel module that places a breakpoint over ksym_name kernel
|
||||
* variable using Hardware Breakpoint register. The corresponding handler which
|
||||
* prints a backtrace is invoked every time a write operation is performed on
|
||||
* that variable.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2009
|
||||
*/
|
||||
#include <linux/module.h> /* Needed by all modules */
|
||||
#include <linux/kernel.h> /* Needed for KERN_INFO */
|
||||
#include <linux/init.h> /* Needed for the macros */
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
struct perf_event **sample_hbp;
|
||||
|
||||
static char ksym_name[KSYM_NAME_LEN] = "pid_max";
|
||||
module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
|
||||
MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
|
||||
" write operations on the kernel symbol");
|
||||
|
||||
static void sample_hbp_handler(struct perf_event *temp, void *data)
|
||||
{
|
||||
printk(KERN_INFO "%s value is changed\n", ksym_name);
|
||||
dump_stack();
|
||||
printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
|
||||
}
|
||||
|
||||
/*
 * Module entry point: resolve ksym_name to an address and install a
 * 4-byte read/write hardware breakpoint on it, with sample_hbp_handler as
 * the callback.
 *
 * Returns 0 on success.  Returns -EINVAL if the symbol cannot be resolved
 * or the registration returns NULL, or the error encoded in the pointer
 * returned by register_wide_hw_breakpoint().
 */
static int __init hw_break_module_init(void)
{
	unsigned long addr;
	int ret;

	addr = kallsyms_lookup_name(ksym_name);
	if (!addr) {
		/* Unknown symbol: do not arm a breakpoint on address 0. */
		pr_info("Kernel symbol %s not found\n", ksym_name);
		ret = -EINVAL;
		goto fail;
	}

	sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
					HW_BREAKPOINT_W | HW_BREAKPOINT_R,
					sample_hbp_handler, true);
	if (IS_ERR(sample_hbp)) {
		ret = PTR_ERR(sample_hbp);
		goto fail;
	}
	if (!sample_hbp) {
		ret = -EINVAL;
		goto fail;
	}

	pr_info("HW Breakpoint for %s write installed\n", ksym_name);

	return 0;

fail:
	pr_info("Breakpoint registration failed\n");

	return ret;
}
|
||||
|
||||
static void __exit hw_break_module_exit(void)
|
||||
{
|
||||
unregister_wide_hw_breakpoint(sample_hbp);
|
||||
printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
|
||||
}
|
||||
|
||||
module_init(hw_break_module_init);
|
||||
module_exit(hw_break_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("K.Prasad");
|
||||
MODULE_DESCRIPTION("ksym breakpoint");
|
Loading…
Reference in New Issue