kprobes: Remove kretprobe hash
The kretprobe hash is mostly superfluous; replace it with a per-task variable. This gets rid of the task hash and its related locking.

Note that this may change the kprobes module-exported API for kretprobe handlers. If any out-of-tree kretprobe user uses ri->rp, use get_kretprobe(ri) instead.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/159870620431.1229682.16325792502413731312.stgit@devnote2
commit d741bf41d7
parent 476c5818c3
include/linux/kprobes.h

@@ -27,6 +27,7 @@
 #include <linux/rcupdate.h>
 #include <linux/mutex.h>
 #include <linux/ftrace.h>
+#include <linux/refcount.h>
 #include <asm/kprobes.h>
 
 #ifdef CONFIG_KPROBES
@@ -144,6 +145,11 @@ static inline int kprobe_ftrace(struct kprobe *p)
  * ignored, due to maxactive being too low.
  *
  */
+struct kretprobe_holder {
+	struct kretprobe *rp;
+	refcount_t ref;
+};
+
 struct kretprobe {
 	struct kprobe kp;
 	kretprobe_handler_t handler;
@@ -152,17 +158,18 @@ struct kretprobe {
 	int nmissed;
 	size_t data_size;
 	struct hlist_head free_instances;
+	struct kretprobe_holder *rph;
 	raw_spinlock_t lock;
 };
 
 struct kretprobe_instance {
 	union {
+		struct llist_node llist;
 		struct hlist_node hlist;
 		struct rcu_head rcu;
 	};
-	struct kretprobe *rp;
+	struct kretprobe_holder *rph;
 	kprobe_opcode_t *ret_addr;
-	struct task_struct *task;
 	void *fp;
 	char data[];
 };
@@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
 	return ret;
 }
 
+static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+		"Kretprobe is accessed from instance under preemptive context");
+
+	return READ_ONCE(ri->rph->rp);
+}
+
 #else /* CONFIG_KRETPROBES */
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
 					struct pt_regs *regs)
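The new get_kretprobe() accessor above is the replacement for the removed ri->rp field, which is the API change called out in the commit message. A minimal migration sketch for a hypothetical out-of-tree handler (the handler name is made up); handlers run with preemption disabled, which satisfies the RCU_LOCKDEP_WARN() check:

#include <linux/kprobes.h>

static int my_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* before this commit: struct kretprobe *rp = ri->rp; */
	struct kretprobe *rp = get_kretprobe(ri);

	if (rp)		/* NULL once the kretprobe is being unregistered */
		pr_info("return from %ps\n", rp->kp.addr);
	return 0;
}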
include/linux/sched.h

@@ -1315,6 +1315,10 @@ struct task_struct {
 	struct callback_head mce_kill_me;
 #endif
 
+#ifdef CONFIG_KRETPROBES
+	struct llist_head kretprobe_instances;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.

kernel/fork.c

@@ -2161,6 +2161,10 @@ static __latent_entropy struct task_struct *copy_process(
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
 
+#ifdef CONFIG_KRETPROBES
+	p->kretprobe_instances.first = NULL;
+#endif
+
 	/*
 	 * Ensure that the cgroup subsystem policies allow the new process to be
 	 * forked. It should be noted the the new process's css_set can be changed
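The per-task llist is what removes the locking: each list has a single logical owner, so the unlocked llist primitives suffice. A sketch of the ownership argument, with hypothetical function names (the real push and drain sites are pre_handler_kretprobe() and kprobe_flush_task() in the kernel/kprobes.c hunks below):

#include <linux/kprobes.h>
#include <linux/llist.h>
#include <linux/sched.h>

static void queue_on_current(struct kretprobe_instance *ri)
{
	/* the entry path runs in the context of 'current' with
	 * preemption disabled, so nothing else can push to this head */
	__llist_add(&ri->llist, &current->kretprobe_instances);
}

static void drain_exiting_task(struct task_struct *tk)
{
	/* tk is exiting: detach the whole list in one shot */
	struct llist_node *node = __llist_del_all(&tk->kretprobe_instances);

	while (node) {
		struct kretprobe_instance *ri =
			llist_entry(node, struct kretprobe_instance, llist);

		node = node->next;
		(void)ri;	/* recycle or free ri, as kprobe_flush_task() does */
	}
}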
kernel/kprobes.c
@@ -53,7 +53,6 @@ static int kprobes_initialized;
  * - RCU hlist traversal under disabling preempt (breakpoint handlers)
  */
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
-static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobes_all_disarmed;
@@ -61,9 +60,6 @@ static bool kprobes_all_disarmed;
 /* This protects kprobe_table and optimizing_list */
 static DEFINE_MUTEX(kprobe_mutex);
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
-static struct {
-	raw_spinlock_t lock ____cacheline_aligned_in_smp;
-} kretprobe_table_locks[KPROBE_TABLE_SIZE];
 
 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
 					unsigned int __unused)
@@ -71,11 +67,6 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
 	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
 }
 
-static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
-{
-	return &(kretprobe_table_locks[hash].lock);
-}
-
 /* Blacklist -- list of struct kprobe_blacklist_entry */
 static LIST_HEAD(kprobe_blacklist);
@@ -1223,65 +1214,30 @@ void kprobes_inc_nmissed_count(struct kprobe *p)
 }
 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
+static void free_rp_inst_rcu(struct rcu_head *head)
+{
+	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
+
+	if (refcount_dec_and_test(&ri->rph->ref))
+		kfree(ri->rph);
+	kfree(ri);
+}
+NOKPROBE_SYMBOL(free_rp_inst_rcu);
+
 static void recycle_rp_inst(struct kretprobe_instance *ri)
 {
-	struct kretprobe *rp = ri->rp;
+	struct kretprobe *rp = get_kretprobe(ri);
 
-	/* remove rp inst off the rprobe_inst_table */
-	hlist_del(&ri->hlist);
-	INIT_HLIST_NODE(&ri->hlist);
 	if (likely(rp)) {
 		raw_spin_lock(&rp->lock);
 		hlist_add_head(&ri->hlist, &rp->free_instances);
 		raw_spin_unlock(&rp->lock);
 	} else
-		kfree_rcu(ri, rcu);
+		call_rcu(&ri->rcu, free_rp_inst_rcu);
 }
 NOKPROBE_SYMBOL(recycle_rp_inst);
 
-static void kretprobe_hash_lock(struct task_struct *tsk,
-			 struct hlist_head **head, unsigned long *flags)
-__acquires(hlist_lock)
-{
-	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
-	raw_spinlock_t *hlist_lock;
-
-	*head = &kretprobe_inst_table[hash];
-	hlist_lock = kretprobe_table_lock_ptr(hash);
-	raw_spin_lock_irqsave(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_lock);
-
-static void kretprobe_table_lock(unsigned long hash,
-				 unsigned long *flags)
-__acquires(hlist_lock)
-{
-	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-	raw_spin_lock_irqsave(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_table_lock);
-
-static void kretprobe_hash_unlock(struct task_struct *tsk,
-				  unsigned long *flags)
-__releases(hlist_lock)
-{
-	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
-	raw_spinlock_t *hlist_lock;
-
-	hlist_lock = kretprobe_table_lock_ptr(hash);
-	raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_unlock);
-
-static void kretprobe_table_unlock(unsigned long hash,
-				   unsigned long *flags)
-__releases(hlist_lock)
-{
-	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-	raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_table_unlock);
-
 static struct kprobe kprobe_busy = {
 	.addr = (void *) get_kprobe,
 };
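recycle_rp_inst() switches from kfree_rcu() to call_rcu() because freeing an orphaned instance now has two jobs: drop a reference on the shared holder and free the instance itself, and kfree_rcu() can only do the latter. A generic sketch of that pattern, with hypothetical obj/shared names (not the kernel's types):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct shared { refcount_t ref; };
struct obj { struct rcu_head rcu; struct shared *sh; };

static void obj_free_rcu(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu);

	if (refcount_dec_and_test(&o->sh->ref))	/* last object also frees
						   the shared state */
		kfree(o->sh);
	kfree(o);
}

static void obj_release(struct obj *o)
{
	/* kfree_rcu(o, rcu) would free o after the grace period but
	 * could not touch o->sh; a custom callback can */
	call_rcu(&o->rcu, obj_free_rcu);
}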
@@ -1311,24 +1267,21 @@ void kprobe_busy_end(void)
 void kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *tmp;
-	unsigned long hash, flags = 0;
+	struct llist_node *node;
 
+	/* Early boot, not yet initialized. */
 	if (unlikely(!kprobes_initialized))
-		/* Early boot.  kretprobe_table_locks not yet initialized. */
 		return;
 
 	kprobe_busy_begin();
 
-	hash = hash_ptr(tk, KPROBE_HASH_BITS);
-	head = &kretprobe_inst_table[hash];
-	kretprobe_table_lock(hash, &flags);
-	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-		if (ri->task == tk)
-			recycle_rp_inst(ri);
-	}
-	kretprobe_table_unlock(hash, &flags);
+	node = __llist_del_all(&tk->kretprobe_instances);
+	while (node) {
+		ri = container_of(node, struct kretprobe_instance, llist);
+		node = node->next;
+
+		recycle_rp_inst(ri);
+	}
 
 	kprobe_busy_end();
 }
@@ -1338,37 +1291,20 @@ static inline void free_rp_inst(struct kretprobe *rp)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_node *next;
+	int count = 0;
 
 	hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
+		count++;
 	}
-}
 
-static void cleanup_rp_inst(struct kretprobe *rp)
-{
-	unsigned long flags, hash;
-	struct kretprobe_instance *ri;
-	struct hlist_node *next;
-	struct hlist_head *head;
-
-	/* To avoid recursive kretprobe by NMI, set kprobe busy here */
-	kprobe_busy_begin();
-	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
-		kretprobe_table_lock(hash, &flags);
-		head = &kretprobe_inst_table[hash];
-		hlist_for_each_entry_safe(ri, next, head, hlist) {
-			if (ri->rp == rp)
-				ri->rp = NULL;
-		}
-		kretprobe_table_unlock(hash, &flags);
-	}
-	kprobe_busy_end();
-
-	free_rp_inst(rp);
+	if (refcount_sub_and_test(count, &rp->rph->ref)) {
+		kfree(rp->rph);
+		rp->rph = NULL;
+	}
 }
-NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /* Add the new probe to ap->list */
 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
@@ -1928,88 +1864,56 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 					     void *trampoline_address,
 					     void *frame_pointer)
 {
-	struct kretprobe_instance *ri = NULL, *last = NULL;
-	struct hlist_head *head;
-	struct hlist_node *tmp;
-	unsigned long flags;
 	kprobe_opcode_t *correct_ret_addr = NULL;
-	bool skipped = false;
+	struct kretprobe_instance *ri = NULL;
+	struct llist_node *first, *node;
+	struct kretprobe *rp;
 
-	kretprobe_hash_lock(current, &head, &flags);
+	/* Find all nodes for this frame. */
+	first = node = current->kretprobe_instances.first;
+	while (node) {
+		ri = container_of(node, struct kretprobe_instance, llist);
 
-	/*
-	 * It is possible to have multiple instances associated with a given
-	 * task either because multiple functions in the call path have
-	 * return probes installed on them, and/or more than one
-	 * return probe was registered for a target function.
-	 *
-	 * We can handle this because:
-	 *     - instances are always pushed into the head of the list
-	 *     - when multiple return probes are registered for the same
-	 *       function, the (chronologically) first instance's ret_addr
-	 *       will be the real return address, and all the rest will
-	 *       point to kretprobe_trampoline.
-	 */
-	hlist_for_each_entry(ri, head, hlist) {
-		if (ri->task != current)
-			/* another task is sharing our hash bucket */
-			continue;
-		/*
-		 * Return probes must be pushed on this hash list correct
-		 * order (same as return order) so that it can be popped
-		 * correctly. However, if we find it is pushed it incorrect
-		 * order, this means we find a function which should not be
-		 * probed, because the wrong order entry is pushed on the
-		 * path of processing other kretprobe itself.
-		 */
-		if (ri->fp != frame_pointer) {
-			if (!skipped)
-				pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
-			skipped = true;
-			continue;
-		}
+		BUG_ON(ri->fp != frame_pointer);
 
-		correct_ret_addr = ri->ret_addr;
-		if (skipped)
-			pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
-				ri->rp->kp.addr);
-
-		if (correct_ret_addr != trampoline_address)
+		if (ri->ret_addr != trampoline_address) {
+			correct_ret_addr = ri->ret_addr;
 			/*
 			 * This is the real return address. Any other
 			 * instances associated with this task are for
 			 * other calls deeper on the call stack
 			 */
-			break;
+			goto found;
+		}
+
+		node = node->next;
 	}
+	pr_err("Oops! Kretprobe fails to find correct return address.\n");
+	BUG_ON(1);
 
-	BUG_ON(!correct_ret_addr || (correct_ret_addr == trampoline_address));
-	last = ri;
+found:
+	/* Unlink all nodes for this frame. */
+	current->kretprobe_instances.first = node->next;
+	node->next = NULL;
 
-	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-		if (ri->task != current)
-			/* another task is sharing our hash bucket */
-			continue;
-		if (ri->fp != frame_pointer)
-			continue;
+	/* Run them.. */
+	while (first) {
+		ri = container_of(first, struct kretprobe_instance, llist);
+		first = first->next;
 
-		if (ri->rp && ri->rp->handler) {
+		rp = get_kretprobe(ri);
+		if (rp && rp->handler) {
 			struct kprobe *prev = kprobe_running();
 
-			__this_cpu_write(current_kprobe, &ri->rp->kp);
+			__this_cpu_write(current_kprobe, &rp->kp);
 			ri->ret_addr = correct_ret_addr;
-			ri->rp->handler(ri, regs);
+			rp->handler(ri, regs);
 			__this_cpu_write(current_kprobe, prev);
 		}
 
 		recycle_rp_inst(ri);
-
-		if (ri == last)
-			break;
 	}
 
-	kretprobe_hash_unlock(current, &flags);
-
 	return (unsigned long)correct_ret_addr;
 }
 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
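The rewritten handler works in two phases: scan the task's list until the first instance whose saved ret_addr is not the trampoline (per the removed comment block, that one holds the real return address when several probes target the same function), then detach exactly that prefix and run its handlers in order. A standalone plain-C model of the pop, with made-up addresses, just to make the list manipulation concrete:

#include <stdio.h>

struct inst { struct inst *next; void *ret_addr; };

static void *TRAMPOLINE = (void *)0x1;

static struct inst *pop_frame(struct inst **head, void **correct_ra)
{
	struct inst *first = *head, *node = *head;

	while (node) {				/* phase 1: find the real RA */
		if (node->ret_addr != TRAMPOLINE) {
			*correct_ra = node->ret_addr;
			break;
		}
		node = node->next;
	}
	*head = node ? node->next : NULL;	/* phase 2: unlink the prefix */
	if (node)
		node->next = NULL;
	return first;				/* run these, newest first */
}

int main(void)
{
	/* two kretprobes on one function: the later-pushed instance saw
	 * the trampoline already installed as the return address */
	struct inst one = { NULL, (void *)0xabc };	/* real return address */
	struct inst two = { &one, TRAMPOLINE };
	struct inst *head = &two;
	void *ra = NULL;

	for (struct inst *i = pop_frame(&head, &ra); i; i = i->next)
		printf("run handler for instance %p\n", (void *)i);
	printf("jump back to %p\n", ra);	/* prints 0xabc */
	return 0;
}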
@@ -2021,11 +1925,10 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long hash, flags = 0;
+	unsigned long flags = 0;
 	struct kretprobe_instance *ri;
 
 	/* TODO: consider to only swap the RA after the last pre_handler fired */
-	hash = hash_ptr(current, KPROBE_HASH_BITS);
 	raw_spin_lock_irqsave(&rp->lock, flags);
 	if (!hlist_empty(&rp->free_instances)) {
 		ri = hlist_entry(rp->free_instances.first,
@@ -2033,9 +1936,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 		hlist_del(&ri->hlist);
 		raw_spin_unlock_irqrestore(&rp->lock, flags);
 
-		ri->rp = rp;
-		ri->task = current;
-
 		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
 			raw_spin_lock_irqsave(&rp->lock, flags);
 			hlist_add_head(&ri->hlist, &rp->free_instances);
@@ -2045,11 +1945,8 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 
 		arch_prepare_kretprobe(ri, regs);
 
-		/* XXX(hch): why is there no hlist_move_head? */
-		INIT_HLIST_NODE(&ri->hlist);
-		kretprobe_table_lock(hash, &flags);
-		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
-		kretprobe_table_unlock(hash, &flags);
+		__llist_add(&ri->llist, &current->kretprobe_instances);
 
 	} else {
 		rp->nmissed++;
 		raw_spin_unlock_irqrestore(&rp->lock, flags);
@@ -2112,16 +2009,24 @@ int register_kretprobe(struct kretprobe *rp)
 	}
 	raw_spin_lock_init(&rp->lock);
 	INIT_HLIST_HEAD(&rp->free_instances);
+	rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
+	if (!rp->rph)
+		return -ENOMEM;
+
+	rp->rph->rp = rp;
 	for (i = 0; i < rp->maxactive; i++) {
-		inst = kmalloc(sizeof(struct kretprobe_instance) +
+		inst = kzalloc(sizeof(struct kretprobe_instance) +
			       rp->data_size, GFP_KERNEL);
 		if (inst == NULL) {
+			refcount_set(&rp->rph->ref, i);
 			free_rp_inst(rp);
 			return -ENOMEM;
 		}
+		inst->rph = rp->rph;
 		INIT_HLIST_NODE(&inst->hlist);
 		hlist_add_head(&inst->hlist, &rp->free_instances);
 	}
+	refcount_set(&rp->rph->ref, i);
 
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
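Note the refcount arithmetic: after the loop, rph->ref is set to i == maxactive, one reference per preallocated instance; on a partial allocation failure it is set to the number of instances built so far, which free_rp_inst() then subtracts back off. The registration API itself is unchanged by this patch. For context, a minimal usage sketch modeled on samples/kprobes/kretprobe_example.c; the probed symbol is an arbitrary, hypothetical choice:

#include <linux/kprobes.h>
#include <linux/module.h>

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	pr_info("probed function returned %lu\n", regs_return_value(regs));
	return 0;
}

static struct kretprobe my_kretprobe = {
	.handler	= ret_handler,
	.maxactive	= 20,	/* instances preallocated; rph->ref starts here */
	.kp.symbol_name	= "kernel_clone",	/* any non-blacklisted function */
};

static int __init my_init(void)
{
	return register_kretprobe(&my_kretprobe);
}

static void __exit my_exit(void)
{
	unregister_kretprobe(&my_kretprobe);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");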
@@ -2163,16 +2068,18 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
 	if (num <= 0)
 		return;
 	mutex_lock(&kprobe_mutex);
-	for (i = 0; i < num; i++)
+	for (i = 0; i < num; i++) {
 		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
 			rps[i]->kp.addr = NULL;
+		rps[i]->rph->rp = NULL;
+	}
 	mutex_unlock(&kprobe_mutex);
 
 	synchronize_rcu();
 	for (i = 0; i < num; i++) {
 		if (rps[i]->kp.addr) {
 			__unregister_kprobe_bottom(&rps[i]->kp);
-			cleanup_rp_inst(rps[i]);
+			free_rp_inst(rps[i]);
 		}
 	}
 }
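The ordering here is the retract half of the RCU scheme: rph->rp is cleared under kprobe_mutex, one grace period elapses, and only then are the instances freed, so a concurrent get_kretprobe() reader sees either the still-live kretprobe or NULL, never a freed one. Generic shape of the retract-then-wait idiom, with a hypothetical holder type standing in for kretprobe_holder:

#include <linux/rcupdate.h>

struct kretprobe;
struct holder { struct kretprobe *rp; };	/* stand-in for kretprobe_holder */

static void retract(struct holder *h)
{
	WRITE_ONCE(h->rp, NULL);	/* new readers observe NULL */
	synchronize_rcu();		/* wait out readers that saw the old rp */
	/* only now is it safe to tear down what h->rp pointed to */
}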
@@ -2535,11 +2442,8 @@ static int __init init_kprobes(void)
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
-	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
 		INIT_HLIST_HEAD(&kprobe_table[i]);
-		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
-		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
-	}
 
 	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
 					__stop_kprobe_blacklist);

kernel/trace/trace_kprobe.c
@@ -1714,7 +1714,8 @@ NOKPROBE_SYMBOL(kprobe_dispatcher);
 static int
 kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
-	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
+	struct kretprobe *rp = get_kretprobe(ri);
+	struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
 
 	raw_cpu_inc(*tk->nhit);