Merge branch 'bpf-permit-multiple-bpf-attachments-for-a-single-perf-tracepoint-event'
Yonghong Song says: ==================== bpf: permit multiple bpf attachments for a single perf tracepoint event This patch set adds support to permit multiple bpf prog attachments for a single perf tracepoint event. Patch 1 does some cleanup such that perf_event_{set|free}_bpf_handler is called under the same condition. Patch 2 has the core implementation, and Patch 3 adds a test case. Changelogs: v2 -> v3: . fix compilation error. v1 -> v2: . fix a potential deadlock issue discovered by Daniel. . fix some coding style issues. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
d4588211c6
|
@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
|
||||||
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
|
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
|
||||||
__u32 __user *prog_ids, u32 cnt);
|
__u32 __user *prog_ids, u32 cnt);
|
||||||
|
|
||||||
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
|
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
|
||||||
|
struct bpf_prog *old_prog);
|
||||||
|
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
|
||||||
|
struct bpf_prog *exclude_prog,
|
||||||
|
struct bpf_prog *include_prog,
|
||||||
|
struct bpf_prog_array **new_array);
|
||||||
|
|
||||||
|
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
|
||||||
({ \
|
({ \
|
||||||
struct bpf_prog **_prog; \
|
struct bpf_prog **_prog, *__prog; \
|
||||||
|
struct bpf_prog_array *_array; \
|
||||||
u32 _ret = 1; \
|
u32 _ret = 1; \
|
||||||
rcu_read_lock(); \
|
rcu_read_lock(); \
|
||||||
_prog = rcu_dereference(array)->progs; \
|
_array = rcu_dereference(array); \
|
||||||
for (; *_prog; _prog++) \
|
if (unlikely(check_non_null && !_array))\
|
||||||
_ret &= func(*_prog, ctx); \
|
goto _out; \
|
||||||
|
_prog = _array->progs; \
|
||||||
|
while ((__prog = READ_ONCE(*_prog))) { \
|
||||||
|
_ret &= func(__prog, ctx); \
|
||||||
|
_prog++; \
|
||||||
|
} \
|
||||||
|
_out: \
|
||||||
rcu_read_unlock(); \
|
rcu_read_unlock(); \
|
||||||
_ret; \
|
_ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
|
||||||
|
__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
|
||||||
|
|
||||||
|
#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \
|
||||||
|
__BPF_PROG_RUN_ARRAY(array, ctx, func, true)
|
||||||
|
|
||||||
#ifdef CONFIG_BPF_SYSCALL
|
#ifdef CONFIG_BPF_SYSCALL
|
||||||
DECLARE_PER_CPU(int, bpf_prog_active);
|
DECLARE_PER_CPU(int, bpf_prog_active);
|
||||||
|
|
||||||
|
|
|
@ -271,14 +271,37 @@ struct trace_event_call {
|
||||||
#ifdef CONFIG_PERF_EVENTS
|
#ifdef CONFIG_PERF_EVENTS
|
||||||
int perf_refcount;
|
int perf_refcount;
|
||||||
struct hlist_head __percpu *perf_events;
|
struct hlist_head __percpu *perf_events;
|
||||||
struct bpf_prog *prog;
|
struct bpf_prog_array __rcu *prog_array;
|
||||||
struct perf_event *bpf_prog_owner;
|
|
||||||
|
|
||||||
int (*perf_perm)(struct trace_event_call *,
|
int (*perf_perm)(struct trace_event_call *,
|
||||||
struct perf_event *);
|
struct perf_event *);
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_PERF_EVENTS
|
||||||
|
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This inline function checks whether call->prog_array
|
||||||
|
* is valid or not. The function is called in various places,
|
||||||
|
* outside rcu_read_lock/unlock, as a heuristic to speed up execution.
|
||||||
|
*
|
||||||
|
* If this function returns true, and later call->prog_array
|
||||||
|
* becomes NULL inside rcu_read_lock/unlock region,
|
||||||
|
* we bail out then. If this function returns false,
|
||||||
|
* there is a risk that we might miss a few events if the checking
|
||||||
|
* were delayed until inside rcu_read_lock/unlock region and
|
||||||
|
* call->prog_array happened to become non-NULL then.
|
||||||
|
*
|
||||||
|
* Here, READ_ONCE() is used instead of rcu_access_pointer().
|
||||||
|
* rcu_access_pointer() requires the actual definition of
|
||||||
|
* "struct bpf_prog_array" while READ_ONCE() only needs
|
||||||
|
* a declaration of the same type.
|
||||||
|
*/
|
||||||
|
return !!READ_ONCE(call->prog_array);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline const char *
|
static inline const char *
|
||||||
trace_event_name(struct trace_event_call *call)
|
trace_event_name(struct trace_event_call *call)
|
||||||
{
|
{
|
||||||
|
@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_BPF_EVENTS
|
#ifdef CONFIG_BPF_EVENTS
|
||||||
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
|
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
|
||||||
|
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
|
||||||
|
void perf_event_detach_bpf_prog(struct perf_event *event);
|
||||||
#else
|
#else
|
||||||
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
|
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
|
||||||
|
{
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
|
||||||
{
|
{
|
||||||
perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
|
perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* _LINUX_TRACE_EVENT_H */
|
#endif /* _LINUX_TRACE_EVENT_H */
|
||||||
|
|
|
@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \
|
||||||
struct trace_event_call *event_call = __data; \
|
struct trace_event_call *event_call = __data; \
|
||||||
struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
|
struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
|
||||||
struct trace_event_raw_##call *entry; \
|
struct trace_event_raw_##call *entry; \
|
||||||
struct bpf_prog *prog = event_call->prog; \
|
|
||||||
struct pt_regs *__regs; \
|
struct pt_regs *__regs; \
|
||||||
u64 __count = 1; \
|
u64 __count = 1; \
|
||||||
struct task_struct *__task = NULL; \
|
struct task_struct *__task = NULL; \
|
||||||
|
@ -46,7 +45,8 @@ perf_trace_##call(void *__data, proto) \
|
||||||
__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
|
__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
|
||||||
\
|
\
|
||||||
head = this_cpu_ptr(event_call->perf_events); \
|
head = this_cpu_ptr(event_call->perf_events); \
|
||||||
if (!prog && __builtin_constant_p(!__task) && !__task && \
|
if (!bpf_prog_array_valid(event_call) && \
|
||||||
|
__builtin_constant_p(!__task) && !__task && \
|
||||||
hlist_empty(head)) \
|
hlist_empty(head)) \
|
||||||
return; \
|
return; \
|
||||||
\
|
\
|
||||||
|
|
|
@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
|
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
|
||||||
|
|
||||||
|
static unsigned int __bpf_prog_ret1(const void *ctx,
|
||||||
|
const struct bpf_insn *insn)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct bpf_prog_dummy {
|
||||||
|
struct bpf_prog prog;
|
||||||
|
} dummy_bpf_prog = {
|
||||||
|
.prog = {
|
||||||
|
.bpf_func = __bpf_prog_ret1,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
/* to avoid allocating empty bpf_prog_array for cgroups that
|
/* to avoid allocating empty bpf_prog_array for cgroups that
|
||||||
* don't have bpf program attached use one global 'empty_prog_array'
|
* don't have bpf program attached use one global 'empty_prog_array'
|
||||||
* It will not be modified by the caller of bpf_prog_array_alloc()
|
* It will not be modified by the caller of bpf_prog_array_alloc()
|
||||||
|
@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
|
||||||
|
struct bpf_prog *old_prog)
|
||||||
|
{
|
||||||
|
struct bpf_prog **prog = progs->progs;
|
||||||
|
|
||||||
|
for (; *prog; prog++)
|
||||||
|
if (*prog == old_prog) {
|
||||||
|
WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
|
||||||
|
struct bpf_prog *exclude_prog,
|
||||||
|
struct bpf_prog *include_prog,
|
||||||
|
struct bpf_prog_array **new_array)
|
||||||
|
{
|
||||||
|
int new_prog_cnt, carry_prog_cnt = 0;
|
||||||
|
struct bpf_prog **existing_prog;
|
||||||
|
struct bpf_prog_array *array;
|
||||||
|
int new_prog_idx = 0;
|
||||||
|
|
||||||
|
/* Figure out how many existing progs we need to carry over to
|
||||||
|
* the new array.
|
||||||
|
*/
|
||||||
|
if (old_array) {
|
||||||
|
existing_prog = old_array->progs;
|
||||||
|
for (; *existing_prog; existing_prog++) {
|
||||||
|
if (*existing_prog != exclude_prog &&
|
||||||
|
*existing_prog != &dummy_bpf_prog.prog)
|
||||||
|
carry_prog_cnt++;
|
||||||
|
if (*existing_prog == include_prog)
|
||||||
|
return -EEXIST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* How many progs (not NULL) will be in the new array? */
|
||||||
|
new_prog_cnt = carry_prog_cnt;
|
||||||
|
if (include_prog)
|
||||||
|
new_prog_cnt += 1;
|
||||||
|
|
||||||
|
/* Do we have any prog (not NULL) in the new array? */
|
||||||
|
if (!new_prog_cnt) {
|
||||||
|
*new_array = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* +1 as the end of prog_array is marked with NULL */
|
||||||
|
array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
|
||||||
|
if (!array)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
/* Fill in the new prog array */
|
||||||
|
if (carry_prog_cnt) {
|
||||||
|
existing_prog = old_array->progs;
|
||||||
|
for (; *existing_prog; existing_prog++)
|
||||||
|
if (*existing_prog != exclude_prog &&
|
||||||
|
*existing_prog != &dummy_bpf_prog.prog)
|
||||||
|
array->progs[new_prog_idx++] = *existing_prog;
|
||||||
|
}
|
||||||
|
if (include_prog)
|
||||||
|
array->progs[new_prog_idx++] = include_prog;
|
||||||
|
array->progs[new_prog_idx] = NULL;
|
||||||
|
*new_array = array;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void bpf_prog_free_deferred(struct work_struct *work)
|
static void bpf_prog_free_deferred(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct bpf_prog_aux *aux;
|
struct bpf_prog_aux *aux;
|
||||||
|
|
|
@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
|
||||||
struct pt_regs *regs, struct hlist_head *head,
|
struct pt_regs *regs, struct hlist_head *head,
|
||||||
struct task_struct *task)
|
struct task_struct *task)
|
||||||
{
|
{
|
||||||
struct bpf_prog *prog = call->prog;
|
if (bpf_prog_array_valid(call)) {
|
||||||
|
|
||||||
if (prog) {
|
|
||||||
*(struct pt_regs **)raw_data = regs;
|
*(struct pt_regs **)raw_data = regs;
|
||||||
if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
|
if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
|
||||||
perf_swevent_put_recursion_context(rctx);
|
perf_swevent_put_recursion_context(rctx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
|
||||||
{
|
{
|
||||||
bool is_kprobe, is_tracepoint, is_syscall_tp;
|
bool is_kprobe, is_tracepoint, is_syscall_tp;
|
||||||
struct bpf_prog *prog;
|
struct bpf_prog *prog;
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
||||||
return perf_event_set_bpf_handler(event, prog_fd);
|
return perf_event_set_bpf_handler(event, prog_fd);
|
||||||
|
|
||||||
if (event->tp_event->prog)
|
|
||||||
return -EEXIST;
|
|
||||||
|
|
||||||
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
|
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
|
||||||
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
|
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
|
||||||
is_syscall_tp = is_syscall_trace_event(event->tp_event);
|
is_syscall_tp = is_syscall_trace_event(event->tp_event);
|
||||||
|
@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
|
||||||
return -EACCES;
|
return -EACCES;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
event->tp_event->prog = prog;
|
|
||||||
event->tp_event->bpf_prog_owner = event;
|
|
||||||
|
|
||||||
return 0;
|
ret = perf_event_attach_bpf_prog(event, prog);
|
||||||
|
if (ret)
|
||||||
|
bpf_prog_put(prog);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void perf_event_free_bpf_prog(struct perf_event *event)
|
static void perf_event_free_bpf_prog(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct bpf_prog *prog;
|
if (event->attr.type != PERF_TYPE_TRACEPOINT) {
|
||||||
|
|
||||||
perf_event_free_bpf_handler(event);
|
perf_event_free_bpf_handler(event);
|
||||||
|
|
||||||
if (!event->tp_event)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
prog = event->tp_event->prog;
|
|
||||||
if (prog && event->tp_event->bpf_prog_owner == event) {
|
|
||||||
event->tp_event->prog = NULL;
|
|
||||||
bpf_prog_put(prog);
|
|
||||||
}
|
}
|
||||||
|
perf_event_detach_bpf_prog(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* trace_call_bpf - invoke BPF program
|
* trace_call_bpf - invoke BPF program
|
||||||
* @prog: BPF program
|
* @call: tracepoint event
|
||||||
* @ctx: opaque context pointer
|
* @ctx: opaque context pointer
|
||||||
*
|
*
|
||||||
* kprobe handlers execute BPF programs via this helper.
|
* kprobe handlers execute BPF programs via this helper.
|
||||||
|
@ -29,7 +29,7 @@
|
||||||
* 1 - store kprobe event into ring buffer
|
* 1 - store kprobe event into ring buffer
|
||||||
* Other values are reserved and currently alias to 1
|
* Other values are reserved and currently alias to 1
|
||||||
*/
|
*/
|
||||||
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
|
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
|
||||||
{
|
{
|
||||||
unsigned int ret;
|
unsigned int ret;
|
||||||
|
|
||||||
|
@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
/*
|
||||||
ret = BPF_PROG_RUN(prog, ctx);
|
* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
|
||||||
rcu_read_unlock();
|
* to all call sites, we did a bpf_prog_array_valid() there to check
|
||||||
|
* whether call->prog_array is empty or not, which is
|
||||||
|
* a heuristic to speed up execution.
|
||||||
|
*
|
||||||
|
* If bpf_prog_array_valid() fetched prog_array was
|
||||||
|
* non-NULL, we go into trace_call_bpf() and do the actual
|
||||||
|
* proper rcu_dereference() under RCU lock.
|
||||||
|
* If it turns out that prog_array is NULL then, we bail out.
|
||||||
|
* For the opposite, if the bpf_prog_array_valid() fetched pointer
|
||||||
|
* was NULL, you'll skip the prog_array with the risk of missing
|
||||||
|
* out on events when it was updated in between this and the
|
||||||
|
* rcu_dereference() which is accepted risk.
|
||||||
|
*/
|
||||||
|
ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
__this_cpu_dec(bpf_prog_active);
|
__this_cpu_dec(bpf_prog_active);
|
||||||
|
@ -741,3 +754,62 @@ const struct bpf_verifier_ops perf_event_verifier_ops = {
|
||||||
|
|
||||||
const struct bpf_prog_ops perf_event_prog_ops = {
|
const struct bpf_prog_ops perf_event_prog_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static DEFINE_MUTEX(bpf_event_mutex);
|
||||||
|
|
||||||
|
int perf_event_attach_bpf_prog(struct perf_event *event,
|
||||||
|
struct bpf_prog *prog)
|
||||||
|
{
|
||||||
|
struct bpf_prog_array __rcu *old_array;
|
||||||
|
struct bpf_prog_array *new_array;
|
||||||
|
int ret = -EEXIST;
|
||||||
|
|
||||||
|
mutex_lock(&bpf_event_mutex);
|
||||||
|
|
||||||
|
if (event->prog)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
old_array = rcu_dereference_protected(event->tp_event->prog_array,
|
||||||
|
lockdep_is_held(&bpf_event_mutex));
|
||||||
|
ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* set the new array to event->tp_event and set event->prog */
|
||||||
|
event->prog = prog;
|
||||||
|
rcu_assign_pointer(event->tp_event->prog_array, new_array);
|
||||||
|
bpf_prog_array_free(old_array);
|
||||||
|
|
||||||
|
out:
|
||||||
|
mutex_unlock(&bpf_event_mutex);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void perf_event_detach_bpf_prog(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct bpf_prog_array __rcu *old_array;
|
||||||
|
struct bpf_prog_array *new_array;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mutex_lock(&bpf_event_mutex);
|
||||||
|
|
||||||
|
if (!event->prog)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
old_array = rcu_dereference_protected(event->tp_event->prog_array,
|
||||||
|
lockdep_is_held(&bpf_event_mutex));
|
||||||
|
|
||||||
|
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
|
||||||
|
if (ret < 0) {
|
||||||
|
bpf_prog_array_delete_safe(old_array, event->prog);
|
||||||
|
} else {
|
||||||
|
rcu_assign_pointer(event->tp_event->prog_array, new_array);
|
||||||
|
bpf_prog_array_free(old_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
bpf_prog_put(event->prog);
|
||||||
|
event->prog = NULL;
|
||||||
|
|
||||||
|
out:
|
||||||
|
mutex_unlock(&bpf_event_mutex);
|
||||||
|
}
|
||||||
|
|
|
@ -1174,13 +1174,12 @@ static void
|
||||||
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct trace_event_call *call = &tk->tp.call;
|
struct trace_event_call *call = &tk->tp.call;
|
||||||
struct bpf_prog *prog = call->prog;
|
|
||||||
struct kprobe_trace_entry_head *entry;
|
struct kprobe_trace_entry_head *entry;
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
int size, __size, dsize;
|
int size, __size, dsize;
|
||||||
int rctx;
|
int rctx;
|
||||||
|
|
||||||
if (prog && !trace_call_bpf(prog, regs))
|
if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
head = this_cpu_ptr(call->perf_events);
|
head = this_cpu_ptr(call->perf_events);
|
||||||
|
@ -1210,13 +1209,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||||
struct pt_regs *regs)
|
struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct trace_event_call *call = &tk->tp.call;
|
struct trace_event_call *call = &tk->tp.call;
|
||||||
struct bpf_prog *prog = call->prog;
|
|
||||||
struct kretprobe_trace_entry_head *entry;
|
struct kretprobe_trace_entry_head *entry;
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
int size, __size, dsize;
|
int size, __size, dsize;
|
||||||
int rctx;
|
int rctx;
|
||||||
|
|
||||||
if (prog && !trace_call_bpf(prog, regs))
|
if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
head = this_cpu_ptr(call->perf_events);
|
head = this_cpu_ptr(call->perf_events);
|
||||||
|
|
|
@ -559,9 +559,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
|
||||||
static int sys_perf_refcount_enter;
|
static int sys_perf_refcount_enter;
|
||||||
static int sys_perf_refcount_exit;
|
static int sys_perf_refcount_exit;
|
||||||
|
|
||||||
static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
|
static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
|
||||||
struct syscall_metadata *sys_data,
|
struct syscall_metadata *sys_data,
|
||||||
struct syscall_trace_enter *rec) {
|
struct syscall_trace_enter *rec)
|
||||||
|
{
|
||||||
struct syscall_tp_t {
|
struct syscall_tp_t {
|
||||||
unsigned long long regs;
|
unsigned long long regs;
|
||||||
unsigned long syscall_nr;
|
unsigned long syscall_nr;
|
||||||
|
@ -573,7 +574,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
|
||||||
param.syscall_nr = rec->nr;
|
param.syscall_nr = rec->nr;
|
||||||
for (i = 0; i < sys_data->nb_args; i++)
|
for (i = 0; i < sys_data->nb_args; i++)
|
||||||
param.args[i] = rec->args[i];
|
param.args[i] = rec->args[i];
|
||||||
return trace_call_bpf(prog, &param);
|
return trace_call_bpf(call, &param);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||||
|
@ -581,7 +582,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||||
struct syscall_metadata *sys_data;
|
struct syscall_metadata *sys_data;
|
||||||
struct syscall_trace_enter *rec;
|
struct syscall_trace_enter *rec;
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
struct bpf_prog *prog;
|
bool valid_prog_array;
|
||||||
int syscall_nr;
|
int syscall_nr;
|
||||||
int rctx;
|
int rctx;
|
||||||
int size;
|
int size;
|
||||||
|
@ -596,9 +597,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||||
if (!sys_data)
|
if (!sys_data)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
prog = READ_ONCE(sys_data->enter_event->prog);
|
|
||||||
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
||||||
if (!prog && hlist_empty(head))
|
valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
|
||||||
|
if (!valid_prog_array && hlist_empty(head))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* get the size after alignment with the u32 buffer size field */
|
/* get the size after alignment with the u32 buffer size field */
|
||||||
|
@ -614,7 +615,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||||
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
||||||
(unsigned long *)&rec->args);
|
(unsigned long *)&rec->args);
|
||||||
|
|
||||||
if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
|
if ((valid_prog_array &&
|
||||||
|
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
|
||||||
hlist_empty(head)) {
|
hlist_empty(head)) {
|
||||||
perf_swevent_put_recursion_context(rctx);
|
perf_swevent_put_recursion_context(rctx);
|
||||||
return;
|
return;
|
||||||
|
@ -659,8 +661,9 @@ static void perf_sysenter_disable(struct trace_event_call *call)
|
||||||
mutex_unlock(&syscall_trace_lock);
|
mutex_unlock(&syscall_trace_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
|
static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
|
||||||
struct syscall_trace_exit *rec) {
|
struct syscall_trace_exit *rec)
|
||||||
|
{
|
||||||
struct syscall_tp_t {
|
struct syscall_tp_t {
|
||||||
unsigned long long regs;
|
unsigned long long regs;
|
||||||
unsigned long syscall_nr;
|
unsigned long syscall_nr;
|
||||||
|
@ -670,7 +673,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
|
||||||
*(struct pt_regs **)&param = regs;
|
*(struct pt_regs **)&param = regs;
|
||||||
param.syscall_nr = rec->nr;
|
param.syscall_nr = rec->nr;
|
||||||
param.ret = rec->ret;
|
param.ret = rec->ret;
|
||||||
return trace_call_bpf(prog, &param);
|
return trace_call_bpf(call, &param);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||||
|
@ -678,7 +681,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||||
struct syscall_metadata *sys_data;
|
struct syscall_metadata *sys_data;
|
||||||
struct syscall_trace_exit *rec;
|
struct syscall_trace_exit *rec;
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
struct bpf_prog *prog;
|
bool valid_prog_array;
|
||||||
int syscall_nr;
|
int syscall_nr;
|
||||||
int rctx;
|
int rctx;
|
||||||
int size;
|
int size;
|
||||||
|
@ -693,9 +696,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||||
if (!sys_data)
|
if (!sys_data)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
prog = READ_ONCE(sys_data->exit_event->prog);
|
|
||||||
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
||||||
if (!prog && hlist_empty(head))
|
valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
|
||||||
|
if (!valid_prog_array && hlist_empty(head))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* We can probably do that at build time */
|
/* We can probably do that at build time */
|
||||||
|
@ -709,7 +712,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||||
rec->nr = syscall_nr;
|
rec->nr = syscall_nr;
|
||||||
rec->ret = syscall_get_return_value(current, regs);
|
rec->ret = syscall_get_return_value(current, regs);
|
||||||
|
|
||||||
if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
|
if ((valid_prog_array &&
|
||||||
|
!perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
|
||||||
hlist_empty(head)) {
|
hlist_empty(head)) {
|
||||||
perf_swevent_put_recursion_context(rctx);
|
perf_swevent_put_recursion_context(rctx);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -1113,13 +1113,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
|
||||||
{
|
{
|
||||||
struct trace_event_call *call = &tu->tp.call;
|
struct trace_event_call *call = &tu->tp.call;
|
||||||
struct uprobe_trace_entry_head *entry;
|
struct uprobe_trace_entry_head *entry;
|
||||||
struct bpf_prog *prog = call->prog;
|
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
void *data;
|
void *data;
|
||||||
int size, esize;
|
int size, esize;
|
||||||
int rctx;
|
int rctx;
|
||||||
|
|
||||||
if (prog && !trace_call_bpf(prog, regs))
|
if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||||
|
|
|
@ -23,6 +23,13 @@
|
||||||
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
|
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static void usage(const char *cmd)
|
||||||
|
{
|
||||||
|
printf("USAGE: %s [-i num_progs] [-h]\n", cmd);
|
||||||
|
printf(" -i num_progs # number of progs of the test\n");
|
||||||
|
printf(" -h # help\n");
|
||||||
|
}
|
||||||
|
|
||||||
static void verify_map(int map_id)
|
static void verify_map(int map_id)
|
||||||
{
|
{
|
||||||
__u32 key = 0;
|
__u32 key = 0;
|
||||||
|
@ -32,23 +39,30 @@ static void verify_map(int map_id)
|
||||||
fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
|
fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (val == 0)
|
if (val == 0) {
|
||||||
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
|
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
val = 0;
|
||||||
|
if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) {
|
||||||
|
fprintf(stderr, "map_update failed: %s\n", strerror(errno));
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
static int test(char *filename, int num_progs)
|
||||||
{
|
{
|
||||||
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
int i, fd, map0_fds[num_progs], map1_fds[num_progs];
|
||||||
char filename[256];
|
|
||||||
int fd;
|
|
||||||
|
|
||||||
setrlimit(RLIMIT_MEMLOCK, &r);
|
|
||||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
|
||||||
|
|
||||||
|
for (i = 0; i < num_progs; i++) {
|
||||||
if (load_bpf_file(filename)) {
|
if (load_bpf_file(filename)) {
|
||||||
fprintf(stderr, "%s", bpf_log_buf);
|
fprintf(stderr, "%s", bpf_log_buf);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
|
||||||
|
map0_fds[i] = map_fd[0];
|
||||||
|
map1_fds[i] = map_fd[1];
|
||||||
|
}
|
||||||
|
|
||||||
/* current load_bpf_file has perf_event_open default pid = -1
|
/* current load_bpf_file has perf_event_open default pid = -1
|
||||||
* and cpu = 0, which permits attached bpf execution on
|
* and cpu = 0, which permits attached bpf execution on
|
||||||
|
@ -64,8 +78,34 @@ int main(int argc, char **argv)
|
||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
/* verify the map */
|
/* verify the map */
|
||||||
verify_map(map_fd[0]);
|
for (i = 0; i < num_progs; i++) {
|
||||||
verify_map(map_fd[1]);
|
verify_map(map0_fds[i]);
|
||||||
|
verify_map(map1_fds[i]);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
||||||
|
int opt, num_progs = 1;
|
||||||
|
char filename[256];
|
||||||
|
|
||||||
|
while ((opt = getopt(argc, argv, "i:h")) != -1) {
|
||||||
|
switch (opt) {
|
||||||
|
case 'i':
|
||||||
|
num_progs = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'h':
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setrlimit(RLIMIT_MEMLOCK, &r);
|
||||||
|
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||||
|
|
||||||
|
return test(filename, num_progs);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue