bpf: Introduce pinnable bpf_link abstraction
Introduce bpf_link abstraction, representing an attachment of a BPF program to a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates ownership of the attached BPF program and reference counting of the link itself when it is referenced from multiple anonymous inodes, and ensures that the release callback is called from process context, so that users can safely take mutex locks and sleep. Additionally, with the new abstraction it's now possible to generalize pinning of a link object in BPF FS, allowing users to explicitly prevent BPF program detachment on process exit by pinning the link in BPF FS and letting another, independent process open it to keep working with it. Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF program attachments) to use the bpf_link framework, making them pinnable in BPF FS. More FD-based bpf_links will be added in follow-up patches. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com
This commit is contained in:
parent
775a2be52d
commit
70ed506c3b
|
@ -1056,6 +1056,19 @@ extern int sysctl_unprivileged_bpf_disabled;
|
|||
int bpf_map_new_fd(struct bpf_map *map, int flags);
|
||||
int bpf_prog_new_fd(struct bpf_prog *prog);
|
||||
|
||||
struct bpf_link;
|
||||
|
||||
struct bpf_link_ops {
|
||||
void (*release)(struct bpf_link *link);
|
||||
};
|
||||
|
||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
||||
struct bpf_prog *prog);
|
||||
void bpf_link_inc(struct bpf_link *link);
|
||||
void bpf_link_put(struct bpf_link *link);
|
||||
int bpf_link_new_fd(struct bpf_link *link);
|
||||
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
|
||||
|
||||
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
|
||||
int bpf_obj_get_user(const char __user *pathname, int flags);
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ enum bpf_type {
|
|||
BPF_TYPE_UNSPEC = 0,
|
||||
BPF_TYPE_PROG,
|
||||
BPF_TYPE_MAP,
|
||||
BPF_TYPE_LINK,
|
||||
};
|
||||
|
||||
static void *bpf_any_get(void *raw, enum bpf_type type)
|
||||
|
@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
|
|||
case BPF_TYPE_MAP:
|
||||
bpf_map_inc_with_uref(raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
bpf_link_inc(raw);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
|
@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type)
|
|||
case BPF_TYPE_MAP:
|
||||
bpf_map_put_with_uref(raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
bpf_link_put(raw);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
|
@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
|
|||
{
|
||||
void *raw;
|
||||
|
||||
*type = BPF_TYPE_MAP;
|
||||
raw = bpf_map_get_with_uref(ufd);
|
||||
if (IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_PROG;
|
||||
raw = bpf_prog_get(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_MAP;
|
||||
return raw;
|
||||
}
|
||||
|
||||
raw = bpf_prog_get(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_PROG;
|
||||
return raw;
|
||||
}
|
||||
|
||||
raw = bpf_link_get_from_fd(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_LINK;
|
||||
return raw;
|
||||
}
|
||||
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static const struct inode_operations bpf_dir_iops;
|
||||
|
||||
static const struct inode_operations bpf_prog_iops = { };
|
||||
static const struct inode_operations bpf_map_iops = { };
|
||||
static const struct inode_operations bpf_link_iops = { };
|
||||
|
||||
static struct inode *bpf_get_inode(struct super_block *sb,
|
||||
const struct inode *dir,
|
||||
|
@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
|
|||
*type = BPF_TYPE_PROG;
|
||||
else if (inode->i_op == &bpf_map_iops)
|
||||
*type = BPF_TYPE_MAP;
|
||||
else if (inode->i_op == &bpf_link_iops)
|
||||
*type = BPF_TYPE_LINK;
|
||||
else
|
||||
return -EACCES;
|
||||
|
||||
|
@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
|
|||
&bpffs_map_fops : &bpffs_obj_fops);
|
||||
}
|
||||
|
||||
/* vfs_mkobj() callback used when pinning a link: hands @dentry, @mode and
 * @arg to bpf_mkobj_ops() together with the link inode ops and the generic
 * object file ops.
 */
static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
{
	int err;

	err = bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, &bpffs_obj_fops);
	return err;
}
|
||||
|
||||
static struct dentry *
|
||||
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
|
||||
{
|
||||
|
@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
|
|||
case BPF_TYPE_MAP:
|
||||
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
|
||||
break;
|
||||
default:
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
|
|||
ret = bpf_prog_new_fd(raw);
|
||||
else if (type == BPF_TYPE_MAP)
|
||||
ret = bpf_map_new_fd(raw, f_flags);
|
||||
else if (type == BPF_TYPE_LINK)
|
||||
ret = bpf_link_new_fd(raw);
|
||||
else
|
||||
return -ENOENT;
|
||||
|
||||
|
@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
|
|||
|
||||
if (inode->i_op == &bpf_map_iops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (inode->i_op == &bpf_link_iops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (inode->i_op != &bpf_prog_iops)
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
|
|
|
@ -2173,24 +2173,154 @@ static int bpf_obj_get(const union bpf_attr *attr)
|
|||
attr->file_flags);
|
||||
}
|
||||
|
||||
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_prog *prog = filp->private_data;
|
||||
struct bpf_link {
|
||||
atomic64_t refcnt;
|
||||
const struct bpf_link_ops *ops;
|
||||
struct bpf_prog *prog;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
atomic64_set(&link->refcnt, 1);
|
||||
link->ops = ops;
|
||||
link->prog = prog;
|
||||
}
|
||||
|
||||
void bpf_link_inc(struct bpf_link *link)
|
||||
{
|
||||
atomic64_inc(&link->refcnt);
|
||||
}
|
||||
|
||||
/* bpf_link_free is guaranteed to be called from process context */
|
||||
static void bpf_link_free(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* remember prog locally, because release below will free link memory */
|
||||
prog = link->prog;
|
||||
/* extra clean up and kfree of container link struct */
|
||||
link->ops->release(link);
|
||||
/* no more accesing of link members after this point */
|
||||
bpf_prog_put(prog);
|
||||
}
|
||||
|
||||
static void bpf_link_put_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_link *link = container_of(work, struct bpf_link, work);
|
||||
|
||||
bpf_link_free(link);
|
||||
}
|
||||
|
||||
/* bpf_link_put can be called from atomic context, but ensures that resources
|
||||
* are freed from process context
|
||||
*/
|
||||
void bpf_link_put(struct bpf_link *link)
|
||||
{
|
||||
if (!atomic64_dec_and_test(&link->refcnt))
|
||||
return;
|
||||
|
||||
if (in_atomic()) {
|
||||
INIT_WORK(&link->work, bpf_link_put_deferred);
|
||||
schedule_work(&link->work);
|
||||
} else {
|
||||
bpf_link_free(link);
|
||||
}
|
||||
}
|
||||
|
||||
static int bpf_link_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_link *link = filp->private_data;
|
||||
|
||||
bpf_link_put(link);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_tracing_prog_fops = {
|
||||
.release = bpf_tracing_prog_release,
|
||||
#ifdef CONFIG_PROC_FS
/* Forward declarations: both ops tables are defined further down, but their
 * addresses are needed here to tell the link types apart.
 */
static const struct bpf_link_ops bpf_raw_tp_lops;
static const struct bpf_link_ops bpf_tracing_link_lops;

/* ->show_fdinfo() for bpf_link files: prints the link type together with the
 * tag and the id of the attached program into @m.
 */
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_link *link = filp->private_data;
	const struct bpf_prog *prog = link->prog;
	/* two hex digits per tag byte plus the terminating NUL; the buffer is
	 * zero-initialised, so the string stays terminated after bin2hex()
	 */
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
	const char *link_type;

	/* the link type is identified by the ops table the link was set up with */
	if (link->ops == &bpf_raw_tp_lops)
		link_type = "raw_tracepoint";
	else if (link->ops == &bpf_tracing_link_lops)
		link_type = "tracing";
	else
		link_type = "unknown";

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "link_type:\t%s\n"
		   "prog_tag:\t%s\n"
		   "prog_id:\t%u\n",
		   link_type,
		   prog_tag,
		   prog->aux->id);
}
#endif
|
||||
|
||||
const struct file_operations bpf_link_fops = {
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = bpf_link_show_fdinfo,
|
||||
#endif
|
||||
.release = bpf_link_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
};
|
||||
|
||||
int bpf_link_new_fd(struct bpf_link *link)
|
||||
{
|
||||
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
|
||||
}
|
||||
|
||||
/* Resolve file descriptor @ufd to the link behind it and take a reference.
 *
 * Returns the link, ERR_PTR(-EBADF) when @ufd does not refer to an open file,
 * or ERR_PTR(-EINVAL) when the file is not a bpf_link file.
 */
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
	struct bpf_link *found = ERR_PTR(-EINVAL);
	struct fd f;

	f = fdget(ufd);
	if (!f.file)
		return ERR_PTR(-EBADF);

	/* only files created with bpf_link_fops carry a link as private data */
	if (f.file->f_op == &bpf_link_fops) {
		found = f.file->private_data;
		bpf_link_inc(found);
	}
	fdput(f);

	return found;
}
|
||||
|
||||
struct bpf_tracing_link {
|
||||
struct bpf_link link;
|
||||
};
|
||||
|
||||
static void bpf_tracing_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_tracing_link *tr_link =
|
||||
container_of(link, struct bpf_tracing_link, link);
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
|
||||
kfree(tr_link);
|
||||
}
|
||||
|
||||
static const struct bpf_link_ops bpf_tracing_link_lops = {
|
||||
.release = bpf_tracing_link_release,
|
||||
};
|
||||
|
||||
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||
{
|
||||
int tr_fd, err;
|
||||
struct bpf_tracing_link *link;
|
||||
int link_fd, err;
|
||||
|
||||
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
|
||||
prog->expected_attach_type != BPF_TRACE_FEXIT &&
|
||||
|
@ -2199,58 +2329,61 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
|||
goto out_put_prog;
|
||||
}
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err)
|
||||
goto out_put_prog;
|
||||
|
||||
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
|
||||
prog, O_CLOEXEC);
|
||||
if (tr_fd < 0) {
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
err = tr_fd;
|
||||
link = kzalloc(sizeof(*link), GFP_USER);
|
||||
if (!link) {
|
||||
err = -ENOMEM;
|
||||
goto out_put_prog;
|
||||
}
|
||||
return tr_fd;
|
||||
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err)
|
||||
goto out_free_link;
|
||||
|
||||
link_fd = bpf_link_new_fd(&link->link);
|
||||
if (link_fd < 0) {
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
err = link_fd;
|
||||
goto out_free_link;
|
||||
}
|
||||
return link_fd;
|
||||
|
||||
out_free_link:
|
||||
kfree(link);
|
||||
out_put_prog:
|
||||
bpf_prog_put(prog);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct bpf_raw_tracepoint {
|
||||
struct bpf_raw_tp_link {
|
||||
struct bpf_link link;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct bpf_prog *prog;
|
||||
};
|
||||
|
||||
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
|
||||
static void bpf_raw_tp_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
|
||||
if (raw_tp->prog) {
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
|
||||
bpf_prog_put(raw_tp->prog);
|
||||
}
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
|
||||
bpf_put_raw_tracepoint(raw_tp->btp);
|
||||
kfree(raw_tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_raw_tp_fops = {
|
||||
.release = bpf_raw_tracepoint_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
static const struct bpf_link_ops bpf_raw_tp_lops = {
|
||||
.release = bpf_raw_tp_link_release,
|
||||
};
|
||||
|
||||
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
|
||||
|
||||
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp;
|
||||
struct bpf_raw_tp_link *raw_tp;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct bpf_prog *prog;
|
||||
const char *tp_name;
|
||||
char buf[128];
|
||||
int tp_fd, err;
|
||||
int link_fd, err;
|
||||
|
||||
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
|
||||
return -EINVAL;
|
||||
|
@ -2302,21 +2435,20 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
|||
err = -ENOMEM;
|
||||
goto out_put_btp;
|
||||
}
|
||||
bpf_link_init(&raw_tp->link, &bpf_raw_tp_lops, prog);
|
||||
raw_tp->btp = btp;
|
||||
raw_tp->prog = prog;
|
||||
|
||||
err = bpf_probe_register(raw_tp->btp, prog);
|
||||
if (err)
|
||||
goto out_free_tp;
|
||||
|
||||
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
|
||||
O_CLOEXEC);
|
||||
if (tp_fd < 0) {
|
||||
link_fd = bpf_link_new_fd(&raw_tp->link);
|
||||
if (link_fd < 0) {
|
||||
bpf_probe_unregister(raw_tp->btp, prog);
|
||||
err = tp_fd;
|
||||
err = link_fd;
|
||||
goto out_free_tp;
|
||||
}
|
||||
return tp_fd;
|
||||
return link_fd;
|
||||
|
||||
out_free_tp:
|
||||
kfree(raw_tp);
|
||||
|
@ -3266,16 +3398,22 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
|||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_op == &bpf_raw_tp_fops) {
|
||||
struct bpf_raw_tracepoint *raw_tp = file->private_data;
|
||||
if (file->f_op == &bpf_link_fops) {
|
||||
struct bpf_link *link = file->private_data;
|
||||
|
||||
if (link->ops == &bpf_raw_tp_lops) {
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->prog->aux->id,
|
||||
raw_tp->link.prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
}
|
||||
goto out_not_supp;
|
||||
}
|
||||
|
||||
event = perf_get_event(file);
|
||||
if (!IS_ERR(event)) {
|
||||
|
@ -3294,6 +3432,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
|||
goto put_file;
|
||||
}
|
||||
|
||||
out_not_supp:
|
||||
err = -ENOTSUPP;
|
||||
put_file:
|
||||
fput(file);
|
||||
|
|
Loading…
Reference in New Issue