bpf: Introduce pinnable bpf_link abstraction

Introduce the bpf_link abstraction, representing an attachment of a BPF program
to a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates
ownership of the attached BPF program and reference counting of the link itself
when it is referenced from multiple anonymous inodes, and it ensures that the
release callback is called from process context, so that users can safely take
mutex locks and sleep.

Additionally, with the new abstraction it is now possible to generalize pinning
of a link object in BPF FS. Pinning a link in BPF FS explicitly prevents the BPF
program from being detached when the process exits, and lets other, independent
processes open the pinned link and keep working with it.

Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF
program attachments) to use the bpf_link framework, making them pinnable
in BPF FS. More FD-based bpf_links will be added in follow-up patches.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com
This commit is contained in:
Andrii Nakryiko 2020-03-02 20:31:57 -08:00 committed by Alexei Starovoitov
parent 775a2be52d
commit 70ed506c3b
3 changed files with 237 additions and 51 deletions

View File

@ -1056,6 +1056,19 @@ extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_prog_new_fd(struct bpf_prog *prog);
/* Only forward-declared here; the structure itself is defined next to the
 * bpf_link_* implementation, so users of this header handle it by pointer.
 */
struct bpf_link;
/* Per-link-type callbacks.
 *
 * release: invoked when the last reference to the link is dropped. It does
 * the type-specific clean up and frees the structure that embeds the link.
 */
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
};
/* Initialise @link with a reference count of 1 and record @ops and @prog. */
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog);
/* Take an additional reference on @link. */
void bpf_link_inc(struct bpf_link *link);
/* Drop a reference on @link; the final put releases the link through
 * ops->release and drops the reference on the link's program.
 */
void bpf_link_put(struct bpf_link *link);
/* Create an anonymous-inode file descriptor that refers to @link. */
int bpf_link_new_fd(struct bpf_link *link);
/* Look up the link behind @ufd and return it with an extra reference, or
 * ERR_PTR(-EBADF) / ERR_PTR(-EINVAL) if @ufd is not open or is not a link fd.
 */
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags); int bpf_obj_get_user(const char __user *pathname, int flags);

View File

@ -25,6 +25,7 @@ enum bpf_type {
BPF_TYPE_UNSPEC = 0, BPF_TYPE_UNSPEC = 0,
BPF_TYPE_PROG, BPF_TYPE_PROG,
BPF_TYPE_MAP, BPF_TYPE_MAP,
BPF_TYPE_LINK,
}; };
static void *bpf_any_get(void *raw, enum bpf_type type) static void *bpf_any_get(void *raw, enum bpf_type type)
@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
case BPF_TYPE_MAP: case BPF_TYPE_MAP:
bpf_map_inc_with_uref(raw); bpf_map_inc_with_uref(raw);
break; break;
case BPF_TYPE_LINK:
bpf_link_inc(raw);
break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
break; break;
@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type)
case BPF_TYPE_MAP: case BPF_TYPE_MAP:
bpf_map_put_with_uref(raw); bpf_map_put_with_uref(raw);
break; break;
case BPF_TYPE_LINK:
bpf_link_put(raw);
break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
break; break;
@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{ {
void *raw; void *raw;
*type = BPF_TYPE_MAP;
raw = bpf_map_get_with_uref(ufd); raw = bpf_map_get_with_uref(ufd);
if (IS_ERR(raw)) { if (!IS_ERR(raw)) {
*type = BPF_TYPE_PROG; *type = BPF_TYPE_MAP;
raw = bpf_prog_get(ufd); return raw;
} }
return raw; raw = bpf_prog_get(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
return raw;
}
raw = bpf_link_get_from_fd(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_LINK;
return raw;
}
return ERR_PTR(-EINVAL);
} }
static const struct inode_operations bpf_dir_iops; static const struct inode_operations bpf_dir_iops;
static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_map_iops = { };
/* Empty on purpose: the address of this object is what matters. An inode's
 * i_op pointer is compared against it to recognise a pinned bpf_link.
 */
static const struct inode_operations bpf_link_iops = { };
static struct inode *bpf_get_inode(struct super_block *sb, static struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir, const struct inode *dir,
@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
*type = BPF_TYPE_PROG; *type = BPF_TYPE_PROG;
else if (inode->i_op == &bpf_map_iops) else if (inode->i_op == &bpf_map_iops)
*type = BPF_TYPE_MAP; *type = BPF_TYPE_MAP;
else if (inode->i_op == &bpf_link_iops)
*type = BPF_TYPE_LINK;
else else
return -EACCES; return -EACCES;
@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
&bpffs_map_fops : &bpffs_obj_fops); &bpffs_map_fops : &bpffs_obj_fops);
} }
/* vfs_mkobj() callback used when the object being pinned is a bpf_link.
 * Creates the bpffs object for @dentry with the link inode operations and the
 * generic bpffs_obj_fops; @arg is the raw link pointer handed to vfs_mkobj().
 * Returns whatever bpf_mkobj_ops() returns.
 */
static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
{
return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
&bpffs_obj_fops);
}
static struct dentry * static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{ {
@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
case BPF_TYPE_MAP: case BPF_TYPE_MAP:
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
break; break;
case BPF_TYPE_LINK:
ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
break;
default: default:
ret = -EPERM; ret = -EPERM;
} }
@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
ret = bpf_prog_new_fd(raw); ret = bpf_prog_new_fd(raw);
else if (type == BPF_TYPE_MAP) else if (type == BPF_TYPE_MAP)
ret = bpf_map_new_fd(raw, f_flags); ret = bpf_map_new_fd(raw, f_flags);
else if (type == BPF_TYPE_LINK)
ret = bpf_link_new_fd(raw);
else else
return -ENOENT; return -ENOENT;
@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
if (inode->i_op == &bpf_map_iops) if (inode->i_op == &bpf_map_iops)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
if (inode->i_op == &bpf_link_iops)
return ERR_PTR(-EINVAL);
if (inode->i_op != &bpf_prog_iops) if (inode->i_op != &bpf_prog_iops)
return ERR_PTR(-EACCES); return ERR_PTR(-EACCES);

View File

@ -2173,24 +2173,154 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags); attr->file_flags);
} }
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) struct bpf_link {
{ atomic64_t refcnt;
struct bpf_prog *prog = filp->private_data; const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog)
{
atomic64_set(&link->refcnt, 1);
link->ops = ops;
link->prog = prog;
}
/* Take an additional reference on @link by atomically incrementing its
 * reference count. Pair every call with a bpf_link_put().
 */
void bpf_link_inc(struct bpf_link *link)
{
atomic64_inc(&link->refcnt);
}
/* Release a link whose last reference has been dropped.
 *
 * bpf_link_free is guaranteed to be called from process context, so the
 * release callback may take mutex locks and sleep.
 *
 * ops->release performs the type-specific clean up and frees the structure
 * that embeds @link, so @link must not be dereferenced after that call. The
 * program reference owned by the link is dropped exactly once, last.
 */
static void bpf_link_free(struct bpf_link *link)
{
	/* remember prog locally, because release below will free link memory */
	struct bpf_prog *prog = link->prog;

	/* extra clean up and kfree of container link struct */
	link->ops->release(link);
	/* no more accessing of link members after this point */
	bpf_prog_put(prog);
}
/* Workqueue callback: recover the bpf_link that embeds @work and free it. */
static void bpf_link_put_deferred(struct work_struct *work)
{
	bpf_link_free(container_of(work, struct bpf_link, work));
}
/* bpf_link_put can be called from atomic context, but ensures that resources
* are freed from process context
*/
void bpf_link_put(struct bpf_link *link)
{
if (!atomic64_dec_and_test(&link->refcnt))
return;
if (in_atomic()) {
INIT_WORK(&link->work, bpf_link_put_deferred);
schedule_work(&link->work);
} else {
bpf_link_free(link);
}
}
static int bpf_link_release(struct inode *inode, struct file *filp)
{
struct bpf_link *link = filp->private_data;
bpf_link_put(link);
return 0; return 0;
} }
static const struct file_operations bpf_tracing_prog_fops = { #ifdef CONFIG_PROC_FS
.release = bpf_tracing_prog_release, static const struct bpf_link_ops bpf_raw_tp_lops;
static const struct bpf_link_ops bpf_tracing_link_lops;
static const struct bpf_link_ops bpf_xdp_link_lops;
/* fdinfo handler for link file descriptors.
 *
 * Writes three lines to @m: the link type (derived from which ops table the
 * link uses, "unknown" if it matches none of the known ones), the attached
 * program's tag as a hex string, and the program's id.
 */
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_link *link = filp->private_data;
	const struct bpf_prog *prog = link->prog;
	/* two hex digits per tag byte plus the terminating NUL */
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
	const char *link_type = "unknown";

	if (link->ops == &bpf_raw_tp_lops)
		link_type = "raw_tracepoint";
	else if (link->ops == &bpf_tracing_link_lops)
		link_type = "tracing";

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "link_type:\t%s\n"
		   "prog_tag:\t%s\n"
		   "prog_id:\t%u\n",
		   link_type,
		   prog_tag,
		   prog->aux->id);
}
#endif
const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
#endif
.release = bpf_link_release,
.read = bpf_dummy_read, .read = bpf_dummy_read,
.write = bpf_dummy_write, .write = bpf_dummy_write,
}; };
/* Create a close-on-exec anonymous-inode file descriptor named "bpf-link"
 * that uses bpf_link_fops and carries @link as its private data.
 *
 * Returns the result of anon_inode_getfd(); callers treat a negative value as
 * an error. No reference is taken here: the caller's reference is the one
 * dropped when the file is released through bpf_link_fops.
 */
int bpf_link_new_fd(struct bpf_link *link)
{
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
}
/* Resolve @ufd to the bpf_link behind it.
 *
 * On success the link is returned with one extra reference, which the caller
 * must drop with bpf_link_put(). Returns ERR_PTR(-EBADF) if @ufd does not
 * refer to an open file and ERR_PTR(-EINVAL) if the file is not a link file.
 */
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_link *link;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op == &bpf_link_fops) {
		/* pin the link before the file reference is let go */
		link = f.file->private_data;
		bpf_link_inc(link);
	} else {
		link = ERR_PTR(-EINVAL);
	}

	fdput(f);
	return link;
}
/* Link type for tracing program attachments. It carries no state of its own
 * beyond the embedded generic link, from which it is recovered with
 * container_of().
 */
struct bpf_tracing_link {
struct bpf_link link;
};
/* bpf_link_ops.release for tracing links: call bpf_trampoline_unlink_prog()
 * on the link's program (warning once if it fails), then free the
 * bpf_tracing_link that embeds @link.
 */
static void bpf_tracing_link_release(struct bpf_link *link)
{
	WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
	kfree(container_of(link, struct bpf_tracing_link, link));
}
/* Ops table for tracing links. Its address also serves as the type tag that
 * identifies a link as a tracing link.
 */
static const struct bpf_link_ops bpf_tracing_link_lops = {
.release = bpf_tracing_link_release,
};
static int bpf_tracing_prog_attach(struct bpf_prog *prog) static int bpf_tracing_prog_attach(struct bpf_prog *prog)
{ {
int tr_fd, err; struct bpf_tracing_link *link;
int link_fd, err;
if (prog->expected_attach_type != BPF_TRACE_FENTRY && if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT && prog->expected_attach_type != BPF_TRACE_FEXIT &&
@ -2199,58 +2329,61 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
goto out_put_prog; goto out_put_prog;
} }
err = bpf_trampoline_link_prog(prog); link = kzalloc(sizeof(*link), GFP_USER);
if (err) if (!link) {
goto out_put_prog; err = -ENOMEM;
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
prog, O_CLOEXEC);
if (tr_fd < 0) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
err = tr_fd;
goto out_put_prog; goto out_put_prog;
} }
return tr_fd; bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
err = bpf_trampoline_link_prog(prog);
if (err)
goto out_free_link;
link_fd = bpf_link_new_fd(&link->link);
if (link_fd < 0) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
err = link_fd;
goto out_free_link;
}
return link_fd;
out_free_link:
kfree(link);
out_put_prog: out_put_prog:
bpf_prog_put(prog); bpf_prog_put(prog);
return err; return err;
} }
struct bpf_raw_tracepoint { struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp; struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
}; };
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) static void bpf_raw_tp_link_release(struct bpf_link *link)
{ {
struct bpf_raw_tracepoint *raw_tp = filp->private_data; struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
if (raw_tp->prog) { bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
bpf_put_raw_tracepoint(raw_tp->btp); bpf_put_raw_tracepoint(raw_tp->btp);
kfree(raw_tp); kfree(raw_tp);
return 0;
} }
static const struct file_operations bpf_raw_tp_fops = { static const struct bpf_link_ops bpf_raw_tp_lops = {
.release = bpf_raw_tracepoint_release, .release = bpf_raw_tp_link_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
}; };
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr) static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{ {
struct bpf_raw_tracepoint *raw_tp; struct bpf_raw_tp_link *raw_tp;
struct bpf_raw_event_map *btp; struct bpf_raw_event_map *btp;
struct bpf_prog *prog; struct bpf_prog *prog;
const char *tp_name; const char *tp_name;
char buf[128]; char buf[128];
int tp_fd, err; int link_fd, err;
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
return -EINVAL; return -EINVAL;
@ -2302,21 +2435,20 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
err = -ENOMEM; err = -ENOMEM;
goto out_put_btp; goto out_put_btp;
} }
bpf_link_init(&raw_tp->link, &bpf_raw_tp_lops, prog);
raw_tp->btp = btp; raw_tp->btp = btp;
raw_tp->prog = prog;
err = bpf_probe_register(raw_tp->btp, prog); err = bpf_probe_register(raw_tp->btp, prog);
if (err) if (err)
goto out_free_tp; goto out_free_tp;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, link_fd = bpf_link_new_fd(&raw_tp->link);
O_CLOEXEC); if (link_fd < 0) {
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog); bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd; err = link_fd;
goto out_free_tp; goto out_free_tp;
} }
return tp_fd; return link_fd;
out_free_tp: out_free_tp:
kfree(raw_tp); kfree(raw_tp);
@ -3266,15 +3398,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (err) if (err)
goto out; goto out;
if (file->f_op == &bpf_raw_tp_fops) { if (file->f_op == &bpf_link_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data; struct bpf_link *link = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr, if (link->ops == &bpf_raw_tp_lops) {
raw_tp->prog->aux->id, struct bpf_raw_tp_link *raw_tp =
BPF_FD_TYPE_RAW_TRACEPOINT, container_of(link, struct bpf_raw_tp_link, link);
btp->tp->name, 0, 0); struct bpf_raw_event_map *btp = raw_tp->btp;
goto put_file;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->link.prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
goto out_not_supp;
} }
event = perf_get_event(file); event = perf_get_event(file);
@ -3294,6 +3432,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
goto put_file; goto put_file;
} }
out_not_supp:
err = -ENOTSUPP; err = -ENOTSUPP;
put_file: put_file:
fput(file); fput(file);