bpf: Support replacing cgroup-bpf program in MULTI mode
The common use-case in production is to have multiple cgroup-bpf programs per attach type that cover multiple use-cases. Such programs are attached with BPF_F_ALLOW_MULTI and can be maintained by different people. Order of programs usually matters, for example imagine two egress programs: the first one drops packets and the second one counts packets. If they're swapped the result of counting program will be different. It brings operational challenges with updating cgroup-bpf program(s) attached with BPF_F_ALLOW_MULTI since there is no way to replace a program: * One way to update is to detach all programs first and then attach the new version(s) again in the right order. This introduces an interruption in the work a program is doing and may not be acceptable (e.g. if it's egress firewall); * Another way is attach the new version of a program first and only then detach the old version. This introduces the time interval when two versions of same program are working, what may not be acceptable if a program is not idempotent. It also imposes additional burden on program developers to make sure that two versions of their program can co-exist. Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to replace That way user can replace any program among those attached with BPF_F_ALLOW_MULTI flag without the problems described above. Details of the new API: * If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid descriptor of BPF program, BPF_PROG_ATTACH will return corresponding error (EINVAL or EBADF). * If replace_bpf_fd has valid descriptor of BPF program but such a program is not attached to specified cgroup, BPF_PROG_ATTACH will return ENOENT. BPF_F_REPLACE is introduced to make the user intent clear, since replace_bpf_fd alone can't be used for this (its default value, 0, is a valid fd). BPF_F_REPLACE also makes it possible to extend the API in the future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed). Signed-off-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Andrii Narkyiko <andriin@fb.com> Link: https://lore.kernel.org/bpf/30cd850044a0057bdfcaaf154b7d2f39850ba813.1576741281.git.rdna@fb.com
This commit is contained in:
parent
9fab329d6a
commit
7dd68b3279
|
@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
|
|||
void cgroup_bpf_offline(struct cgroup *cgrp);
|
||||
|
||||
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
struct bpf_prog *replace_prog,
|
||||
enum bpf_attach_type type, u32 flags);
|
||||
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type);
|
||||
|
@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
|
|||
|
||||
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
|
||||
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type, u32 flags);
|
||||
struct bpf_prog *replace_prog, enum bpf_attach_type type,
|
||||
u32 flags);
|
||||
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type, u32 flags);
|
||||
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
|
||||
|
|
|
@ -231,6 +231,11 @@ enum bpf_attach_type {
|
|||
* When children program makes decision (like picking TCP CA or sock bind)
|
||||
* parent program has a chance to override it.
|
||||
*
|
||||
* With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
|
||||
* programs for a cgroup. Though it's possible to replace an old program at
|
||||
* any position by also specifying BPF_F_REPLACE flag and position itself in
|
||||
* replace_bpf_fd attribute. Old program at this position will be released.
|
||||
*
|
||||
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
|
||||
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
|
||||
* Ex1:
|
||||
|
@ -249,6 +254,7 @@ enum bpf_attach_type {
|
|||
*/
|
||||
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||
#define BPF_F_ALLOW_MULTI (1U << 1)
|
||||
#define BPF_F_REPLACE (1U << 2)
|
||||
|
||||
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||
* verifier will perform strict alignment checking as if the kernel
|
||||
|
@ -442,6 +448,10 @@ union bpf_attr {
|
|||
__u32 attach_bpf_fd; /* eBPF program to attach */
|
||||
__u32 attach_type;
|
||||
__u32 attach_flags;
|
||||
__u32 replace_bpf_fd; /* previously attached eBPF
|
||||
* program to replace if
|
||||
* BPF_F_REPLACE is used
|
||||
*/
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||
|
|
|
@ -282,14 +282,17 @@ cleanup:
|
|||
* propagate the change to descendants
|
||||
* @cgrp: The cgroup which descendants to traverse
|
||||
* @prog: A program to attach
|
||||
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
|
||||
* @type: Type of attach operation
|
||||
* @flags: Option flags
|
||||
*
|
||||
* Must be called with cgroup_mutex held.
|
||||
*/
|
||||
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
struct bpf_prog *replace_prog,
|
||||
enum bpf_attach_type type, u32 flags)
|
||||
{
|
||||
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
|
||||
struct list_head *progs = &cgrp->bpf.progs[type];
|
||||
struct bpf_prog *old_prog = NULL;
|
||||
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
|
||||
|
@ -298,14 +301,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|||
enum bpf_cgroup_storage_type stype;
|
||||
int err;
|
||||
|
||||
if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
|
||||
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
|
||||
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
|
||||
/* invalid combination */
|
||||
return -EINVAL;
|
||||
|
||||
if (!hierarchy_allows_attach(cgrp, type))
|
||||
return -EPERM;
|
||||
|
||||
if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
|
||||
if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
|
||||
/* Disallow attaching non-overridable on top
|
||||
* of existing overridable in this cgroup.
|
||||
* Disallow attaching multi-prog if overridable or none
|
||||
|
@ -320,7 +324,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|||
if (pl->prog == prog)
|
||||
/* disallow attaching the same prog twice */
|
||||
return -EINVAL;
|
||||
if (pl->prog == replace_prog)
|
||||
replace_pl = pl;
|
||||
}
|
||||
if ((flags & BPF_F_REPLACE) && !replace_pl)
|
||||
/* prog to replace not found for cgroup */
|
||||
return -ENOENT;
|
||||
} else if (!list_empty(progs)) {
|
||||
replace_pl = list_first_entry(progs, typeof(*pl), node);
|
||||
}
|
||||
|
@ -356,7 +365,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|||
for_each_cgroup_storage_type(stype)
|
||||
pl->storage[stype] = storage[stype];
|
||||
|
||||
cgrp->bpf.flags[type] = flags;
|
||||
cgrp->bpf.flags[type] = saved_flags;
|
||||
|
||||
err = update_effective_progs(cgrp, type);
|
||||
if (err)
|
||||
|
@ -522,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
|
|||
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
|
||||
enum bpf_prog_type ptype, struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_prog *replace_prog = NULL;
|
||||
struct cgroup *cgrp;
|
||||
int ret;
|
||||
|
||||
|
@ -529,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
|
|||
if (IS_ERR(cgrp))
|
||||
return PTR_ERR(cgrp);
|
||||
|
||||
ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
|
||||
if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
|
||||
(attr->attach_flags & BPF_F_REPLACE)) {
|
||||
replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
|
||||
if (IS_ERR(replace_prog)) {
|
||||
cgroup_put(cgrp);
|
||||
return PTR_ERR(replace_prog);
|
||||
}
|
||||
}
|
||||
|
||||
ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
|
||||
attr->attach_flags);
|
||||
|
||||
if (replace_prog)
|
||||
bpf_prog_put(replace_prog);
|
||||
cgroup_put(cgrp);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -2073,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
|
|||
}
|
||||
}
|
||||
|
||||
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
|
||||
#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
|
||||
|
||||
#define BPF_F_ATTACH_MASK \
|
||||
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
|
||||
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
|
||||
|
||||
static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
{
|
||||
|
|
|
@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
|
|||
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type, u32 flags)
|
||||
struct bpf_prog *replace_prog, enum bpf_attach_type type,
|
||||
u32 flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&cgroup_mutex);
|
||||
ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
|
||||
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
|
||||
mutex_unlock(&cgroup_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -231,6 +231,11 @@ enum bpf_attach_type {
|
|||
* When children program makes decision (like picking TCP CA or sock bind)
|
||||
* parent program has a chance to override it.
|
||||
*
|
||||
* With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
|
||||
* programs for a cgroup. Though it's possible to replace an old program at
|
||||
* any position by also specifying BPF_F_REPLACE flag and position itself in
|
||||
* replace_bpf_fd attribute. Old program at this position will be released.
|
||||
*
|
||||
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
|
||||
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
|
||||
* Ex1:
|
||||
|
@ -249,6 +254,7 @@ enum bpf_attach_type {
|
|||
*/
|
||||
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||
#define BPF_F_ALLOW_MULTI (1U << 1)
|
||||
#define BPF_F_REPLACE (1U << 2)
|
||||
|
||||
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||
* verifier will perform strict alignment checking as if the kernel
|
||||
|
@ -442,6 +448,10 @@ union bpf_attr {
|
|||
__u32 attach_bpf_fd; /* eBPF program to attach */
|
||||
__u32 attach_type;
|
||||
__u32 attach_flags;
|
||||
__u32 replace_bpf_fd; /* previously attached eBPF
|
||||
* program to replace if
|
||||
* BPF_F_REPLACE is used
|
||||
*/
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||
|
|
Loading…
Reference in New Issue