Merge branch 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "This contains the cgroup.procs permission check fixes so that they use the credentials at the time of open rather than write, which also fixes the cgroup namespace lifetime bug" * 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: selftests: cgroup: Test open-time cgroup namespace usage for migration checks selftests: cgroup: Test open-time credential usage for migration checks selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644 cgroup: Use open-time cgroup namespace for process migration perm checks cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv cgroup: Use open-time credentials for process migraton perm checks
This commit is contained in:
commit
d1587f7bfe
|
@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
|
|||
return container_of(kfc, struct cgroup_fs_context, kfc);
|
||||
}
|
||||
|
||||
struct cgroup_pidlist;
|
||||
|
||||
struct cgroup_file_ctx {
|
||||
struct cgroup_namespace *ns;
|
||||
|
||||
struct {
|
||||
void *trigger;
|
||||
} psi;
|
||||
|
||||
struct {
|
||||
bool started;
|
||||
struct css_task_iter iter;
|
||||
} procs;
|
||||
|
||||
struct {
|
||||
struct cgroup_pidlist *pidlist;
|
||||
} procs1;
|
||||
};
|
||||
|
||||
/*
|
||||
* A cgroup can be associated with multiple css_sets as different tasks may
|
||||
* belong to different cgroups on different hierarchies. In the other
|
||||
|
|
|
@ -394,6 +394,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
|
|||
* next pid to display, if any
|
||||
*/
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct cgroup *cgrp = seq_css(s)->cgroup;
|
||||
struct cgroup_pidlist *l;
|
||||
enum cgroup_filetype type = seq_cft(s)->private;
|
||||
|
@ -403,25 +404,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
|
|||
mutex_lock(&cgrp->pidlist_mutex);
|
||||
|
||||
/*
|
||||
* !NULL @of->priv indicates that this isn't the first start()
|
||||
* after open. If the matching pidlist is around, we can use that.
|
||||
* Look for it. Note that @of->priv can't be used directly. It
|
||||
* could already have been destroyed.
|
||||
* !NULL @ctx->procs1.pidlist indicates that this isn't the first
|
||||
* start() after open. If the matching pidlist is around, we can use
|
||||
* that. Look for it. Note that @ctx->procs1.pidlist can't be used
|
||||
* directly. It could already have been destroyed.
|
||||
*/
|
||||
if (of->priv)
|
||||
of->priv = cgroup_pidlist_find(cgrp, type);
|
||||
if (ctx->procs1.pidlist)
|
||||
ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
|
||||
|
||||
/*
|
||||
* Either this is the first start() after open or the matching
|
||||
* pidlist has been destroyed inbetween. Create a new one.
|
||||
*/
|
||||
if (!of->priv) {
|
||||
ret = pidlist_array_load(cgrp, type,
|
||||
(struct cgroup_pidlist **)&of->priv);
|
||||
if (!ctx->procs1.pidlist) {
|
||||
ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
l = of->priv;
|
||||
l = ctx->procs1.pidlist;
|
||||
|
||||
if (pid) {
|
||||
int end = l->length;
|
||||
|
@ -449,7 +449,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
|
|||
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
|
||||
{
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct cgroup_pidlist *l = of->priv;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct cgroup_pidlist *l = ctx->procs1.pidlist;
|
||||
|
||||
if (l)
|
||||
mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
|
||||
|
@ -460,7 +461,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
|
|||
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
{
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct cgroup_pidlist *l = of->priv;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct cgroup_pidlist *l = ctx->procs1.pidlist;
|
||||
pid_t *p = v;
|
||||
pid_t *end = l->list + l->length;
|
||||
/*
|
||||
|
@ -504,10 +506,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
|
|||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Even if we're attaching all tasks in the thread group, we only
|
||||
* need to check permissions on one of them.
|
||||
* Even if we're attaching all tasks in the thread group, we only need
|
||||
* to check permissions on one of them. Check permissions using the
|
||||
* credentials from file open to protect against inherited fd attacks.
|
||||
*/
|
||||
cred = current_cred();
|
||||
cred = of->file->f_cred;
|
||||
tcred = get_task_cred(task);
|
||||
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
|
||||
!uid_eq(cred->euid, tcred->uid) &&
|
||||
|
|
|
@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
|
|||
static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, enum psi_res res)
|
||||
{
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct psi_trigger *new;
|
||||
struct cgroup *cgrp;
|
||||
struct psi_group *psi;
|
||||
|
@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
|||
return PTR_ERR(new);
|
||||
}
|
||||
|
||||
psi_trigger_replace(&of->priv, new);
|
||||
psi_trigger_replace(&ctx->psi.trigger, new);
|
||||
|
||||
cgroup_put(cgrp);
|
||||
|
||||
|
@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
|
|||
static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
|
||||
poll_table *pt)
|
||||
{
|
||||
return psi_trigger_poll(&of->priv, of->file, pt);
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
|
||||
}
|
||||
|
||||
static void cgroup_pressure_release(struct kernfs_open_file *of)
|
||||
{
|
||||
psi_trigger_replace(&of->priv, NULL);
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
psi_trigger_replace(&ctx->psi.trigger, NULL);
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void)
|
||||
|
@ -3811,24 +3816,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
|
|||
static int cgroup_file_open(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cftype *cft = of_cft(of);
|
||||
struct cgroup_file_ctx *ctx;
|
||||
int ret;
|
||||
|
||||
if (cft->open)
|
||||
return cft->open(of);
|
||||
return 0;
|
||||
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->ns = current->nsproxy->cgroup_ns;
|
||||
get_cgroup_ns(ctx->ns);
|
||||
of->priv = ctx;
|
||||
|
||||
if (!cft->open)
|
||||
return 0;
|
||||
|
||||
ret = cft->open(of);
|
||||
if (ret) {
|
||||
put_cgroup_ns(ctx->ns);
|
||||
kfree(ctx);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void cgroup_file_release(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cftype *cft = of_cft(of);
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
if (cft->release)
|
||||
cft->release(of);
|
||||
put_cgroup_ns(ctx->ns);
|
||||
kfree(ctx);
|
||||
}
|
||||
|
||||
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct cgroup *cgrp = of->kn->parent->priv;
|
||||
struct cftype *cft = of_cft(of);
|
||||
struct cgroup_subsys_state *css;
|
||||
|
@ -3845,7 +3869,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
|
|||
*/
|
||||
if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
|
||||
!(cft->flags & CFTYPE_NS_DELEGATABLE) &&
|
||||
ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
|
||||
ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
|
||||
return -EPERM;
|
||||
|
||||
if (cft->write)
|
||||
|
@ -4751,21 +4775,21 @@ void css_task_iter_end(struct css_task_iter *it)
|
|||
|
||||
static void cgroup_procs_release(struct kernfs_open_file *of)
|
||||
{
|
||||
if (of->priv) {
|
||||
css_task_iter_end(of->priv);
|
||||
kfree(of->priv);
|
||||
}
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
if (ctx->procs.started)
|
||||
css_task_iter_end(&ctx->procs.iter);
|
||||
}
|
||||
|
||||
static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
{
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct css_task_iter *it = of->priv;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
|
||||
if (pos)
|
||||
(*pos)++;
|
||||
|
||||
return css_task_iter_next(it);
|
||||
return css_task_iter_next(&ctx->procs.iter);
|
||||
}
|
||||
|
||||
static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
|
||||
|
@ -4773,21 +4797,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
|
|||
{
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct cgroup *cgrp = seq_css(s)->cgroup;
|
||||
struct css_task_iter *it = of->priv;
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct css_task_iter *it = &ctx->procs.iter;
|
||||
|
||||
/*
|
||||
* When a seq_file is seeked, it's always traversed sequentially
|
||||
* from position 0, so we can simply keep iterating on !0 *pos.
|
||||
*/
|
||||
if (!it) {
|
||||
if (!ctx->procs.started) {
|
||||
if (WARN_ON_ONCE((*pos)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
it = kzalloc(sizeof(*it), GFP_KERNEL);
|
||||
if (!it)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
of->priv = it;
|
||||
css_task_iter_start(&cgrp->self, iter_flags, it);
|
||||
ctx->procs.started = true;
|
||||
} else if (!(*pos)) {
|
||||
css_task_iter_end(it);
|
||||
css_task_iter_start(&cgrp->self, iter_flags, it);
|
||||
|
@ -4838,9 +4859,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
|
|||
|
||||
static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
|
||||
struct cgroup *dst_cgrp,
|
||||
struct super_block *sb)
|
||||
struct super_block *sb,
|
||||
struct cgroup_namespace *ns)
|
||||
{
|
||||
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
||||
struct cgroup *com_cgrp = src_cgrp;
|
||||
int ret;
|
||||
|
||||
|
@ -4869,11 +4890,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
|
|||
|
||||
static int cgroup_attach_permissions(struct cgroup *src_cgrp,
|
||||
struct cgroup *dst_cgrp,
|
||||
struct super_block *sb, bool threadgroup)
|
||||
struct super_block *sb, bool threadgroup,
|
||||
struct cgroup_namespace *ns)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb);
|
||||
ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -4890,8 +4912,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
|
|||
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
|
||||
bool threadgroup)
|
||||
{
|
||||
struct cgroup_file_ctx *ctx = of->priv;
|
||||
struct cgroup *src_cgrp, *dst_cgrp;
|
||||
struct task_struct *task;
|
||||
const struct cred *saved_cred;
|
||||
ssize_t ret;
|
||||
bool locked;
|
||||
|
||||
|
@ -4909,9 +4933,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
|
|||
src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
/* process and thread migrations follow same delegation rule */
|
||||
/*
|
||||
* Process and thread migrations follow same delegation rule. Check
|
||||
* permissions using the credentials from file open to protect against
|
||||
* inherited fd attacks.
|
||||
*/
|
||||
saved_cred = override_creds(of->file->f_cred);
|
||||
ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
|
||||
of->file->f_path.dentry->d_sb, threadgroup);
|
||||
of->file->f_path.dentry->d_sb,
|
||||
threadgroup, ctx->ns);
|
||||
revert_creds(saved_cred);
|
||||
if (ret)
|
||||
goto out_finish;
|
||||
|
||||
|
@ -6130,7 +6161,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
|
|||
goto err;
|
||||
|
||||
ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
|
||||
!(kargs->flags & CLONE_THREAD));
|
||||
!(kargs->flags & CLONE_THREAD),
|
||||
current->nsproxy->cgroup_ns);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
|
|
@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)
|
|||
|
||||
int cg_create(const char *cgroup)
|
||||
{
|
||||
return mkdir(cgroup, 0644);
|
||||
return mkdir(cgroup, 0755);
|
||||
}
|
||||
|
||||
int cg_wait_for_proc_count(const char *cgroup, int count)
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <linux/limits.h>
|
||||
#include <linux/sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
|
@ -674,6 +677,166 @@ cleanup:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* cgroup migration permission check should be performed based on the
|
||||
* credentials at the time of open instead of write.
|
||||
*/
|
||||
static int test_cgcore_lesser_euid_open(const char *root)
|
||||
{
|
||||
const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
|
||||
int ret = KSFT_FAIL;
|
||||
char *cg_test_a = NULL, *cg_test_b = NULL;
|
||||
char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
|
||||
int cg_test_b_procs_fd = -1;
|
||||
uid_t saved_uid;
|
||||
|
||||
cg_test_a = cg_name(root, "cg_test_a");
|
||||
cg_test_b = cg_name(root, "cg_test_b");
|
||||
|
||||
if (!cg_test_a || !cg_test_b)
|
||||
goto cleanup;
|
||||
|
||||
cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
|
||||
cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
|
||||
|
||||
if (!cg_test_a_procs || !cg_test_b_procs)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cg_test_a) || cg_create(cg_test_b))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_enter_current(cg_test_a))
|
||||
goto cleanup;
|
||||
|
||||
if (chown(cg_test_a_procs, test_euid, -1) ||
|
||||
chown(cg_test_b_procs, test_euid, -1))
|
||||
goto cleanup;
|
||||
|
||||
saved_uid = geteuid();
|
||||
if (seteuid(test_euid))
|
||||
goto cleanup;
|
||||
|
||||
cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
|
||||
|
||||
if (seteuid(saved_uid))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_test_b_procs_fd < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
cg_enter_current(root);
|
||||
if (cg_test_b_procs_fd >= 0)
|
||||
close(cg_test_b_procs_fd);
|
||||
if (cg_test_b)
|
||||
cg_destroy(cg_test_b);
|
||||
if (cg_test_a)
|
||||
cg_destroy(cg_test_a);
|
||||
free(cg_test_b_procs);
|
||||
free(cg_test_a_procs);
|
||||
free(cg_test_b);
|
||||
free(cg_test_a);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct lesser_ns_open_thread_arg {
|
||||
const char *path;
|
||||
int fd;
|
||||
int err;
|
||||
};
|
||||
|
||||
static int lesser_ns_open_thread_fn(void *arg)
|
||||
{
|
||||
struct lesser_ns_open_thread_arg *targ = arg;
|
||||
|
||||
targ->fd = open(targ->path, O_RDWR);
|
||||
targ->err = errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cgroup migration permission check should be performed based on the cgroup
|
||||
* namespace at the time of open instead of write.
|
||||
*/
|
||||
static int test_cgcore_lesser_ns_open(const char *root)
|
||||
{
|
||||
static char stack[65536];
|
||||
const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
|
||||
int ret = KSFT_FAIL;
|
||||
char *cg_test_a = NULL, *cg_test_b = NULL;
|
||||
char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
|
||||
int cg_test_b_procs_fd = -1;
|
||||
struct lesser_ns_open_thread_arg targ = { .fd = -1 };
|
||||
pid_t pid;
|
||||
int status;
|
||||
|
||||
cg_test_a = cg_name(root, "cg_test_a");
|
||||
cg_test_b = cg_name(root, "cg_test_b");
|
||||
|
||||
if (!cg_test_a || !cg_test_b)
|
||||
goto cleanup;
|
||||
|
||||
cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
|
||||
cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
|
||||
|
||||
if (!cg_test_a_procs || !cg_test_b_procs)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cg_test_a) || cg_create(cg_test_b))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_enter_current(cg_test_b))
|
||||
goto cleanup;
|
||||
|
||||
if (chown(cg_test_a_procs, test_euid, -1) ||
|
||||
chown(cg_test_b_procs, test_euid, -1))
|
||||
goto cleanup;
|
||||
|
||||
targ.path = cg_test_b_procs;
|
||||
pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
|
||||
CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
|
||||
&targ);
|
||||
if (pid < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (waitpid(pid, &status, 0) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (!WIFEXITED(status))
|
||||
goto cleanup;
|
||||
|
||||
cg_test_b_procs_fd = targ.fd;
|
||||
if (cg_test_b_procs_fd < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_enter_current(cg_test_a))
|
||||
goto cleanup;
|
||||
|
||||
if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
cg_enter_current(root);
|
||||
if (cg_test_b_procs_fd >= 0)
|
||||
close(cg_test_b_procs_fd);
|
||||
if (cg_test_b)
|
||||
cg_destroy(cg_test_b);
|
||||
if (cg_test_a)
|
||||
cg_destroy(cg_test_a);
|
||||
free(cg_test_b_procs);
|
||||
free(cg_test_a_procs);
|
||||
free(cg_test_b);
|
||||
free(cg_test_a);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define T(x) { x, #x }
|
||||
struct corecg_test {
|
||||
int (*fn)(const char *root);
|
||||
|
@ -689,6 +852,8 @@ struct corecg_test {
|
|||
T(test_cgcore_proc_migration),
|
||||
T(test_cgcore_thread_migration),
|
||||
T(test_cgcore_destroy),
|
||||
T(test_cgcore_lesser_euid_open),
|
||||
T(test_cgcore_lesser_ns_open),
|
||||
};
|
||||
#undef T
|
||||
|
||||
|
|
Loading…
Reference in New Issue