Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "This includes Roman's cgroup2 freezer implementation. It's a separate mechanism from cgroup1 freezer. Instead of blocking user tasks in arbitrary uninterruptible sleeps, the new implementation extends jobctl stop - frozen tasks are trapped in jobctl stop until thawed and can be killed and ptraced. Lots of thanks to Oleg for shepherding the effort. Other than that, there are a few trivial changes" * 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: never call do_group_exit() with task->frozen bit set kernel: cgroup: fix misuse of %x cgroup: get rid of cgroup_freezer_frozen_exit() cgroup: prevent spurious transition into non-frozen state cgroup: Remove unused cgrp variable cgroup: document cgroup v2 freezer interface cgroup: add tracing points for cgroup v2 freezer cgroup: make TRACE_CGROUP_PATH irq-safe kselftests: cgroup: add freezer controller self-tests kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy() cgroup: cgroup v2 freezer cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock cgroup: implement __cgroup_task_count() helper cgroup: rename freezer.c into legacy_freezer.c cgroup: remove extra cgroup_migrate_finish() call
This commit is contained in:
commit
abde77eb5c
|
@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
|
|||
populated
|
||||
1 if the cgroup or its descendants contains any live
|
||||
processes; otherwise, 0.
|
||||
frozen
|
||||
1 if the cgroup is frozen; otherwise, 0.
|
||||
|
||||
cgroup.max.descendants
|
||||
A read-write single value file. The default is "max".
|
||||
|
@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
|
|||
A dying cgroup can consume system resources not exceeding
|
||||
limits, which were active at the moment of cgroup deletion.
|
||||
|
||||
cgroup.freeze
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
Allowed values are "0" and "1". The default is "0".
|
||||
|
||||
Writing "1" to the file causes freezing of the cgroup and all
|
||||
descendant cgroups. This means that all belonging processes will
|
||||
be stopped and will not run until the cgroup is explicitly
|
||||
unfrozen. Freezing of the cgroup may take some time; when this action
|
||||
is completed, the "frozen" value in the cgroup.events control file
|
||||
will be updated to "1" and the corresponding notification will be
|
||||
issued.
|
||||
|
||||
A cgroup can be frozen either by its own settings, or by settings
|
||||
of any ancestor cgroups. If any of the ancestor cgroups is frozen, the
|
||||
cgroup will remain frozen.
|
||||
|
||||
Processes in the frozen cgroup can be killed by a fatal signal.
|
||||
They also can enter and leave a frozen cgroup: either by an explicit
|
||||
move by a user, or if freezing of the cgroup races with fork().
|
||||
If a process is moved to a frozen cgroup, it stops. If a process is
|
||||
moved out of a frozen cgroup, it becomes running.
|
||||
|
||||
Frozen status of a cgroup doesn't affect any cgroup tree operations:
|
||||
it's possible to delete a frozen (and empty) cgroup, as well as
|
||||
create new sub-cgroups.
|
||||
|
||||
Controllers
|
||||
===========
|
||||
|
|
|
@ -65,6 +65,12 @@ enum {
|
|||
* specified at mount time and thus is implemented here.
|
||||
*/
|
||||
CGRP_CPUSET_CLONE_CHILDREN,
|
||||
|
||||
/* Control group has to be frozen. */
|
||||
CGRP_FREEZE,
|
||||
|
||||
/* Cgroup is frozen. */
|
||||
CGRP_FROZEN,
|
||||
};
|
||||
|
||||
/* cgroup_root->flags */
|
||||
|
@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
|
|||
struct cgroup *updated_next; /* NULL iff not on the list */
|
||||
};
|
||||
|
||||
struct cgroup_freezer_state {
|
||||
/* Should the cgroup and its descendants be frozen. */
|
||||
bool freeze;
|
||||
|
||||
/* Should the cgroup actually be frozen? */
|
||||
int e_freeze;
|
||||
|
||||
/* Fields below are protected by css_set_lock */
|
||||
|
||||
/* Number of frozen descendant cgroups */
|
||||
int nr_frozen_descendants;
|
||||
|
||||
/*
|
||||
* Number of tasks, which are counted as frozen:
|
||||
* frozen, SIGSTOPped, and PTRACEd.
|
||||
*/
|
||||
int nr_frozen_tasks;
|
||||
};
|
||||
|
||||
struct cgroup {
|
||||
/* self css with NULL ->ss, points back to this cgroup */
|
||||
struct cgroup_subsys_state self;
|
||||
|
@ -349,6 +374,11 @@ struct cgroup {
|
|||
* Dying cgroups are cgroups which were deleted by a user,
|
||||
* but are still existing because someone else is holding a reference.
|
||||
* max_descendants is a maximum allowed number of descendant cgroups.
|
||||
*
|
||||
* nr_descendants and nr_dying_descendants are protected
|
||||
* by cgroup_mutex and css_set_lock. It's fine to read them holding
|
||||
* any of cgroup_mutex and css_set_lock; for writing both locks
|
||||
* should be held.
|
||||
*/
|
||||
int nr_descendants;
|
||||
int nr_dying_descendants;
|
||||
|
@ -448,6 +478,9 @@ struct cgroup {
|
|||
/* If there is block congestion on this cgroup. */
|
||||
atomic_t congestion_count;
|
||||
|
||||
/* Used to store internal freezer state */
|
||||
struct cgroup_freezer_state freezer;
|
||||
|
||||
/* ids of the ancestors at each level including self */
|
||||
int ancestor_ids[];
|
||||
};
|
||||
|
|
|
@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
|
|||
free_cgroup_ns(ns);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
|
||||
void cgroup_enter_frozen(void);
|
||||
void cgroup_leave_frozen(bool always_leave);
|
||||
void cgroup_update_frozen(struct cgroup *cgrp);
|
||||
void cgroup_freeze(struct cgroup *cgrp, bool freeze);
|
||||
void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
|
||||
struct cgroup *dst);
|
||||
|
||||
static inline bool cgroup_task_freeze(struct task_struct *task)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
if (task->flags & PF_KTHREAD)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool cgroup_task_frozen(struct task_struct *task)
|
||||
{
|
||||
return task->frozen;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_CGROUPS */
|
||||
|
||||
static inline void cgroup_enter_frozen(void) { }
|
||||
static inline void cgroup_leave_frozen(bool always_leave) { }
|
||||
static inline bool cgroup_task_freeze(struct task_struct *task)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool cgroup_task_frozen(struct task_struct *task)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_CGROUPS */
|
||||
|
||||
#endif /* _LINUX_CGROUP_H */
|
||||
|
|
|
@ -726,6 +726,8 @@ struct task_struct {
|
|||
#ifdef CONFIG_CGROUPS
|
||||
/* disallow userland-initiated cgroup migration */
|
||||
unsigned no_cgroup_migration:1;
|
||||
/* task is frozen/stopped (used by the cgroup freezer) */
|
||||
unsigned frozen:1;
|
||||
#endif
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
/* to be used once the psi infrastructure lands upstream. */
|
||||
|
|
|
@ -18,6 +18,7 @@ struct task_struct;
|
|||
#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
|
||||
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
|
||||
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
|
||||
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
|
||||
|
||||
#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
|
||||
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
|
||||
|
@ -26,6 +27,7 @@ struct task_struct;
|
|||
#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
|
||||
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
|
||||
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
|
||||
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
|
||||
|
||||
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
|
||||
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
|
||||
|
|
|
@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
|
|||
TP_ARGS(cgrp, path)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(cgroup, cgroup_freeze,
|
||||
|
||||
TP_PROTO(struct cgroup *cgrp, const char *path),
|
||||
|
||||
TP_ARGS(cgrp, path)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(cgroup, cgroup_unfreeze,
|
||||
|
||||
TP_PROTO(struct cgroup *cgrp, const char *path),
|
||||
|
||||
TP_ARGS(cgrp, path)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(cgroup_migrate,
|
||||
|
||||
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
||||
|
@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
|
|||
TP_ARGS(dst_cgrp, path, task, threadgroup)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(cgroup_event,
|
||||
|
||||
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
||||
|
||||
TP_ARGS(cgrp, path, val),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, root )
|
||||
__field( int, id )
|
||||
__field( int, level )
|
||||
__string( path, path )
|
||||
__field( int, val )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->root = cgrp->root->hierarchy_id;
|
||||
__entry->id = cgrp->id;
|
||||
__entry->level = cgrp->level;
|
||||
__assign_str(path, path);
|
||||
__entry->val = val;
|
||||
),
|
||||
|
||||
TP_printk("root=%d id=%d level=%d path=%s val=%d",
|
||||
__entry->root, __entry->id, __entry->level, __get_str(path),
|
||||
__entry->val)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
|
||||
|
||||
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
||||
|
||||
TP_ARGS(cgrp, path, val)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
|
||||
|
||||
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
||||
|
||||
TP_ARGS(cgrp, path, val)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_CGROUP_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
|
||||
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
|
||||
|
||||
obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
|
||||
obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
|
||||
obj-$(CONFIG_CGROUP_PIDS) += pids.o
|
||||
obj-$(CONFIG_CGROUP_RDMA) += rdma.o
|
||||
obj-$(CONFIG_CPUSETS) += cpuset.o
|
||||
|
|
|
@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
|
|||
#define TRACE_CGROUP_PATH(type, cgrp, ...) \
|
||||
do { \
|
||||
if (trace_cgroup_##type##_enabled()) { \
|
||||
spin_lock(&trace_cgroup_path_lock); \
|
||||
unsigned long flags; \
|
||||
spin_lock_irqsave(&trace_cgroup_path_lock, \
|
||||
flags); \
|
||||
cgroup_path(cgrp, trace_cgroup_path, \
|
||||
TRACE_CGROUP_PATH_LEN); \
|
||||
trace_cgroup_##type(cgrp, trace_cgroup_path, \
|
||||
##__VA_ARGS__); \
|
||||
spin_unlock(&trace_cgroup_path_lock); \
|
||||
spin_unlock_irqrestore(&trace_cgroup_path_lock, \
|
||||
flags); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
|
|||
int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
|
||||
struct kernfs_root *kf_root);
|
||||
|
||||
int __cgroup_task_count(const struct cgroup *cgrp);
|
||||
int cgroup_task_count(const struct cgroup *cgrp);
|
||||
|
||||
/*
|
||||
|
|
|
@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
|
|||
return l;
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_task_count - count the number of tasks in a cgroup.
|
||||
* @cgrp: the cgroup in question
|
||||
*/
|
||||
int cgroup_task_count(const struct cgroup *cgrp)
|
||||
{
|
||||
int count = 0;
|
||||
struct cgrp_cset_link *link;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
list_for_each_entry(link, &cgrp->cset_links, cset_link)
|
||||
count += link->cset->nr_tasks;
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a cgroup's pidarray with either procs' tgids or tasks' pids
|
||||
*/
|
||||
|
|
|
@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
|
|||
css_get(&cgrp->self);
|
||||
}
|
||||
|
||||
/**
|
||||
* __cgroup_task_count - count the number of tasks in a cgroup. The caller
|
||||
* is responsible for taking the css_set_lock.
|
||||
* @cgrp: the cgroup in question
|
||||
*/
|
||||
int __cgroup_task_count(const struct cgroup *cgrp)
|
||||
{
|
||||
int count = 0;
|
||||
struct cgrp_cset_link *link;
|
||||
|
||||
lockdep_assert_held(&css_set_lock);
|
||||
|
||||
list_for_each_entry(link, &cgrp->cset_links, cset_link)
|
||||
count += link->cset->nr_tasks;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_task_count - count the number of tasks in a cgroup.
|
||||
* @cgrp: the cgroup in question
|
||||
*/
|
||||
int cgroup_task_count(const struct cgroup *cgrp)
|
||||
{
|
||||
int count;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
count = __cgroup_task_count(cgrp);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cgroup *cgrp = of->kn->parent->priv;
|
||||
|
@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
|
|||
break;
|
||||
|
||||
cgroup1_check_for_release(cgrp);
|
||||
TRACE_CGROUP_PATH(notify_populated, cgrp,
|
||||
cgroup_is_populated(cgrp));
|
||||
cgroup_file_notify(&cgrp->events_file);
|
||||
|
||||
child = cgrp;
|
||||
|
@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
|
|||
get_css_set(to_cset);
|
||||
to_cset->nr_tasks++;
|
||||
css_set_move_task(task, from_cset, to_cset, true);
|
||||
put_css_set_locked(from_cset);
|
||||
from_cset->nr_tasks--;
|
||||
/*
|
||||
* If the source or destination cgroup is frozen,
|
||||
* the task might need to change its state.
|
||||
*/
|
||||
cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
|
||||
to_cset->dfl_cgrp);
|
||||
put_css_set_locked(from_cset);
|
||||
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
|
|||
|
||||
dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
|
||||
if (!dst_cset)
|
||||
goto err;
|
||||
return -ENOMEM;
|
||||
|
||||
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
|
||||
|
||||
|
@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
|
|||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
cgroup_migrate_finish(mgctx);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
|
|||
|
||||
static int cgroup_events_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
seq_printf(seq, "populated %d\n",
|
||||
cgroup_is_populated(seq_css(seq)->cgroup));
|
||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||
|
||||
seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
|
||||
seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
|
|||
}
|
||||
#endif
|
||||
|
||||
static int cgroup_freeze_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||
|
||||
seq_printf(seq, "%d\n", cgrp->freezer.freeze);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
|
||||
char *buf, size_t nbytes, loff_t off)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
ssize_t ret;
|
||||
int freeze;
|
||||
|
||||
ret = kstrtoint(strstrip(buf), 0, &freeze);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (freeze < 0 || freeze > 1)
|
||||
return -ERANGE;
|
||||
|
||||
cgrp = cgroup_kn_lock_live(of->kn, false);
|
||||
if (!cgrp)
|
||||
return -ENOENT;
|
||||
|
||||
cgroup_freeze(cgrp, freeze);
|
||||
|
||||
cgroup_kn_unlock(of->kn);
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cgroup_file_open(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cftype *cft = of->kn->priv;
|
||||
|
@ -4653,6 +4729,12 @@ static struct cftype cgroup_base_files[] = {
|
|||
.name = "cgroup.stat",
|
||||
.seq_show = cgroup_stat_show,
|
||||
},
|
||||
{
|
||||
.name = "cgroup.freeze",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = cgroup_freeze_show,
|
||||
.write = cgroup_freeze_write,
|
||||
},
|
||||
{
|
||||
.name = "cpu.stat",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
|
@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
|
|||
if (cgroup_on_dfl(cgrp))
|
||||
cgroup_rstat_flush(cgrp);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
for (tcgrp = cgroup_parent(cgrp); tcgrp;
|
||||
tcgrp = cgroup_parent(tcgrp))
|
||||
tcgrp->nr_dying_descendants--;
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
|
||||
cgrp->id = -1;
|
||||
|
@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
|
|||
if (ret)
|
||||
goto out_psi_free;
|
||||
|
||||
/*
|
||||
* New cgroup inherits effective freeze counter, and
|
||||
* if the parent has to be frozen, the child has to be too.
|
||||
*/
|
||||
cgrp->freezer.e_freeze = parent->freezer.e_freeze;
|
||||
if (cgrp->freezer.e_freeze)
|
||||
set_bit(CGRP_FROZEN, &cgrp->flags);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
|
||||
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
|
||||
|
||||
if (tcgrp != cgrp)
|
||||
if (tcgrp != cgrp) {
|
||||
tcgrp->nr_descendants++;
|
||||
|
||||
/*
|
||||
* If the new cgroup is frozen, all ancestor cgroups
|
||||
* get a new frozen descendant, but their state can't
|
||||
* change because of this.
|
||||
*/
|
||||
if (cgrp->freezer.e_freeze)
|
||||
tcgrp->freezer.nr_frozen_descendants++;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
if (notify_on_release(parent))
|
||||
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
||||
|
@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|||
if (parent && cgroup_is_threaded(cgrp))
|
||||
parent->nr_threaded_children--;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
|
||||
tcgrp->nr_descendants--;
|
||||
tcgrp->nr_dying_descendants++;
|
||||
/*
|
||||
* If the dying cgroup is frozen, decrease frozen descendants
|
||||
* counters of ancestor cgroups.
|
||||
*/
|
||||
if (test_bit(CGRP_FROZEN, &cgrp->flags))
|
||||
tcgrp->freezer.nr_frozen_descendants--;
|
||||
}
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
cgroup1_check_for_release(parent);
|
||||
|
||||
|
@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
|
|||
cset->nr_tasks++;
|
||||
css_set_move_task(child, NULL, cset, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the cgroup has to be frozen, the new task has to be too.
|
||||
* Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
|
||||
* the task into the frozen state.
|
||||
*/
|
||||
if (unlikely(cgroup_task_freeze(child))) {
|
||||
spin_lock(&child->sighand->siglock);
|
||||
WARN_ON_ONCE(child->frozen);
|
||||
child->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
spin_unlock(&child->sighand->siglock);
|
||||
|
||||
/*
|
||||
* Calling cgroup_update_frozen() isn't required here,
|
||||
* because it will be called anyway a bit later
|
||||
* from do_freezer_trap(). So we avoid cgroup's
|
||||
* transient switch from the frozen state and back.
|
||||
*/
|
||||
}
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
|
@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
|
|||
spin_lock_irq(&css_set_lock);
|
||||
css_set_move_task(tsk, cset, NULL, false);
|
||||
cset->nr_tasks--;
|
||||
|
||||
WARN_ON_ONCE(cgroup_task_frozen(tsk));
|
||||
if (unlikely(cgroup_task_freeze(tsk)))
|
||||
cgroup_update_frozen(task_dfl_cgroup(tsk));
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
} else {
|
||||
get_css_set(cset);
|
||||
|
|
|
@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
|
|||
css = cset->subsys[ss->id];
|
||||
if (!css)
|
||||
continue;
|
||||
seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
|
||||
(unsigned long)css, css->id);
|
||||
seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
|
||||
css, css->id);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
|
|||
if (css->parent)
|
||||
snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
|
||||
css->parent->id);
|
||||
seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
|
||||
(unsigned long)css, css->id,
|
||||
seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
|
||||
css, css->id,
|
||||
atomic_read(&css->online_cnt), pbuf);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,481 +1,314 @@
|
|||
/*
|
||||
* cgroup_freezer.c - control group freezer subsystem
|
||||
*
|
||||
* Copyright IBM Corporation, 2007
|
||||
*
|
||||
* Author : Cedric Le Goater <clg@fr.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/slab.h>
|
||||
//SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/sched/signal.h>
|
||||
|
||||
#include "cgroup-internal.h"
|
||||
|
||||
#include <trace/events/cgroup.h>
|
||||
|
||||
/*
|
||||
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
|
||||
* set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
|
||||
* for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
|
||||
* for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
|
||||
* its ancestors has FREEZING_SELF set.
|
||||
* Propagate the cgroup frozen state upwards by the cgroup tree.
|
||||
*/
|
||||
enum freezer_state_flags {
|
||||
CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
|
||||
CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
|
||||
CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
|
||||
CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
|
||||
|
||||
/* mask for all FREEZING flags */
|
||||
CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
|
||||
};
|
||||
|
||||
struct freezer {
|
||||
struct cgroup_subsys_state css;
|
||||
unsigned int state;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(freezer_mutex);
|
||||
|
||||
static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
|
||||
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
|
||||
{
|
||||
return css ? container_of(css, struct freezer, css) : NULL;
|
||||
}
|
||||
|
||||
static inline struct freezer *task_freezer(struct task_struct *task)
|
||||
{
|
||||
return css_freezer(task_css(task, freezer_cgrp_id));
|
||||
}
|
||||
|
||||
static struct freezer *parent_freezer(struct freezer *freezer)
|
||||
{
|
||||
return css_freezer(freezer->css.parent);
|
||||
}
|
||||
|
||||
bool cgroup_freezing(struct task_struct *task)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = task_freezer(task)->state & CGROUP_FREEZING;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *freezer_state_strs(unsigned int state)
|
||||
{
|
||||
if (state & CGROUP_FROZEN)
|
||||
return "FROZEN";
|
||||
if (state & CGROUP_FREEZING)
|
||||
return "FREEZING";
|
||||
return "THAWED";
|
||||
};
|
||||
|
||||
static struct cgroup_subsys_state *
|
||||
freezer_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
{
|
||||
struct freezer *freezer;
|
||||
|
||||
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
|
||||
if (!freezer)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
return &freezer->css;
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_css_online - commit creation of a freezer css
|
||||
* @css: css being created
|
||||
*
|
||||
* We're committing to creation of @css. Mark it online and inherit
|
||||
* parent's freezing state while holding both parent's and our
|
||||
* freezer->lock.
|
||||
*/
|
||||
static int freezer_css_online(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
struct freezer *parent = parent_freezer(freezer);
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
|
||||
freezer->state |= CGROUP_FREEZER_ONLINE;
|
||||
|
||||
if (parent && (parent->state & CGROUP_FREEZING)) {
|
||||
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
|
||||
atomic_inc(&system_freezing_cnt);
|
||||
}
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_css_offline - initiate destruction of a freezer css
|
||||
* @css: css being destroyed
|
||||
*
|
||||
* @css is going away. Mark it dead and decrement system_freezing_count if
|
||||
* it was holding one.
|
||||
*/
|
||||
static void freezer_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
|
||||
if (freezer->state & CGROUP_FREEZING)
|
||||
atomic_dec(&system_freezing_cnt);
|
||||
|
||||
freezer->state = 0;
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
static void freezer_css_free(struct cgroup_subsys_state *css)
|
||||
{
|
||||
kfree(css_freezer(css));
|
||||
}
|
||||
|
||||
/*
|
||||
* Tasks can be migrated into a different freezer anytime regardless of its
|
||||
* current state. freezer_attach() is responsible for making new tasks
|
||||
* conform to the current state.
|
||||
*
|
||||
* Freezer state changes and task migration are synchronized via
|
||||
* @freezer->lock. freezer_attach() makes the new tasks conform to the
|
||||
* current state and all following state changes can see the new tasks.
|
||||
*/
|
||||
static void freezer_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *new_css;
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
int desc = 1;
|
||||
|
||||
/*
|
||||
* Make the new tasks conform to the current state of @new_css.
|
||||
* For simplicity, when migrating any task to a FROZEN cgroup, we
|
||||
* revert it to FREEZING and let update_if_frozen() determine the
|
||||
* correct state later.
|
||||
* If the new state is frozen, some freezing ancestor cgroups may change
|
||||
* their state too, depending on if all their descendants are frozen.
|
||||
*
|
||||
* Tasks in @tset are on @new_css but may not conform to its
|
||||
* current state before executing the following - !frozen tasks may
|
||||
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
|
||||
* Otherwise, all ancestor cgroups are forced into the non-frozen state.
|
||||
*/
|
||||
cgroup_taskset_for_each(task, new_css, tset) {
|
||||
struct freezer *freezer = css_freezer(new_css);
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
__thaw_task(task);
|
||||
while ((cgrp = cgroup_parent(cgrp))) {
|
||||
if (frozen) {
|
||||
cgrp->freezer.nr_frozen_descendants += desc;
|
||||
if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
|
||||
test_bit(CGRP_FREEZE, &cgrp->flags) &&
|
||||
cgrp->freezer.nr_frozen_descendants ==
|
||||
cgrp->nr_descendants) {
|
||||
set_bit(CGRP_FROZEN, &cgrp->flags);
|
||||
cgroup_file_notify(&cgrp->events_file);
|
||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
|
||||
desc++;
|
||||
}
|
||||
} else {
|
||||
freeze_task(task);
|
||||
/* clear FROZEN and propagate upwards */
|
||||
while (freezer && (freezer->state & CGROUP_FROZEN)) {
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
freezer = parent_freezer(freezer);
|
||||
cgrp->freezer.nr_frozen_descendants -= desc;
|
||||
if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
|
||||
clear_bit(CGRP_FROZEN, &cgrp->flags);
|
||||
cgroup_file_notify(&cgrp->events_file);
|
||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
|
||||
desc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_fork - cgroup post fork callback
|
||||
* @task: a task which has just been forked
|
||||
*
|
||||
* @task has just been created and should conform to the current state of
|
||||
* the cgroup_freezer it belongs to. This function may race against
|
||||
* freezer_attach(). Losing to freezer_attach() means that we don't have
|
||||
* to do anything as freezer_attach() will put @task into the appropriate
|
||||
* state.
|
||||
/*
|
||||
* Revisit the cgroup frozen state.
|
||||
* Checks if the cgroup is really frozen and performs all state transitions.
|
||||
*/
|
||||
static void freezer_fork(struct task_struct *task)
|
||||
void cgroup_update_frozen(struct cgroup *cgrp)
|
||||
{
|
||||
struct freezer *freezer;
|
||||
bool frozen;
|
||||
|
||||
lockdep_assert_held(&css_set_lock);
|
||||
|
||||
/*
|
||||
* The root cgroup is non-freezable, so we can skip locking the
|
||||
* freezer. This is safe regardless of race with task migration.
|
||||
* If we didn't race or won, skipping is obviously the right thing
|
||||
* to do. If we lost and root is the new cgroup, noop is still the
|
||||
* right thing to do.
|
||||
* If the cgroup has to be frozen (CGRP_FREEZE bit set),
|
||||
* and all tasks are frozen and/or stopped, let's consider
|
||||
* the cgroup frozen. Otherwise it's not frozen.
|
||||
*/
|
||||
if (task_css_is_root(task, freezer_cgrp_id))
|
||||
return;
|
||||
frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
|
||||
cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
|
||||
freezer = task_freezer(task);
|
||||
if (freezer->state & CGROUP_FREEZING)
|
||||
freeze_task(task);
|
||||
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* update_if_frozen - update whether a cgroup finished freezing
|
||||
* @css: css of interest
|
||||
*
|
||||
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
|
||||
* calling this function. If the current state is FREEZING but not FROZEN,
|
||||
* this function checks whether all tasks of this cgroup and the descendant
|
||||
* cgroups finished freezing and, if so, sets FROZEN.
|
||||
*
|
||||
* The caller is responsible for grabbing RCU read lock and calling
|
||||
* update_if_frozen() on all descendants prior to invoking this function.
|
||||
*
|
||||
* Task states and freezer state might disagree while tasks are being
|
||||
* migrated into or out of @css, so we can't verify task states against
|
||||
* @freezer state here. See freezer_attach() for details.
|
||||
*/
|
||||
static void update_if_frozen(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
struct cgroup_subsys_state *pos;
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
lockdep_assert_held(&freezer_mutex);
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING) ||
|
||||
(freezer->state & CGROUP_FROZEN))
|
||||
return;
|
||||
|
||||
/* are all (live) children frozen? */
|
||||
rcu_read_lock();
|
||||
css_for_each_child(pos, css) {
|
||||
struct freezer *child = css_freezer(pos);
|
||||
|
||||
if ((child->state & CGROUP_FREEZER_ONLINE) &&
|
||||
!(child->state & CGROUP_FROZEN)) {
|
||||
rcu_read_unlock();
|
||||
if (frozen) {
|
||||
/* Already there? */
|
||||
if (test_bit(CGRP_FROZEN, &cgrp->flags))
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(CGRP_FROZEN, &cgrp->flags);
|
||||
} else {
|
||||
/* Already there? */
|
||||
if (!test_bit(CGRP_FROZEN, &cgrp->flags))
|
||||
return;
|
||||
|
||||
clear_bit(CGRP_FROZEN, &cgrp->flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
cgroup_file_notify(&cgrp->events_file);
|
||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
|
||||
|
||||
/* are all tasks frozen? */
|
||||
css_task_iter_start(css, 0, &it);
|
||||
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
if (freezing(task)) {
|
||||
/*
|
||||
* freezer_should_skip() indicates that the task
|
||||
* should be skipped when determining freezing
|
||||
* completion. Consider it frozen in addition to
|
||||
* the usual frozen condition.
|
||||
*/
|
||||
if (!frozen(task) && !freezer_should_skip(task))
|
||||
goto out_iter_end;
|
||||
}
|
||||
}
|
||||
|
||||
freezer->state |= CGROUP_FROZEN;
|
||||
out_iter_end:
|
||||
css_task_iter_end(&it);
|
||||
/* Update the state of ancestor cgroups. */
|
||||
cgroup_propagate_frozen(cgrp, frozen);
|
||||
}
|
||||
|
||||
static int freezer_read(struct seq_file *m, void *v)
|
||||
{
|
||||
struct cgroup_subsys_state *css = seq_css(m), *pos;
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
|
||||
/* update states bottom-up */
|
||||
css_for_each_descendant_post(pos, css) {
|
||||
if (!css_tryget_online(pos))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
|
||||
update_if_frozen(pos);
|
||||
|
||||
rcu_read_lock();
|
||||
css_put(pos);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
|
||||
seq_puts(m, freezer_state_strs(css_freezer(css)->state));
|
||||
seq_putc(m, '\n');
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void freeze_cgroup(struct freezer *freezer)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
css_task_iter_start(&freezer->css, 0, &it);
|
||||
while ((task = css_task_iter_next(&it)))
|
||||
freeze_task(task);
|
||||
css_task_iter_end(&it);
|
||||
}
|
||||
|
||||
static void unfreeze_cgroup(struct freezer *freezer)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
css_task_iter_start(&freezer->css, 0, &it);
|
||||
while ((task = css_task_iter_next(&it)))
|
||||
__thaw_task(task);
|
||||
css_task_iter_end(&it);
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_apply_state - apply state change to a single cgroup_freezer
|
||||
* @freezer: freezer to apply state change to
|
||||
* @freeze: whether to freeze or unfreeze
|
||||
* @state: CGROUP_FREEZING_* flag to set or clear
|
||||
*
|
||||
* Set or clear @state on @cgroup according to @freeze, and perform
|
||||
* freezing or thawing as necessary.
|
||||
/*
|
||||
* Increment cgroup's nr_frozen_tasks.
|
||||
*/
|
||||
static void freezer_apply_state(struct freezer *freezer, bool freeze,
|
||||
unsigned int state)
|
||||
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
|
||||
{
|
||||
/* also synchronizes against task migration, see freezer_attach() */
|
||||
lockdep_assert_held(&freezer_mutex);
|
||||
cgrp->freezer.nr_frozen_tasks++;
|
||||
}
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
|
||||
/*
|
||||
* Decrement cgroup's nr_frozen_tasks.
|
||||
*/
|
||||
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
|
||||
{
|
||||
cgrp->freezer.nr_frozen_tasks--;
|
||||
WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enter frozen/stopped state, if not yet there. Update cgroup's counters,
|
||||
* and revisit the state of the cgroup, if necessary.
|
||||
*/
|
||||
void cgroup_enter_frozen(void)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
|
||||
if (current->frozen)
|
||||
return;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
current->frozen = true;
|
||||
cgrp = task_dfl_cgroup(current);
|
||||
cgroup_inc_frozen_cnt(cgrp);
|
||||
cgroup_update_frozen(cgrp);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally leave frozen/stopped state. Update cgroup's counters,
|
||||
* and revisit the state of the cgroup, if necessary.
|
||||
*
|
||||
* If always_leave is not set, and the cgroup is freezing,
|
||||
* we're racing with the cgroup freezing. In this case, we don't
|
||||
* drop the frozen counter to avoid a transient switch to
|
||||
* the unfrozen state.
|
||||
*/
|
||||
void cgroup_leave_frozen(bool always_leave)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
cgrp = task_dfl_cgroup(current);
|
||||
if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
|
||||
cgroup_dec_frozen_cnt(cgrp);
|
||||
cgroup_update_frozen(cgrp);
|
||||
WARN_ON_ONCE(!current->frozen);
|
||||
current->frozen = false;
|
||||
} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
|
||||
spin_lock(¤t->sighand->siglock);
|
||||
current->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
set_thread_flag(TIF_SIGPENDING);
|
||||
spin_unlock(¤t->sighand->siglock);
|
||||
}
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
|
||||
* jobctl bit.
|
||||
*/
|
||||
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/* If the task is about to die, don't bother with freezing it. */
|
||||
if (!lock_task_sighand(task, &flags))
|
||||
return;
|
||||
|
||||
if (freeze) {
|
||||
if (!(freezer->state & CGROUP_FREEZING))
|
||||
atomic_inc(&system_freezing_cnt);
|
||||
freezer->state |= state;
|
||||
freeze_cgroup(freezer);
|
||||
task->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
signal_wake_up(task, false);
|
||||
} else {
|
||||
bool was_freezing = freezer->state & CGROUP_FREEZING;
|
||||
|
||||
freezer->state &= ~state;
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
if (was_freezing)
|
||||
atomic_dec(&system_freezing_cnt);
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
unfreeze_cgroup(freezer);
|
||||
}
|
||||
task->jobctl &= ~JOBCTL_TRAP_FREEZE;
|
||||
wake_up_process(task);
|
||||
}
|
||||
|
||||
unlock_task_sighand(task, &flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_change_state - change the freezing state of a cgroup_freezer
|
||||
* @freezer: freezer of interest
|
||||
* @freeze: whether to freeze or thaw
|
||||
*
|
||||
* Freeze or thaw @freezer according to @freeze. The operations are
|
||||
* recursive - all descendants of @freezer will be affected.
|
||||
/*
|
||||
* Freeze or unfreeze all tasks in the given cgroup.
|
||||
*/
|
||||
static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
|
||||
{
|
||||
struct cgroup_subsys_state *pos;
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
if (freeze)
|
||||
set_bit(CGRP_FREEZE, &cgrp->flags);
|
||||
else
|
||||
clear_bit(CGRP_FREEZE, &cgrp->flags);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
if (freeze)
|
||||
TRACE_CGROUP_PATH(freeze, cgrp);
|
||||
else
|
||||
TRACE_CGROUP_PATH(unfreeze, cgrp);
|
||||
|
||||
css_task_iter_start(&cgrp->self, 0, &it);
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
/*
|
||||
* Ignore kernel threads here. Freezing cgroups containing
|
||||
* kthreads isn't supported.
|
||||
*/
|
||||
if (task->flags & PF_KTHREAD)
|
||||
continue;
|
||||
cgroup_freeze_task(task, freeze);
|
||||
}
|
||||
css_task_iter_end(&it);
|
||||
|
||||
/*
|
||||
* Update all its descendants in pre-order traversal. Each
|
||||
* descendant will try to inherit its parent's FREEZING state as
|
||||
* CGROUP_FREEZING_PARENT.
|
||||
* Cgroup state should be revisited here to cover empty leaf cgroups
|
||||
* and cgroups which descendants are already in the desired state.
|
||||
*/
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
css_for_each_descendant_pre(pos, &freezer->css) {
|
||||
struct freezer *pos_f = css_freezer(pos);
|
||||
struct freezer *parent = parent_freezer(pos_f);
|
||||
spin_lock_irq(&css_set_lock);
|
||||
if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
|
||||
cgroup_update_frozen(cgrp);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
if (!css_tryget_online(pos))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* Adjust the task state (freeze or unfreeze) and revisit the state of
|
||||
* source and destination cgroups.
|
||||
*/
|
||||
void cgroup_freezer_migrate_task(struct task_struct *task,
|
||||
struct cgroup *src, struct cgroup *dst)
|
||||
{
|
||||
lockdep_assert_held(&css_set_lock);
|
||||
|
||||
if (pos_f == freezer)
|
||||
freezer_apply_state(pos_f, freeze,
|
||||
CGROUP_FREEZING_SELF);
|
||||
else
|
||||
freezer_apply_state(pos_f,
|
||||
parent->state & CGROUP_FREEZING,
|
||||
CGROUP_FREEZING_PARENT);
|
||||
/*
|
||||
* Kernel threads are not supposed to be frozen at all.
|
||||
*/
|
||||
if (task->flags & PF_KTHREAD)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
css_put(pos);
|
||||
/*
|
||||
* Adjust counters of freezing and frozen tasks.
|
||||
* Note, that if the task is frozen, but the destination cgroup is not
|
||||
* frozen, we bump both counters to keep them balanced.
|
||||
*/
|
||||
if (task->frozen) {
|
||||
cgroup_inc_frozen_cnt(dst);
|
||||
cgroup_dec_frozen_cnt(src);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
cgroup_update_frozen(dst);
|
||||
cgroup_update_frozen(src);
|
||||
|
||||
/*
|
||||
* Force the task to the desired state.
|
||||
*/
|
||||
cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
|
||||
}
|
||||
|
||||
static ssize_t freezer_write(struct kernfs_open_file *of,
|
||||
char *buf, size_t nbytes, loff_t off)
|
||||
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
|
||||
{
|
||||
bool freeze;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cgroup *dsct;
|
||||
bool applied = false;
|
||||
|
||||
buf = strstrip(buf);
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
if (strcmp(buf, freezer_state_strs(0)) == 0)
|
||||
freeze = false;
|
||||
else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
|
||||
freeze = true;
|
||||
else
|
||||
return -EINVAL;
|
||||
/*
|
||||
* Nothing changed? Just exit.
|
||||
*/
|
||||
if (cgrp->freezer.freeze == freeze)
|
||||
return;
|
||||
|
||||
freezer_change_state(css_freezer(of_css(of)), freeze);
|
||||
return nbytes;
|
||||
cgrp->freezer.freeze = freeze;
|
||||
|
||||
/*
|
||||
* Propagate changes downwards the cgroup tree.
|
||||
*/
|
||||
css_for_each_descendant_pre(css, &cgrp->self) {
|
||||
dsct = css->cgroup;
|
||||
|
||||
if (cgroup_is_dead(dsct))
|
||||
continue;
|
||||
|
||||
if (freeze) {
|
||||
dsct->freezer.e_freeze++;
|
||||
/*
|
||||
* Already frozen because of ancestor's settings?
|
||||
*/
|
||||
if (dsct->freezer.e_freeze > 1)
|
||||
continue;
|
||||
} else {
|
||||
dsct->freezer.e_freeze--;
|
||||
/*
|
||||
* Still frozen because of ancestor's settings?
|
||||
*/
|
||||
if (dsct->freezer.e_freeze > 0)
|
||||
continue;
|
||||
|
||||
WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do change actual state: freeze or unfreeze.
|
||||
*/
|
||||
cgroup_do_freeze(dsct, freeze);
|
||||
applied = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Even if the actual state hasn't changed, let's notify a user.
|
||||
* The state can be enforced by an ancestor cgroup: the cgroup
|
||||
* can already be in the desired state or it can be locked in the
|
||||
* opposite state, so that the transition will never happen.
|
||||
* In both cases it's better to notify a user, that there is
|
||||
* nothing to wait for.
|
||||
*/
|
||||
if (!applied) {
|
||||
TRACE_CGROUP_PATH(notify_frozen, cgrp,
|
||||
test_bit(CGRP_FROZEN, &cgrp->flags));
|
||||
cgroup_file_notify(&cgrp->events_file);
|
||||
}
|
||||
}
|
||||
|
||||
static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
|
||||
}
|
||||
|
||||
static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
|
||||
}
|
||||
|
||||
static struct cftype files[] = {
|
||||
{
|
||||
.name = "state",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = freezer_read,
|
||||
.write = freezer_write,
|
||||
},
|
||||
{
|
||||
.name = "self_freezing",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.read_u64 = freezer_self_freezing_read,
|
||||
},
|
||||
{
|
||||
.name = "parent_freezing",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.read_u64 = freezer_parent_freezing_read,
|
||||
},
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
struct cgroup_subsys freezer_cgrp_subsys = {
|
||||
.css_alloc = freezer_css_alloc,
|
||||
.css_online = freezer_css_online,
|
||||
.css_offline = freezer_css_offline,
|
||||
.css_free = freezer_css_free,
|
||||
.attach = freezer_attach,
|
||||
.fork = freezer_fork,
|
||||
.legacy_cftypes = files,
|
||||
};
|
||||
|
|
|
@ -0,0 +1,481 @@
|
|||
/*
 * cgroup_freezer.c -  control group freezer subsystem
 *
 * Copyright IBM Corporation, 2007
 *
 * Author : Cedric Le Goater <clg@fr.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/*
 * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
 * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
 * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
 * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
 * its ancestors has FREEZING_SELF set.
 */
enum freezer_state_flags {
	CGROUP_FREEZER_ONLINE	= (1 << 0), /* freezer is fully online */
	CGROUP_FREEZING_SELF	= (1 << 1), /* this freezer is freezing */
	CGROUP_FREEZING_PARENT	= (1 << 2), /* the parent freezer is freezing */
	CGROUP_FROZEN		= (1 << 3), /* this and its descendants frozen */

	/* mask for all FREEZING flags */
	CGROUP_FREEZING		= CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
};
|
||||
|
||||
struct freezer {
|
||||
struct cgroup_subsys_state css;
|
||||
unsigned int state;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(freezer_mutex);
|
||||
|
||||
static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
|
||||
{
|
||||
return css ? container_of(css, struct freezer, css) : NULL;
|
||||
}
|
||||
|
||||
static inline struct freezer *task_freezer(struct task_struct *task)
|
||||
{
|
||||
return css_freezer(task_css(task, freezer_cgrp_id));
|
||||
}
|
||||
|
||||
static struct freezer *parent_freezer(struct freezer *freezer)
|
||||
{
|
||||
return css_freezer(freezer->css.parent);
|
||||
}
|
||||
|
||||
bool cgroup_freezing(struct task_struct *task)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = task_freezer(task)->state & CGROUP_FREEZING;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *freezer_state_strs(unsigned int state)
|
||||
{
|
||||
if (state & CGROUP_FROZEN)
|
||||
return "FROZEN";
|
||||
if (state & CGROUP_FREEZING)
|
||||
return "FREEZING";
|
||||
return "THAWED";
|
||||
};
|
||||
|
||||
static struct cgroup_subsys_state *
|
||||
freezer_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
{
|
||||
struct freezer *freezer;
|
||||
|
||||
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
|
||||
if (!freezer)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
return &freezer->css;
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_css_online - commit creation of a freezer css
|
||||
* @css: css being created
|
||||
*
|
||||
* We're committing to creation of @css. Mark it online and inherit
|
||||
* parent's freezing state while holding both parent's and our
|
||||
* freezer->lock.
|
||||
*/
|
||||
static int freezer_css_online(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
struct freezer *parent = parent_freezer(freezer);
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
|
||||
freezer->state |= CGROUP_FREEZER_ONLINE;
|
||||
|
||||
if (parent && (parent->state & CGROUP_FREEZING)) {
|
||||
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
|
||||
atomic_inc(&system_freezing_cnt);
|
||||
}
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_css_offline - initiate destruction of a freezer css
|
||||
* @css: css being destroyed
|
||||
*
|
||||
* @css is going away. Mark it dead and decrement system_freezing_count if
|
||||
* it was holding one.
|
||||
*/
|
||||
static void freezer_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
|
||||
if (freezer->state & CGROUP_FREEZING)
|
||||
atomic_dec(&system_freezing_cnt);
|
||||
|
||||
freezer->state = 0;
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/* Free the freezer that embeds @css. */
static void freezer_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_freezer(css));
}
|
||||
|
||||
/*
|
||||
* Tasks can be migrated into a different freezer anytime regardless of its
|
||||
* current state. freezer_attach() is responsible for making new tasks
|
||||
* conform to the current state.
|
||||
*
|
||||
* Freezer state changes and task migration are synchronized via
|
||||
* @freezer->lock. freezer_attach() makes the new tasks conform to the
|
||||
* current state and all following state changes can see the new tasks.
|
||||
*/
|
||||
static void freezer_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *new_css;
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
|
||||
/*
|
||||
* Make the new tasks conform to the current state of @new_css.
|
||||
* For simplicity, when migrating any task to a FROZEN cgroup, we
|
||||
* revert it to FREEZING and let update_if_frozen() determine the
|
||||
* correct state later.
|
||||
*
|
||||
* Tasks in @tset are on @new_css but may not conform to its
|
||||
* current state before executing the following - !frozen tasks may
|
||||
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
|
||||
*/
|
||||
cgroup_taskset_for_each(task, new_css, tset) {
|
||||
struct freezer *freezer = css_freezer(new_css);
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
__thaw_task(task);
|
||||
} else {
|
||||
freeze_task(task);
|
||||
/* clear FROZEN and propagate upwards */
|
||||
while (freezer && (freezer->state & CGROUP_FROZEN)) {
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
freezer = parent_freezer(freezer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_fork - cgroup post fork callback
|
||||
* @task: a task which has just been forked
|
||||
*
|
||||
* @task has just been created and should conform to the current state of
|
||||
* the cgroup_freezer it belongs to. This function may race against
|
||||
* freezer_attach(). Losing to freezer_attach() means that we don't have
|
||||
* to do anything as freezer_attach() will put @task into the appropriate
|
||||
* state.
|
||||
*/
|
||||
static void freezer_fork(struct task_struct *task)
|
||||
{
|
||||
struct freezer *freezer;
|
||||
|
||||
/*
|
||||
* The root cgroup is non-freezable, so we can skip locking the
|
||||
* freezer. This is safe regardless of race with task migration.
|
||||
* If we didn't race or won, skipping is obviously the right thing
|
||||
* to do. If we lost and root is the new cgroup, noop is still the
|
||||
* right thing to do.
|
||||
*/
|
||||
if (task_css_is_root(task, freezer_cgrp_id))
|
||||
return;
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
|
||||
freezer = task_freezer(task);
|
||||
if (freezer->state & CGROUP_FREEZING)
|
||||
freeze_task(task);
|
||||
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* update_if_frozen - update whether a cgroup finished freezing
|
||||
* @css: css of interest
|
||||
*
|
||||
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
|
||||
* calling this function. If the current state is FREEZING but not FROZEN,
|
||||
* this function checks whether all tasks of this cgroup and the descendant
|
||||
* cgroups finished freezing and, if so, sets FROZEN.
|
||||
*
|
||||
* The caller is responsible for grabbing RCU read lock and calling
|
||||
* update_if_frozen() on all descendants prior to invoking this function.
|
||||
*
|
||||
* Task states and freezer state might disagree while tasks are being
|
||||
* migrated into or out of @css, so we can't verify task states against
|
||||
* @freezer state here. See freezer_attach() for details.
|
||||
*/
|
||||
static void update_if_frozen(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
struct cgroup_subsys_state *pos;
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
lockdep_assert_held(&freezer_mutex);
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING) ||
|
||||
(freezer->state & CGROUP_FROZEN))
|
||||
return;
|
||||
|
||||
/* are all (live) children frozen? */
|
||||
rcu_read_lock();
|
||||
css_for_each_child(pos, css) {
|
||||
struct freezer *child = css_freezer(pos);
|
||||
|
||||
if ((child->state & CGROUP_FREEZER_ONLINE) &&
|
||||
!(child->state & CGROUP_FROZEN)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/* are all tasks frozen? */
|
||||
css_task_iter_start(css, 0, &it);
|
||||
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
if (freezing(task)) {
|
||||
/*
|
||||
* freezer_should_skip() indicates that the task
|
||||
* should be skipped when determining freezing
|
||||
* completion. Consider it frozen in addition to
|
||||
* the usual frozen condition.
|
||||
*/
|
||||
if (!frozen(task) && !freezer_should_skip(task))
|
||||
goto out_iter_end;
|
||||
}
|
||||
}
|
||||
|
||||
freezer->state |= CGROUP_FROZEN;
|
||||
out_iter_end:
|
||||
css_task_iter_end(&it);
|
||||
}
|
||||
|
||||
static int freezer_read(struct seq_file *m, void *v)
|
||||
{
|
||||
struct cgroup_subsys_state *css = seq_css(m), *pos;
|
||||
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
|
||||
/* update states bottom-up */
|
||||
css_for_each_descendant_post(pos, css) {
|
||||
if (!css_tryget_online(pos))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
|
||||
update_if_frozen(pos);
|
||||
|
||||
rcu_read_lock();
|
||||
css_put(pos);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
|
||||
seq_puts(m, freezer_state_strs(css_freezer(css)->state));
|
||||
seq_putc(m, '\n');
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void freeze_cgroup(struct freezer *freezer)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
css_task_iter_start(&freezer->css, 0, &it);
|
||||
while ((task = css_task_iter_next(&it)))
|
||||
freeze_task(task);
|
||||
css_task_iter_end(&it);
|
||||
}
|
||||
|
||||
static void unfreeze_cgroup(struct freezer *freezer)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
css_task_iter_start(&freezer->css, 0, &it);
|
||||
while ((task = css_task_iter_next(&it)))
|
||||
__thaw_task(task);
|
||||
css_task_iter_end(&it);
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_apply_state - apply state change to a single cgroup_freezer
|
||||
* @freezer: freezer to apply state change to
|
||||
* @freeze: whether to freeze or unfreeze
|
||||
* @state: CGROUP_FREEZING_* flag to set or clear
|
||||
*
|
||||
* Set or clear @state on @cgroup according to @freeze, and perform
|
||||
* freezing or thawing as necessary.
|
||||
*/
|
||||
static void freezer_apply_state(struct freezer *freezer, bool freeze,
|
||||
unsigned int state)
|
||||
{
|
||||
/* also synchronizes against task migration, see freezer_attach() */
|
||||
lockdep_assert_held(&freezer_mutex);
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
|
||||
return;
|
||||
|
||||
if (freeze) {
|
||||
if (!(freezer->state & CGROUP_FREEZING))
|
||||
atomic_inc(&system_freezing_cnt);
|
||||
freezer->state |= state;
|
||||
freeze_cgroup(freezer);
|
||||
} else {
|
||||
bool was_freezing = freezer->state & CGROUP_FREEZING;
|
||||
|
||||
freezer->state &= ~state;
|
||||
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
if (was_freezing)
|
||||
atomic_dec(&system_freezing_cnt);
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
unfreeze_cgroup(freezer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* freezer_change_state - change the freezing state of a cgroup_freezer
|
||||
* @freezer: freezer of interest
|
||||
* @freeze: whether to freeze or thaw
|
||||
*
|
||||
* Freeze or thaw @freezer according to @freeze. The operations are
|
||||
* recursive - all descendants of @freezer will be affected.
|
||||
*/
|
||||
static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
{
|
||||
struct cgroup_subsys_state *pos;
|
||||
|
||||
/*
|
||||
* Update all its descendants in pre-order traversal. Each
|
||||
* descendant will try to inherit its parent's FREEZING state as
|
||||
* CGROUP_FREEZING_PARENT.
|
||||
*/
|
||||
mutex_lock(&freezer_mutex);
|
||||
rcu_read_lock();
|
||||
css_for_each_descendant_pre(pos, &freezer->css) {
|
||||
struct freezer *pos_f = css_freezer(pos);
|
||||
struct freezer *parent = parent_freezer(pos_f);
|
||||
|
||||
if (!css_tryget_online(pos))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (pos_f == freezer)
|
||||
freezer_apply_state(pos_f, freeze,
|
||||
CGROUP_FREEZING_SELF);
|
||||
else
|
||||
freezer_apply_state(pos_f,
|
||||
parent->state & CGROUP_FREEZING,
|
||||
CGROUP_FREEZING_PARENT);
|
||||
|
||||
rcu_read_lock();
|
||||
css_put(pos);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
}
|
||||
|
||||
/*
 * Write handler of the "state" file.  After stripping surrounding
 * whitespace, @buf must equal the name freezer_state_strs() gives for the
 * thawed state or for CGROUP_FROZEN; anything else fails with -EINVAL.
 * Returns @nbytes on success.
 */
static ssize_t freezer_write(struct kernfs_open_file *of,
			     char *buf, size_t nbytes, loff_t off)
{
	bool freeze;

	buf = strstrip(buf);

	if (strcmp(buf, freezer_state_strs(0)) == 0)
		freeze = false;
	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
		freeze = true;
	else
		return -EINVAL;

	freezer_change_state(css_freezer(of_css(of)), freeze);
	return nbytes;
}
|
||||
|
||||
static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
|
||||
}
|
||||
|
||||
static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
|
||||
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
|
||||
}
|
||||
|
||||
static struct cftype files[] = {
|
||||
{
|
||||
.name = "state",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = freezer_read,
|
||||
.write = freezer_write,
|
||||
},
|
||||
{
|
||||
.name = "self_freezing",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.read_u64 = freezer_self_freezing_read,
|
||||
},
|
||||
{
|
||||
.name = "parent_freezing",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.read_u64 = freezer_parent_freezing_read,
|
||||
},
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
struct cgroup_subsys freezer_cgrp_subsys = {
|
||||
.css_alloc = freezer_css_alloc,
|
||||
.css_online = freezer_css_online,
|
||||
.css_offline = freezer_css_offline,
|
||||
.css_free = freezer_css_free,
|
||||
.attach = freezer_attach,
|
||||
.fork = freezer_fork,
|
||||
.legacy_cftypes = files,
|
||||
};
|
|
@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
|
|||
int killed;
|
||||
|
||||
freezer_do_not_count();
|
||||
cgroup_enter_frozen();
|
||||
killed = wait_for_completion_killable(vfork);
|
||||
cgroup_leave_frozen(false);
|
||||
freezer_count();
|
||||
|
||||
if (killed) {
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <linux/compiler.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/cgroup.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/signal.h>
|
||||
|
@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
|
|||
|
||||
static bool recalc_sigpending_tsk(struct task_struct *t)
|
||||
{
|
||||
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
|
||||
if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
|
||||
PENDING(&t->pending, &t->blocked) ||
|
||||
PENDING(&t->signal->shared_pending, &t->blocked)) {
|
||||
PENDING(&t->signal->shared_pending, &t->blocked) ||
|
||||
cgroup_task_frozen(t)) {
|
||||
set_tsk_thread_flag(t, TIF_SIGPENDING);
|
||||
return true;
|
||||
}
|
||||
|
@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
|
|||
preempt_disable();
|
||||
read_unlock(&tasklist_lock);
|
||||
preempt_enable_no_resched();
|
||||
cgroup_enter_frozen();
|
||||
freezable_schedule();
|
||||
} else {
|
||||
/*
|
||||
|
@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
|
|||
}
|
||||
|
||||
/* Now we don't run again until woken by SIGCONT or SIGKILL */
|
||||
cgroup_enter_frozen();
|
||||
freezable_schedule();
|
||||
return true;
|
||||
} else {
|
||||
|
@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* do_freezer_trap - handle the freezer jobctl trap
|
||||
*
|
||||
* Puts the task into frozen state, if only the task is not about to quit.
|
||||
* In this case it drops JOBCTL_TRAP_FREEZE.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Must be called with @current->sighand->siglock held,
|
||||
* which is always released before returning.
|
||||
*/
|
||||
static void do_freezer_trap(void)
|
||||
__releases(¤t->sighand->siglock)
|
||||
{
|
||||
/*
|
||||
* If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
|
||||
* let's make another loop to give it a chance to be handled.
|
||||
* In any case, we'll return back.
|
||||
*/
|
||||
if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
|
||||
JOBCTL_TRAP_FREEZE) {
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we're sure that there is no pending fatal signal and no
|
||||
* pending traps. Clear TIF_SIGPENDING to not get out of schedule()
|
||||
* immediately (if there is a non-fatal signal pending), and
|
||||
* put the task into sleep.
|
||||
*/
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
clear_thread_flag(TIF_SIGPENDING);
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
cgroup_enter_frozen();
|
||||
freezable_schedule();
|
||||
}
|
||||
|
||||
static int ptrace_signal(int signr, kernel_siginfo_t *info)
|
||||
{
|
||||
/*
|
||||
|
@ -2452,9 +2493,24 @@ relock:
|
|||
do_signal_stop(0))
|
||||
goto relock;
|
||||
|
||||
if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
|
||||
do_jobctl_trap();
|
||||
if (unlikely(current->jobctl &
|
||||
(JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
|
||||
if (current->jobctl & JOBCTL_TRAP_MASK) {
|
||||
do_jobctl_trap();
|
||||
spin_unlock_irq(&sighand->siglock);
|
||||
} else if (current->jobctl & JOBCTL_TRAP_FREEZE)
|
||||
do_freezer_trap();
|
||||
|
||||
goto relock;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the task is leaving the frozen state, let's update
|
||||
* cgroup counters and reset the frozen bit.
|
||||
*/
|
||||
if (unlikely(cgroup_task_frozen(current))) {
|
||||
spin_unlock_irq(&sighand->siglock);
|
||||
cgroup_leave_frozen(false);
|
||||
goto relock;
|
||||
}
|
||||
|
||||
|
@ -2550,6 +2606,8 @@ relock:
|
|||
|
||||
fatal:
|
||||
spin_unlock_irq(&sighand->siglock);
|
||||
if (unlikely(cgroup_task_frozen(current)))
|
||||
cgroup_leave_frozen(true);
|
||||
|
||||
/*
|
||||
* Anything else is fatal, maybe with a core dump.
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
test_memcontrol
|
||||
test_core
|
||||
test_freezer
|
||||
|
|
|
@ -5,8 +5,10 @@ all:
|
|||
|
||||
TEST_GEN_PROGS = test_memcontrol
|
||||
TEST_GEN_PROGS += test_core
|
||||
TEST_GEN_PROGS += test_freezer
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
$(OUTPUT)/test_memcontrol: cgroup_util.c
|
||||
$(OUTPUT)/test_core: cgroup_util.c
|
||||
$(OUTPUT)/test_freezer: cgroup_util.c
|
||||
|
|
|
@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Build the path of a control file inside a cgroup directory.
 *
 * @cgroup:  path of the cgroup directory
 * @control: name of the control file (e.g. "cgroup.events")
 *
 * Returns a newly allocated "<cgroup>/<control>" string which the caller
 * must free(), or NULL if the allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	/* Never hand a NULL buffer to snprintf() when out of memory. */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
|
||||
|
||||
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
|
@ -196,7 +206,32 @@ int cg_create(const char *cgroup)
|
|||
return mkdir(cgroup, 0644);
|
||||
}
|
||||
|
||||
static int cg_killall(const char *cgroup)
|
||||
int cg_wait_for_proc_count(const char *cgroup, int count)
|
||||
{
|
||||
char buf[10 * PAGE_SIZE] = {0};
|
||||
int attempts;
|
||||
char *ptr;
|
||||
|
||||
for (attempts = 10; attempts >= 0; attempts--) {
|
||||
int nr = 0;
|
||||
|
||||
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
|
||||
break;
|
||||
|
||||
for (ptr = buf; *ptr; ptr++)
|
||||
if (*ptr == '\n')
|
||||
nr++;
|
||||
|
||||
if (nr >= count)
|
||||
return 0;
|
||||
|
||||
usleep(100000);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int cg_killall(const char *cgroup)
|
||||
{
|
||||
char buf[PAGE_SIZE];
|
||||
char *ptr = buf;
|
||||
|
@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup)
|
|||
retry:
|
||||
ret = rmdir(cgroup);
|
||||
if (ret && errno == EBUSY) {
|
||||
ret = cg_killall(cgroup);
|
||||
if (ret)
|
||||
return ret;
|
||||
cg_killall(cgroup);
|
||||
usleep(100);
|
||||
goto retry;
|
||||
}
|
||||
|
@ -240,6 +273,14 @@ retry:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Move the process @pid into @cgroup by writing its decimal pid to the
 * cgroup's "cgroup.procs" file. Returns whatever cg_write() returns.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];

	snprintf(pid_text, sizeof(pid_text), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", pid_text);
}
|
||||
|
||||
int cg_enter_current(const char *cgroup)
|
||||
{
|
||||
char pidbuf[64];
|
||||
|
@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score)
|
|||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char proc_read_text(int pid, const char *item, char *buf, size_t size)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
|
||||
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
|
||||
|
||||
return read_text(path, buf, size);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err)
|
|||
extern int cg_find_unified_root(char *root, size_t len);
|
||||
extern char *cg_name(const char *root, const char *name);
|
||||
extern char *cg_name_indexed(const char *root, const char *name, int index);
|
||||
extern char *cg_control(const char *cgroup, const char *control);
|
||||
extern int cg_create(const char *cgroup);
|
||||
extern int cg_destroy(const char *cgroup);
|
||||
extern int cg_read(const char *cgroup, const char *control,
|
||||
|
@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf);
|
|||
extern int cg_run(const char *cgroup,
|
||||
int (*fn)(const char *cgroup, void *arg),
|
||||
void *arg);
|
||||
extern int cg_enter(const char *cgroup, int pid);
|
||||
extern int cg_enter_current(const char *cgroup);
|
||||
extern int cg_run_nowait(const char *cgroup,
|
||||
int (*fn)(const char *cgroup, void *arg),
|
||||
|
@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size);
|
|||
extern int alloc_anon(const char *cgroup, void *arg);
|
||||
extern int is_swap_enabled(void);
|
||||
extern int set_oom_adj_score(int pid, int score);
|
||||
extern int cg_wait_for_proc_count(const char *cgroup, int count);
|
||||
extern int cg_killall(const char *cgroup);
|
||||
extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
|
||||
|
|
|
@ -0,0 +1,851 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <stdbool.h>
|
||||
#include <linux/limits.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <poll.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/inotify.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "../kselftest.h"
|
||||
#include "cgroup_util.h"
|
||||
|
||||
#define DEBUG
|
||||
#ifdef DEBUG
|
||||
#define debug(args...) fprintf(stderr, args)
|
||||
#else
|
||||
#define debug(args...)
|
||||
#endif
|
||||
|
||||
/*
 * Compare the cgroup's actual state with the expected one by searching
 * cgroup.events for "frozen 1" (when @frozen is true) or "frozen 0".
 *
 * Returns 0 if the expected string is found, -1 otherwise.
 */
static int cg_check_frozen(const char *cgroup, bool frozen)
{
	const char *wanted = frozen ? "frozen 1" : "frozen 0";

	if (cg_read_strstr(cgroup, "cgroup.events", wanted) == 0)
		return 0;

	if (frozen)
		debug("Cgroup %s isn't frozen\n", cgroup);
	else
		debug("Cgroup %s is frozen\n", cgroup);

	return -1;
}
|
||||
|
||||
/*
 * Request a state change by writing "1" (freeze) or "0" (unfreeze) to the
 * cgroup.freeze file, without waiting for the transition to complete.
 * Returns whatever cg_write() returns.
 */
static int cg_freeze_nowait(const char *cgroup, bool freeze)
{
	char *value = freeze ? "1" : "0";

	return cg_write(cgroup, "cgroup.freeze", value);
}
|
||||
|
||||
/*
 * Prepare for waiting on the cgroup.events file.
 *
 * Creates an inotify instance and adds an IN_MODIFY watch on the cgroup's
 * cgroup.events file.
 *
 * Returns the inotify file descriptor (to be closed by the caller) on
 * success, or -1 on failure. On failure no descriptor is left open.
 */
static int cg_prepare_for_wait(const char *cgroup)
{
	char *events;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1) {
		debug("Error: inotify_init1() failed\n");
		return -1;
	}

	/* The path is only needed to register the watch; free it right after. */
	events = cg_control(cgroup, "cgroup.events");
	wd = events ? inotify_add_watch(fd, events, IN_MODIFY) : -1;
	free(events);

	if (wd == -1) {
		debug("Error: inotify_add_watch() failed\n");
		close(fd);
		/* Don't hand a closed descriptor back to the caller. */
		return -1;
	}

	return fd;
}
|
||||
|
||||
/*
 * Wait for an event on @fd. If there are no events for 10 seconds,
 * treat this as an error.
 *
 * Returns 0 when @fd becomes readable, -1 on timeout, on a poll() failure
 * other than EINTR, or when poll() reports only an error condition.
 *
 * NOTE(review): the pending event is not read from @fd here, so a further
 * call on the same descriptor is expected to return immediately.
 */
static int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};

	while (true) {
		int ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			/* Interrupted by a signal: simply wait again. */
			if (errno == EINTR)
				continue;
			debug("Error: poll() failed\n");
			return -1;
		}

		/* poll() returns 0 when the timeout expires with no events. */
		if (ret == 0) {
			debug("Error: timed out waiting for an event\n");
			return -1;
		}

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only POLLERR/POLLHUP/POLLNVAL is set: polling again would
		 * return immediately with the same result, so give up.
		 */
		debug("Error: unexpected poll() event\n");
		return -1;
	}
}
|
||||
|
||||
/*
 * Attach a task to the given cgroup and wait for a cgroup frozen event.
 * All transient events (e.g. populated) are ignored.
 *
 * @cgroup: destination cgroup
 * @pid:    task to move
 * @frozen: state @cgroup is expected to reach after the move
 *
 * After the move, cgroup.events is checked after each notification, up to
 * 10 times. Returns 0 as soon as the expected state is observed, non-zero
 * if setup, the move or waiting fails, or the state is never reached.
 */
static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid,
					bool frozen)
{
	int fd, ret = -1;
	int attempts;

	fd = cg_prepare_for_wait(cgroup);
	if (fd < 0)
		return fd;

	ret = cg_enter(cgroup, pid);
	if (ret)
		goto out;

	for (attempts = 0; attempts < 10; attempts++) {
		ret = cg_wait_for(fd);
		if (ret)
			break;

		/*
		 * Stop at the first successful check; otherwise a later
		 * iteration could overwrite the success with a failure.
		 */
		ret = cg_check_frozen(cgroup, frozen);
		if (!ret)
			break;
	}

out:
	close(fd);
	return ret;
}
|
||||
|
||||
/*
 * Change the frozen state of a cgroup and wait for the notification.
 *
 * The watch on cgroup.events is set up before cgroup.freeze is written.
 * After a notification arrives, the cgroup state is compared with the
 * requested one.
 *
 * Returns 0 if the cgroup reached the requested state, non-zero otherwise.
 */
static int cg_freeze_wait(const char *cgroup, bool freeze)
{
	int status;
	int watch_fd = cg_prepare_for_wait(cgroup);

	if (watch_fd < 0)
		return watch_fd;

	status = cg_freeze_nowait(cgroup, freeze);
	if (status) {
		debug("Error: cg_freeze_nowait() failed\n");
	} else {
		status = cg_wait_for(watch_fd);
		if (!status)
			status = cg_check_frozen(cgroup, freeze);
	}

	close(watch_fd);
	return status;
}
|
||||
|
||||
/*
 * Test payload: sleep in 1ms steps for as long as the parent pid stays the
 * same, i.e. until the process gets re-parented.
 *
 * Returns the result of a final "parent unchanged" comparison.
 */
static int child_fn(const char *cgroup, void *arg)
{
	const int original_parent = getppid();

	for (;;) {
		if (getppid() != original_parent)
			break;
		usleep(1000);
	}

	return getppid() == original_parent;
}
|
||||
|
||||
/*
|
||||
* A simple test for the cgroup freezer: populated the cgroup with 100
|
||||
* running processes and freeze it. Then unfreeze it. Then it kills all
|
||||
* processes and destroys the cgroup.
|
||||
*/
|
||||
static int test_cgfreezer_simple(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
int i;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_simple");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < 100; i++)
|
||||
cg_run_nowait(cgroup, child_fn, NULL);
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 100))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The test creates the following hierarchy:
|
||||
* A
|
||||
* / / \ \
|
||||
* B E I K
|
||||
* /\ |
|
||||
* C D F
|
||||
* |
|
||||
* G
|
||||
* |
|
||||
* H
|
||||
*
|
||||
* with a process in C, H and 3 processes in K.
|
||||
* Then it tries to freeze and unfreeze the whole tree.
|
||||
*/
|
||||
static int test_cgfreezer_tree(const char *root)
|
||||
{
|
||||
char *cgroup[10] = {0};
|
||||
int ret = KSFT_FAIL;
|
||||
int i;
|
||||
|
||||
cgroup[0] = cg_name(root, "cg_test_tree_A");
|
||||
if (!cgroup[0])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[1] = cg_name(cgroup[0], "B");
|
||||
if (!cgroup[1])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[2] = cg_name(cgroup[1], "C");
|
||||
if (!cgroup[2])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[3] = cg_name(cgroup[1], "D");
|
||||
if (!cgroup[3])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[4] = cg_name(cgroup[0], "E");
|
||||
if (!cgroup[4])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[5] = cg_name(cgroup[4], "F");
|
||||
if (!cgroup[5])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[6] = cg_name(cgroup[5], "G");
|
||||
if (!cgroup[6])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[7] = cg_name(cgroup[6], "H");
|
||||
if (!cgroup[7])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[8] = cg_name(cgroup[0], "I");
|
||||
if (!cgroup[8])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[9] = cg_name(cgroup[0], "K");
|
||||
if (!cgroup[9])
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < 10; i++)
|
||||
if (cg_create(cgroup[i]))
|
||||
goto cleanup;
|
||||
|
||||
cg_run_nowait(cgroup[2], child_fn, NULL);
|
||||
cg_run_nowait(cgroup[7], child_fn, NULL);
|
||||
cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
|
||||
/*
|
||||
* Wait until all child processes will enter
|
||||
* corresponding cgroups.
|
||||
*/
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup[2], 1) ||
|
||||
cg_wait_for_proc_count(cgroup[7], 1) ||
|
||||
cg_wait_for_proc_count(cgroup[9], 3))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Freeze B.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[1], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Freeze F.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[5], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Freeze G.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[6], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Check that A and E are not frozen.
|
||||
*/
|
||||
if (cg_check_frozen(cgroup[0], false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[4], false))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Freeze A. Check that A, B and E are frozen.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[0], true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[1], true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[4], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Unfreeze B, F and G
|
||||
*/
|
||||
if (cg_freeze_nowait(cgroup[1], false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_nowait(cgroup[5], false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_nowait(cgroup[6], false))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Check that C and H are still frozen.
|
||||
*/
|
||||
if (cg_check_frozen(cgroup[2], true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[7], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Unfreeze A. Check that A, C and K are not frozen.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[0], false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[2], false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[9], false))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
for (i = 9; i >= 0 && cgroup[i]; i--) {
|
||||
cg_destroy(cgroup[i]);
|
||||
free(cgroup[i]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * A small fork bomb: forks twice in a row, then every resulting process
 * sleeps in 1ms steps until its parent pid changes.
 *
 * Returns the result of a final "parent unchanged" comparison.
 */
static int forkbomb_fn(const char *cgroup, void *arg)
{
	int initial_parent;

	fork();
	fork();

	initial_parent = getppid();

	for (;;) {
		if (getppid() != initial_parent)
			break;
		usleep(1000);
	}

	return getppid() == initial_parent;
}
|
||||
|
||||
/*
|
||||
* The test runs a fork bomb in a cgroup and tries to freeze it.
|
||||
* Then it kills all processes and checks that cgroup isn't populated
|
||||
* anymore.
|
||||
*/
|
||||
static int test_cgfreezer_forkbomb(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
|
||||
cgroup = cg_name(root, "cg_forkbomb_test");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
cg_run_nowait(cgroup, forkbomb_fn, NULL);
|
||||
|
||||
usleep(100000);
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_killall(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 0))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The test creates two nested cgroups, freezes the parent
|
||||
* and removes the child. Then it checks that the parent cgroup
|
||||
* remains frozen and it's possible to create a new child
|
||||
* without unfreezing. The new child is frozen too.
|
||||
*/
|
||||
static int test_cgfreezer_rmdir(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *parent, *child = NULL;
|
||||
|
||||
parent = cg_name(root, "cg_test_rmdir_A");
|
||||
if (!parent)
|
||||
goto cleanup;
|
||||
|
||||
child = cg_name(parent, "cg_test_rmdir_B");
|
||||
if (!child)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(parent))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(child))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(parent, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_destroy(child))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(parent, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(child))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(child, true))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (child)
|
||||
cg_destroy(child);
|
||||
free(child);
|
||||
if (parent)
|
||||
cg_destroy(parent);
|
||||
free(parent);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The test creates two cgroups: A and B, runs a process in A
|
||||
* and performs several migrations:
|
||||
* 1) A (running) -> B (frozen)
|
||||
* 2) B (frozen) -> A (running)
|
||||
* 3) A (frozen) -> B (frozen)
|
||||
*
|
||||
* On each step it checks the actual state of both cgroups.
|
||||
*/
|
||||
static int test_cgfreezer_migrate(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup[2] = {0};
|
||||
int pid;
|
||||
|
||||
cgroup[0] = cg_name(root, "cg_test_migrate_A");
|
||||
if (!cgroup[0])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[1] = cg_name(root, "cg_test_migrate_B");
|
||||
if (!cgroup[1])
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup[0]))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup[1]))
|
||||
goto cleanup;
|
||||
|
||||
pid = cg_run_nowait(cgroup[0], child_fn, NULL);
|
||||
if (pid < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup[0], 1))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Migrate from A (running) to B (frozen)
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[1], true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[0], false))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Migrate from B (frozen) to A (running)
|
||||
*/
|
||||
if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[1], true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Migrate from A (frozen) to B (frozen)
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup[0], true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup[0], true))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup[0])
|
||||
cg_destroy(cgroup[0]);
|
||||
free(cgroup[0]);
|
||||
if (cgroup[1])
|
||||
cg_destroy(cgroup[1]);
|
||||
free(cgroup[1]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The test checks that ptrace works with a tracing process in a frozen cgroup.
|
||||
*/
|
||||
static int test_cgfreezer_ptrace(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
siginfo_t siginfo;
|
||||
int pid;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_ptrace");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
pid = cg_run_nowait(cgroup, child_fn, NULL);
|
||||
if (pid < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 1))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
waitpid(pid, NULL, 0);
|
||||
|
||||
/*
|
||||
* Cgroup has to remain frozen, however the test task
|
||||
* is in traced state.
|
||||
*/
|
||||
if (cg_check_frozen(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the process is stopped.
|
||||
*/
|
||||
static int proc_check_stopped(int pid)
|
||||
{
|
||||
char buf[PAGE_SIZE];
|
||||
int len;
|
||||
|
||||
len = proc_read_text(pid, "stat", buf, sizeof(buf));
|
||||
if (len == -1) {
|
||||
debug("Can't get %d stat\n", pid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(buf, "(test_freezer) T ") == NULL) {
|
||||
debug("Process %d in the unexpected state: %s\n", pid, buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that it's possible to freeze a cgroup with a stopped process.
|
||||
*/
|
||||
static int test_cgfreezer_stopped(const char *root)
|
||||
{
|
||||
int pid, ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_stopped");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
pid = cg_run_nowait(cgroup, child_fn, NULL);
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 1))
|
||||
goto cleanup;
|
||||
|
||||
if (kill(pid, SIGSTOP))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_check_frozen(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
if (proc_check_stopped(pid))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that it's possible to freeze a cgroup with a ptraced process.
|
||||
*/
|
||||
static int test_cgfreezer_ptraced(const char *root)
|
||||
{
|
||||
int pid, ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
siginfo_t siginfo;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_ptraced");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
pid = cg_run_nowait(cgroup, child_fn, NULL);
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 1))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
waitpid(pid, NULL, 0);
|
||||
|
||||
if (cg_check_frozen(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* cg_check_frozen(cgroup, true) will fail here,
|
||||
* because the task in in the TRACEd state.
|
||||
*/
|
||||
if (cg_freeze_wait(cgroup, false))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
|
||||
goto cleanup;
|
||||
|
||||
if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Test payload: call vfork(). The child sleeps forever in 1-second steps;
 * in the caller the value returned by vfork() is passed back.
 */
static int vfork_fn(const char *cgroup, void *arg)
{
	int child = vfork();

	if (child != 0)
		return child;

	for (;;)
		sleep(1);
}
|
||||
|
||||
/*
|
||||
* Test that it's possible to freeze a cgroup with a process,
|
||||
* which called vfork() and is waiting for a child.
|
||||
*/
|
||||
static int test_cgfreezer_vfork(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_vfork");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
cg_run_nowait(cgroup, vfork_fn, NULL);
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 2))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_freeze_wait(cgroup, true))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define T(x) { x, #x }
|
||||
struct cgfreezer_test {
|
||||
int (*fn)(const char *root);
|
||||
const char *name;
|
||||
} tests[] = {
|
||||
T(test_cgfreezer_simple),
|
||||
T(test_cgfreezer_tree),
|
||||
T(test_cgfreezer_forkbomb),
|
||||
T(test_cgfreezer_rmdir),
|
||||
T(test_cgfreezer_migrate),
|
||||
T(test_cgfreezer_ptrace),
|
||||
T(test_cgfreezer_stopped),
|
||||
T(test_cgfreezer_ptraced),
|
||||
T(test_cgfreezer_vfork),
|
||||
};
|
||||
#undef T
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
char root[PATH_MAX];
|
||||
int i, ret = EXIT_SUCCESS;
|
||||
|
||||
if (cg_find_unified_root(root, sizeof(root)))
|
||||
ksft_exit_skip("cgroup v2 isn't mounted\n");
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++) {
|
||||
switch (tests[i].fn(root)) {
|
||||
case KSFT_PASS:
|
||||
ksft_test_result_pass("%s\n", tests[i].name);
|
||||
break;
|
||||
case KSFT_SKIP:
|
||||
ksft_test_result_skip("%s\n", tests[i].name);
|
||||
break;
|
||||
default:
|
||||
ret = EXIT_FAILURE;
|
||||
ksft_test_result_fail("%s\n", tests[i].name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
Loading…
Reference in New Issue