Merge branch 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"Two patches to fix a deadlock which can be easily triggered if memcg charge moving is used.

This bug was introduced while converting threadgroup locking to a global percpu_rwsem and is caused by cgroup controller task migration path depending on the ability to create new kthreads. cpuset had a similar issue which was fixed by performing heavy-lifting operations asynchronous to task migration. The two patches fix the same issue in memcg in a similar way. The first patch makes the mechanism generic and the second relocates memcg charge moving outside the migration path.

Given that we don't want to perform heavy operations while writelocking threadgroup lock anyway, moving them out of the way is a desirable solution. One thing to note is that the problem was difficult to debug because lockdep couldn't figure out the deadlock condition. Looking into how to improve that."

* 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  - memcg: relocate charge moving from ->attach to ->post_attach
  - cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback
This commit is contained in:
commit
763cfc86ee
|
@ -444,6 +444,7 @@ struct cgroup_subsys {
|
||||||
int (*can_attach)(struct cgroup_taskset *tset);
|
int (*can_attach)(struct cgroup_taskset *tset);
|
||||||
void (*cancel_attach)(struct cgroup_taskset *tset);
|
void (*cancel_attach)(struct cgroup_taskset *tset);
|
||||||
void (*attach)(struct cgroup_taskset *tset);
|
void (*attach)(struct cgroup_taskset *tset);
|
||||||
|
void (*post_attach)(void);
|
||||||
int (*can_fork)(struct task_struct *task);
|
int (*can_fork)(struct task_struct *task);
|
||||||
void (*cancel_fork)(struct task_struct *task);
|
void (*cancel_fork)(struct task_struct *task);
|
||||||
void (*fork)(struct task_struct *task);
|
void (*fork)(struct task_struct *task);
|
||||||
|
|
|
@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
|
||||||
task_unlock(current);
|
task_unlock(current);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void cpuset_post_attach_flush(void);
|
|
||||||
|
|
||||||
#else /* !CONFIG_CPUSETS */
|
#else /* !CONFIG_CPUSETS */
|
||||||
|
|
||||||
static inline bool cpusets_enabled(void) { return false; }
|
static inline bool cpusets_enabled(void) { return false; }
|
||||||
|
@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void cpuset_post_attach_flush(void)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* !CONFIG_CPUSETS */
|
#endif /* !CONFIG_CPUSETS */
|
||||||
|
|
||||||
#endif /* _LINUX_CPUSET_H */
|
#endif /* _LINUX_CPUSET_H */
|
||||||
|
|
|
@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
|
||||||
size_t nbytes, loff_t off, bool threadgroup)
|
size_t nbytes, loff_t off, bool threadgroup)
|
||||||
{
|
{
|
||||||
struct task_struct *tsk;
|
struct task_struct *tsk;
|
||||||
|
struct cgroup_subsys *ss;
|
||||||
struct cgroup *cgrp;
|
struct cgroup *cgrp;
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
int ret;
|
int ssid, ret;
|
||||||
|
|
||||||
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
|
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -2875,8 +2876,10 @@ out_unlock_rcu:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
out_unlock_threadgroup:
|
out_unlock_threadgroup:
|
||||||
percpu_up_write(&cgroup_threadgroup_rwsem);
|
percpu_up_write(&cgroup_threadgroup_rwsem);
|
||||||
|
for_each_subsys(ss, ssid)
|
||||||
|
if (ss->post_attach)
|
||||||
|
ss->post_attach();
|
||||||
cgroup_kn_unlock(of->kn);
|
cgroup_kn_unlock(of->kn);
|
||||||
cpuset_post_attach_flush();
|
|
||||||
return ret ?: nbytes;
|
return ret ?: nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,6 @@
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/workqueue.h>
|
|
||||||
#include <linux/cgroup.h>
|
#include <linux/cgroup.h>
|
||||||
#include <linux/wait.h>
|
#include <linux/wait.h>
|
||||||
|
|
||||||
|
@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cpuset_post_attach_flush(void)
|
static void cpuset_post_attach(void)
|
||||||
{
|
{
|
||||||
flush_workqueue(cpuset_migrate_mm_wq);
|
flush_workqueue(cpuset_migrate_mm_wq);
|
||||||
}
|
}
|
||||||
|
@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
|
||||||
.can_attach = cpuset_can_attach,
|
.can_attach = cpuset_can_attach,
|
||||||
.cancel_attach = cpuset_cancel_attach,
|
.cancel_attach = cpuset_cancel_attach,
|
||||||
.attach = cpuset_attach,
|
.attach = cpuset_attach,
|
||||||
|
.post_attach = cpuset_post_attach,
|
||||||
.bind = cpuset_bind,
|
.bind = cpuset_bind,
|
||||||
.legacy_cftypes = files,
|
.legacy_cftypes = files,
|
||||||
.early_init = true,
|
.early_init = true,
|
||||||
|
|
|
@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
|
||||||
/* "mc" and its members are protected by cgroup_mutex */
|
/* "mc" and its members are protected by cgroup_mutex */
|
||||||
static struct move_charge_struct {
|
static struct move_charge_struct {
|
||||||
spinlock_t lock; /* for from, to */
|
spinlock_t lock; /* for from, to */
|
||||||
|
struct mm_struct *mm;
|
||||||
struct mem_cgroup *from;
|
struct mem_cgroup *from;
|
||||||
struct mem_cgroup *to;
|
struct mem_cgroup *to;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
|
||||||
|
|
||||||
static void mem_cgroup_clear_mc(void)
|
static void mem_cgroup_clear_mc(void)
|
||||||
{
|
{
|
||||||
|
struct mm_struct *mm = mc.mm;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we must clear moving_task before waking up waiters at the end of
|
* we must clear moving_task before waking up waiters at the end of
|
||||||
* task migration.
|
* task migration.
|
||||||
|
@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
|
||||||
spin_lock(&mc.lock);
|
spin_lock(&mc.lock);
|
||||||
mc.from = NULL;
|
mc.from = NULL;
|
||||||
mc.to = NULL;
|
mc.to = NULL;
|
||||||
|
mc.mm = NULL;
|
||||||
spin_unlock(&mc.lock);
|
spin_unlock(&mc.lock);
|
||||||
|
|
||||||
|
mmput(mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||||
|
@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||||
VM_BUG_ON(mc.moved_swap);
|
VM_BUG_ON(mc.moved_swap);
|
||||||
|
|
||||||
spin_lock(&mc.lock);
|
spin_lock(&mc.lock);
|
||||||
|
mc.mm = mm;
|
||||||
mc.from = from;
|
mc.from = from;
|
||||||
mc.to = memcg;
|
mc.to = memcg;
|
||||||
mc.flags = move_flags;
|
mc.flags = move_flags;
|
||||||
|
@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||||
ret = mem_cgroup_precharge_mc(mm);
|
ret = mem_cgroup_precharge_mc(mm);
|
||||||
if (ret)
|
if (ret)
|
||||||
mem_cgroup_clear_mc();
|
mem_cgroup_clear_mc();
|
||||||
|
} else {
|
||||||
|
mmput(mm);
|
||||||
}
|
}
|
||||||
mmput(mm);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4852,11 +4860,11 @@ put: /* get_mctgt_type() gets the page */
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_cgroup_move_charge(struct mm_struct *mm)
|
static void mem_cgroup_move_charge(void)
|
||||||
{
|
{
|
||||||
struct mm_walk mem_cgroup_move_charge_walk = {
|
struct mm_walk mem_cgroup_move_charge_walk = {
|
||||||
.pmd_entry = mem_cgroup_move_charge_pte_range,
|
.pmd_entry = mem_cgroup_move_charge_pte_range,
|
||||||
.mm = mm,
|
.mm = mc.mm,
|
||||||
};
|
};
|
||||||
|
|
||||||
lru_add_drain_all();
|
lru_add_drain_all();
|
||||||
|
@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
|
||||||
atomic_inc(&mc.from->moving_account);
|
atomic_inc(&mc.from->moving_account);
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
retry:
|
retry:
|
||||||
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
|
if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
|
||||||
/*
|
/*
|
||||||
* Someone who are holding the mmap_sem might be waiting in
|
* Someone who are holding the mmap_sem might be waiting in
|
||||||
* waitq. So we cancel all extra charges, wake up all waiters,
|
* waitq. So we cancel all extra charges, wake up all waiters,
|
||||||
|
@ -4885,23 +4893,16 @@ retry:
|
||||||
* additional charge, the page walk just aborts.
|
* additional charge, the page walk just aborts.
|
||||||
*/
|
*/
|
||||||
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
|
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mc.mm->mmap_sem);
|
||||||
atomic_dec(&mc.from->moving_account);
|
atomic_dec(&mc.from->moving_account);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
|
static void mem_cgroup_move_task(void)
|
||||||
{
|
{
|
||||||
struct cgroup_subsys_state *css;
|
if (mc.to) {
|
||||||
struct task_struct *p = cgroup_taskset_first(tset, &css);
|
mem_cgroup_move_charge();
|
||||||
struct mm_struct *mm = get_task_mm(p);
|
|
||||||
|
|
||||||
if (mm) {
|
|
||||||
if (mc.to)
|
|
||||||
mem_cgroup_move_charge(mm);
|
|
||||||
mmput(mm);
|
|
||||||
}
|
|
||||||
if (mc.to)
|
|
||||||
mem_cgroup_clear_mc();
|
mem_cgroup_clear_mc();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#else /* !CONFIG_MMU */
|
#else /* !CONFIG_MMU */
|
||||||
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||||
|
@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||||
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
|
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
|
static void mem_cgroup_move_task(void)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
|
||||||
.css_reset = mem_cgroup_css_reset,
|
.css_reset = mem_cgroup_css_reset,
|
||||||
.can_attach = mem_cgroup_can_attach,
|
.can_attach = mem_cgroup_can_attach,
|
||||||
.cancel_attach = mem_cgroup_cancel_attach,
|
.cancel_attach = mem_cgroup_cancel_attach,
|
||||||
.attach = mem_cgroup_move_task,
|
.post_attach = mem_cgroup_move_task,
|
||||||
.bind = mem_cgroup_bind,
|
.bind = mem_cgroup_bind,
|
||||||
.dfl_cftypes = memory_files,
|
.dfl_cftypes = memory_files,
|
||||||
.legacy_cftypes = mem_cgroup_legacy_files,
|
.legacy_cftypes = mem_cgroup_legacy_files,
|
||||||
|
|
Loading…
Reference in New Issue