memcg: clean up waiting move acct

Now, for checking whether a memcg is under task-account moving, we do css_tryget()
against mc.to and mc.from.  But this just complicates things.  This patch makes
the check simpler.

This patch adds a spinlock to move_charge_struct and uses it to guard modification
of mc.to and mc.from.  With this, we don't have to think about complicated races
around this not-critical path.
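
(Illustration only, not part of the patch: below is a minimal userspace sketch of the
locking pattern described above, using a pthread spinlock in place of the kernel
spinlock_t.  The names struct move_ctx, set_move() and under_move() are invented for
the example; only the shape of the check mirrors mem_cgroup_under_move().)

  #include <pthread.h>
  #include <stdbool.h>
  #include <stddef.h>

  struct group;                         /* stand-in for struct mem_cgroup */

  /* Simplified analogue of move_charge_struct: one global "move in flight". */
  static struct move_ctx {
          pthread_spinlock_t lock;      /* guards from and to */
          struct group *from;
          struct group *to;
  } mc;

  /* pthread spinlocks have no static initializer; call this once at startup. */
  static void move_ctx_init(void)
  {
          pthread_spin_init(&mc.lock, PTHREAD_PROCESS_PRIVATE);
  }

  /* Writer side: publish or clear the pair only while holding the lock. */
  static void set_move(struct group *from, struct group *to)
  {
          pthread_spin_lock(&mc.lock);
          mc.from = from;
          mc.to = to;
          pthread_spin_unlock(&mc.lock);
  }

  /* Reader side: a plain locked read replaces the css_tryget() juggling. */
  static bool under_move(const struct group *g)
  {
          bool ret;

          pthread_spin_lock(&mc.lock);
          ret = mc.from && (mc.from == g || mc.to == g);
          pthread_spin_unlock(&mc.lock);
          return ret;
  }

In the sketch, set_move(NULL, NULL) plays the role of clearing the move context; the
hierarchy checks done via css_is_ancestor() in the real code are omitted for brevity.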

[balbir@linux.vnet.ibm.com: don't crash on a null memcg being passed]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Commit: 2bd9bb206b (parent: 4b53433468)
Author: KAMEZAWA Hiroyuki, 2010-08-10 18:02:58 -07:00
Committer: Linus Torvalds
1 file changed, 29 insertions(+), 22 deletions(-)

@@ -268,6 +268,7 @@ enum move_type {
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
+        spinlock_t        lock; /* for from, to, moving_task */
         struct mem_cgroup *from;
         struct mem_cgroup *to;
         unsigned long precharge;
@@ -276,6 +277,7 @@ static struct move_charge_struct {
         struct task_struct *moving_task;        /* a task moving charges */
         wait_queue_head_t waitq;                /* a waitq for other context */
 } mc = {
+        .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
         .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
@@ -1051,26 +1053,24 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 static bool mem_cgroup_under_move(struct mem_cgroup *mem)
 {
-        struct mem_cgroup *from = mc.from;
-        struct mem_cgroup *to = mc.to;
+        struct mem_cgroup *from;
+        struct mem_cgroup *to;
         bool ret = false;
-
-        if (from == mem || to == mem)
-                return true;
-
-        if (!from || !to || !mem->use_hierarchy)
-                return false;
-
-        rcu_read_lock();
-        if (css_tryget(&from->css)) {
-                ret = css_is_ancestor(&from->css, &mem->css);
-                css_put(&from->css);
-        }
-        if (!ret && css_tryget(&to->css)) {
-                ret = css_is_ancestor(&to->css, &mem->css);
-                css_put(&to->css);
-        }
-        rcu_read_unlock();
+        /*
+         * Unlike task_move routines, we access mc.to, mc.from not under
+         * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
+         */
+        spin_lock(&mc.lock);
+        from = mc.from;
+        to = mc.to;
+        if (!from)
+                goto unlock;
+        if (from == mem || to == mem
+            || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
+            || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
+                ret = true;
+unlock:
+        spin_unlock(&mc.lock);
         return ret;
 }
@@ -1406,7 +1406,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem)
 static void memcg_oom_recover(struct mem_cgroup *mem)
 {
-        if (atomic_read(&mem->oom_lock))
+        if (mem && atomic_read(&mem->oom_lock))
                 memcg_wakeup_oom(mem);
 }
@@ -4441,11 +4441,13 @@ static int mem_cgroup_precharge_mc(struct mm_struct *mm)
 static void mem_cgroup_clear_mc(void)
 {
+        struct mem_cgroup *from = mc.from;
+        struct mem_cgroup *to = mc.to;
+
         /* we must uncharge all the leftover precharges from mc.to */
         if (mc.precharge) {
                 __mem_cgroup_cancel_charge(mc.to, mc.precharge);
                 mc.precharge = 0;
-                memcg_oom_recover(mc.to);
         }
         /*
          * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
@@ -4454,7 +4456,6 @@ static void mem_cgroup_clear_mc(void)
         if (mc.moved_charge) {
                 __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
                 mc.moved_charge = 0;
-                memcg_oom_recover(mc.from);
         }
         /* we must fixup refcnts and charges */
         if (mc.moved_swap) {
@@ -4479,9 +4480,13 @@ static void mem_cgroup_clear_mc(void)
                 mc.moved_swap = 0;
         }
+        spin_lock(&mc.lock);
         mc.from = NULL;
         mc.to = NULL;
         mc.moving_task = NULL;
+        spin_unlock(&mc.lock);
+        memcg_oom_recover(from);
+        memcg_oom_recover(to);
         wake_up_all(&mc.waitq);
 }
@@ -4510,12 +4515,14 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                         VM_BUG_ON(mc.moved_charge);
                         VM_BUG_ON(mc.moved_swap);
                         VM_BUG_ON(mc.moving_task);
+                        spin_lock(&mc.lock);
                         mc.from = from;
                         mc.to = mem;
                         mc.precharge = 0;
                         mc.moved_charge = 0;
                         mc.moved_swap = 0;
                         mc.moving_task = current;
+                        spin_unlock(&mc.lock);
                         ret = mem_cgroup_precharge_mc(mm);
                         if (ret)