memcg: simplify corner case handling of LRU.
This patch simplifies LRU handling of racy case (memcg+SwapCache). At charging, SwapCache tend to be on LRU already. So, before overwriting pc->mem_cgroup, the page must be removed from LRU and added to LRU later. This patch does spin_lock(zone->lru_lock); if (PageLRU(page)) remove from LRU overwrite pc->mem_cgroup if (PageLRU(page)) add to new LRU. spin_unlock(zone->lru_lock); And guarantee all pages are not on LRU at modifying pc->mem_cgroup. This patch also unfies lru handling of replace_page_cache() and swapin. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Miklos Szeredi <mszeredi@suse.cz> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Ying Han <yinghan@google.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
dc67d50465
commit
36b62ad539
109
mm/memcontrol.c
109
mm/memcontrol.c
|
@ -1136,86 +1136,6 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
|
|||
return mem_cgroup_lru_add_list(zone, page, to);
|
||||
}
|
||||
|
||||
/*
|
||||
* At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed
|
||||
* while it's linked to lru because the page may be reused after it's fully
|
||||
* uncharged. To handle that, unlink page_cgroup from LRU when charge it again.
|
||||
* It's done under lock_page and expected that zone->lru_lock isnever held.
|
||||
*/
|
||||
static void mem_cgroup_lru_del_before_commit(struct page *page)
|
||||
{
|
||||
enum lru_list lru;
|
||||
unsigned long flags;
|
||||
struct zone *zone = page_zone(page);
|
||||
struct page_cgroup *pc = lookup_page_cgroup(page);
|
||||
|
||||
/*
|
||||
* Doing this check without taking ->lru_lock seems wrong but this
|
||||
* is safe. Because if page_cgroup's USED bit is unset, the page
|
||||
* will not be added to any memcg's LRU. If page_cgroup's USED bit is
|
||||
* set, the commit after this will fail, anyway.
|
||||
* This all charge/uncharge is done under some mutual execustion.
|
||||
* So, we don't need to taking care of changes in USED bit.
|
||||
*/
|
||||
if (likely(!PageLRU(page)))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
lru = page_lru(page);
|
||||
/*
|
||||
* The uncharged page could still be registered to the LRU of
|
||||
* the stale pc->mem_cgroup.
|
||||
*
|
||||
* As pc->mem_cgroup is about to get overwritten, the old LRU
|
||||
* accounting needs to be taken care of. Let root_mem_cgroup
|
||||
* babysit the page until the new memcg is responsible for it.
|
||||
*
|
||||
* The PCG_USED bit is guarded by lock_page() as the page is
|
||||
* swapcache/pagecache.
|
||||
*/
|
||||
if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
|
||||
del_page_from_lru_list(zone, page, lru);
|
||||
add_page_to_lru_list(zone, page, lru);
|
||||
}
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
}
|
||||
|
||||
static void mem_cgroup_lru_add_after_commit(struct page *page)
|
||||
{
|
||||
enum lru_list lru;
|
||||
unsigned long flags;
|
||||
struct zone *zone = page_zone(page);
|
||||
struct page_cgroup *pc = lookup_page_cgroup(page);
|
||||
/*
|
||||
* putback: charge:
|
||||
* SetPageLRU SetPageCgroupUsed
|
||||
* smp_mb smp_mb
|
||||
* PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
|
||||
*
|
||||
* Ensure that one of the two sides adds the page to the memcg
|
||||
* LRU during a race.
|
||||
*/
|
||||
smp_mb();
|
||||
/* taking care of that the page is added to LRU while we commit it */
|
||||
if (likely(!PageLRU(page)))
|
||||
return;
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
lru = page_lru(page);
|
||||
/*
|
||||
* If the page is not on the LRU, someone will soon put it
|
||||
* there. If it is, and also already accounted for on the
|
||||
* memcg-side, it must be on the right lruvec as setting
|
||||
* pc->mem_cgroup and PageCgroupUsed is properly ordered.
|
||||
* Otherwise, root_mem_cgroup has been babysitting the page
|
||||
* during the charge. Move it to the new memcg now.
|
||||
*/
|
||||
if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
|
||||
del_page_from_lru_list(zone, page, lru);
|
||||
add_page_to_lru_list(zone, page, lru);
|
||||
}
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks whether given mem is same or in the root_mem_cgroup's
|
||||
* hierarchy subtree
|
||||
|
@ -2775,14 +2695,27 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
|
|||
enum charge_type ctype)
|
||||
{
|
||||
struct page_cgroup *pc = lookup_page_cgroup(page);
|
||||
struct zone *zone = page_zone(page);
|
||||
unsigned long flags;
|
||||
bool removed = false;
|
||||
|
||||
/*
|
||||
* In some case, SwapCache, FUSE(splice_buf->radixtree), the page
|
||||
* is already on LRU. It means the page may on some other page_cgroup's
|
||||
* LRU. Take care of it.
|
||||
*/
|
||||
mem_cgroup_lru_del_before_commit(page);
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
if (PageLRU(page)) {
|
||||
del_page_from_lru_list(zone, page, page_lru(page));
|
||||
ClearPageLRU(page);
|
||||
removed = true;
|
||||
}
|
||||
__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
|
||||
mem_cgroup_lru_add_after_commit(page);
|
||||
if (removed) {
|
||||
add_page_to_lru_list(zone, page, page_lru(page));
|
||||
SetPageLRU(page);
|
||||
}
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3383,9 +3316,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
|
|||
{
|
||||
struct mem_cgroup *memcg;
|
||||
struct page_cgroup *pc;
|
||||
struct zone *zone;
|
||||
enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
|
||||
unsigned long flags;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
|
@ -3401,20 +3332,12 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
|
|||
if (PageSwapBacked(oldpage))
|
||||
type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
|
||||
|
||||
zone = page_zone(newpage);
|
||||
pc = lookup_page_cgroup(newpage);
|
||||
/*
|
||||
* Even if newpage->mapping was NULL before starting replacement,
|
||||
* the newpage may be on LRU(or pagevec for LRU) already. We lock
|
||||
* LRU while we overwrite pc->mem_cgroup.
|
||||
*/
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
if (PageLRU(newpage))
|
||||
del_page_from_lru_list(zone, newpage, page_lru(newpage));
|
||||
__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
|
||||
if (PageLRU(newpage))
|
||||
add_page_to_lru_list(zone, newpage, page_lru(newpage));
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
|
|
Loading…
Reference in New Issue