mm: fix vm-scalability regression in cgroup-aware workingset code
Commit 23047a96d7 ("mm: workingset: per-cgroup cache thrash detection") added a page->mem_cgroup lookup to the cache eviction, refault, and activation paths, as well as locking to the activation path, and the vm-scalability tests showed a regression of -23%.

While the test in question is an artificial worst-case scenario that doesn't occur in real workloads - reading two sparse files in parallel at full CPU speed just to hammer the LRU paths - there are still some optimizations that can be done in those paths.

Inline the lookup functions to eliminate calls. Also, page->mem_cgroup doesn't need to be stabilized when counting an activation; we merely need to hold the RCU lock to prevent the memcg from being freed.

This cuts down on overhead quite a bit:
23047a96d7       063f6715e77a7be5770d6081fe
---------------- --------------------------
         %stddev     %change         %stddev
             \          |                \
  21621405 +- 0%     +11.3%   24069657 +- 2%  vm-scalability.throughput

[linux@roeck-us.net: drop unnecessary include file]
[hannes@cmpxchg.org: add WARN_ON_ONCE()s]
  Link: http://lkml.kernel.org/r/20160707194024.GA26580@cmpxchg.org
Link: http://lkml.kernel.org/r/20160624175101.GA3024@cmpxchg.org
Reported-by: Ye Xiaolong <xiaolong.ye@intel.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
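A minimal sketch of the pattern described above (kernel-style C, not part of the patch; the sketch_* names and stand-alone stubs are made up for illustration, while the helpers they call - rcu_read_lock(), READ_ONCE(), WARN_ON_ONCE(), rcu_read_lock_held(), mem_cgroup_disabled() - are real kernel APIs):

/*
 * Read page->mem_cgroup under rcu_read_lock() instead of lock_page_memcg().
 * RCU does not stabilize the pointer against a concurrent charge move, but it
 * keeps the memcg object from being freed, which is all an activation count
 * needs.
 */
static inline struct mem_cgroup *sketch_page_memcg_rcu(struct page *page)
{
        WARN_ON_ONCE(!rcu_read_lock_held());    /* caller must hold the RCU read lock */
        return READ_ONCE(page->mem_cgroup);     /* one racy but safe pointer load */
}

static void sketch_count_activation(struct page *page)
{
        struct mem_cgroup *memcg;

        rcu_read_lock();                        /* cheaper than lock_page_memcg() */
        memcg = sketch_page_memcg_rcu(page);
        if (!mem_cgroup_disabled() && !memcg)
                goto out;                       /* e.g. VDSO pages have no memcg */
        /* ... bump the activation counter on memcg's lruvec here ... */
out:
        rcu_read_unlock();
}

The diff below applies exactly this idea to workingset_activation() and inlines the lruvec lookup helpers so the hot LRU paths avoid extra function calls.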
parent 400bc7fd4f
commit 55779ec759
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -314,7 +314,48 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+static inline struct mem_cgroup_per_zone *
+mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
+{
+	int nid = zone_to_nid(zone);
+	int zid = zone_idx(zone);
+
+	return &memcg->nodeinfo[nid]->zoneinfo[zid];
+}
+
+/**
+ * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
+ * @zone: zone of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for the given @zone and
+ * @mem. This can be the global zone lruvec, if the memory controller
+ * is disabled.
+ */
+static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+						    struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_zone *mz;
+	struct lruvec *lruvec;
+
+	if (mem_cgroup_disabled()) {
+		lruvec = &zone->lruvec;
+		goto out;
+	}
+
+	mz = mem_cgroup_zone_zoneinfo(memcg, zone);
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->zone here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->zone != zone))
+		lruvec->zone = zone;
+	return lruvec;
+}
+
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -973,11 +973,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
 {
 	return page->mem_cgroup;
 }
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return READ_ONCE(page->mem_cgroup);
+}
 #else
 static inline struct mem_cgroup *page_memcg(struct page *page)
 {
 	return NULL;
 }
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return NULL;
+}
 #endif
 
 /*
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -323,15 +323,6 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 #endif /* !CONFIG_SLOB */
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
-{
-	int nid = zone_to_nid(zone);
-	int zid = zone_idx(zone);
-
-	return &memcg->nodeinfo[nid]->zoneinfo[zid];
-}
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -943,39 +934,6 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	     iter != NULL;				\
 	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
-/**
- * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
- * @zone: zone of the wanted lruvec
- * @memcg: memcg of the wanted lruvec
- *
- * Returns the lru list vector holding pages for the given @zone and
- * @mem. This can be the global zone lruvec, if the memory controller
- * is disabled.
- */
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
-				      struct mem_cgroup *memcg)
-{
-	struct mem_cgroup_per_zone *mz;
-	struct lruvec *lruvec;
-
-	if (mem_cgroup_disabled()) {
-		lruvec = &zone->lruvec;
-		goto out;
-	}
-
-	mz = mem_cgroup_zone_zoneinfo(memcg, zone);
-	lruvec = &mz->lruvec;
-out:
-	/*
-	 * Since a node can be onlined after the mem_cgroup was created,
-	 * we have to be prepared to initialize lruvec->zone here;
-	 * and if offlined then reonlined, we need to reinitialize it.
-	 */
-	if (unlikely(lruvec->zone != zone))
-		lruvec->zone = zone;
-	return lruvec;
-}
-
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -305,9 +305,10 @@ bool workingset_refault(void *shadow)
  */
 void workingset_activation(struct page *page)
 {
+	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
-	lock_page_memcg(page);
+	rcu_read_lock();
 	/*
 	 * Filter non-memcg pages here, e.g. unmap can call
 	 * mark_page_accessed() on VDSO pages.
@@ -315,12 +316,13 @@ void workingset_activation(struct page *page)
 	 * XXX: See workingset_refault() - this should return
 	 * root_mem_cgroup even for !CONFIG_MEMCG.
 	 */
-	if (!mem_cgroup_disabled() && !page_memcg(page))
+	memcg = page_memcg_rcu(page);
+	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page));
+	lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg);
 	atomic_long_inc(&lruvec->inactive_age);
 out:
-	unlock_page_memcg(page);
+	rcu_read_unlock();
 }
 
 /*