2008-02-07 16:13:50 +08:00
|
|
|
/* memcontrol.h - Memory Controller
|
|
|
|
*
|
|
|
|
* Copyright IBM Corporation, 2007
|
|
|
|
* Author Balbir Singh <balbir@linux.vnet.ibm.com>
|
|
|
|
*
|
2008-02-07 16:13:51 +08:00
|
|
|
* Copyright 2007 OpenVZ SWsoft Inc
|
|
|
|
* Author: Pavel Emelianov <xemul@openvz.org>
|
|
|
|
*
|
2008-02-07 16:13:50 +08:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LINUX_MEMCONTROL_H
|
|
|
|
#define _LINUX_MEMCONTROL_H
|
2009-01-08 10:08:02 +08:00
|
|
|
#include <linux/cgroup.h>
|
2008-02-07 16:13:51 +08:00
|
|
|
struct mem_cgroup;
|
|
|
|
struct page_cgroup;
|
2008-02-07 16:13:59 +08:00
|
|
|
struct page;
|
|
|
|
struct mm_struct;
|
2008-02-07 16:13:51 +08:00
|
|
|
|
2008-03-05 06:28:39 +08:00
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
2009-01-08 10:08:10 +08:00
|
|
|
/*
|
|
|
|
* All "charge" functions with gfp_mask should use GFP_KERNEL or
|
|
|
|
* (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
|
|
|
|
* alloc memory but reclaims memory from all available zones. So, "where I want
|
|
|
|
* memory from" bits of gfp_mask has no meaning. So any bits of that field is
|
|
|
|
* available but adding a rule is better. charge functions' gfp_mask should
|
|
|
|
* be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
|
|
|
|
* codes.
|
|
|
|
* (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
|
|
|
|
*/
|
2008-02-07 16:13:51 +08:00
|
|
|
|
2009-01-08 10:07:48 +08:00
|
|
|
extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
|
2008-02-07 16:14:02 +08:00
|
|
|
gfp_t gfp_mask);
|
2009-01-08 10:07:48 +08:00
|
|
|
/* for swap handling */
|
2009-01-08 10:08:00 +08:00
|
|
|
extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
|
|
|
|
struct page *page, gfp_t mask, struct mem_cgroup **ptr);
|
2009-01-08 10:07:48 +08:00
|
|
|
extern void mem_cgroup_commit_charge_swapin(struct page *page,
|
|
|
|
struct mem_cgroup *ptr);
|
|
|
|
extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
|
|
|
|
|
2008-03-05 06:29:08 +08:00
|
|
|
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
|
|
|
|
gfp_t gfp_mask);
|
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 10:08:01 +08:00
|
|
|
extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
|
|
|
|
extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
|
|
|
|
extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
|
|
|
|
extern void mem_cgroup_del_lru(struct page *page);
|
|
|
|
extern void mem_cgroup_move_lists(struct page *page,
|
|
|
|
enum lru_list from, enum lru_list to);
|
2008-02-07 16:14:41 +08:00
|
|
|
extern void mem_cgroup_uncharge_page(struct page *page);
|
memcg: remove refcnt from page_cgroup
memcg: performance improvements
Patch Description
1/5 ... remove refcnt fron page_cgroup patch (shmem handling is fixed)
2/5 ... swapcache handling patch
3/5 ... add helper function for shmem's memory reclaim patch
4/5 ... optimize by likely/unlikely ppatch
5/5 ... remove redundunt check patch (shmem handling is fixed.)
Unix bench result.
== 2.6.26-rc2-mm1 + memory resource controller
Execl Throughput 2915.4 lps (29.6 secs, 3 samples)
C Compiler Throughput 1019.3 lpm (60.0 secs, 3 samples)
Shell Scripts (1 concurrent) 5796.0 lpm (60.0 secs, 3 samples)
Shell Scripts (8 concurrent) 1097.7 lpm (60.0 secs, 3 samples)
Shell Scripts (16 concurrent) 565.3 lpm (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks 1022128.0 KBps (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks 544057.0 KBps (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks 346481.0 KBps (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks 319325.0 KBps (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks 148788.0 KBps (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks 99051.0 KBps (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks 2058917.0 KBps (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks 1606109.0 KBps (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks 854789.0 KBps (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places 126145.2 lpm (30.0 secs, 3 samples)
INDEX VALUES
TEST BASELINE RESULT INDEX
Execl Throughput 43.0 2915.4 678.0
File Copy 1024 bufsize 2000 maxblocks 3960.0 346481.0 875.0
File Copy 256 bufsize 500 maxblocks 1655.0 99051.0 598.5
File Copy 4096 bufsize 8000 maxblocks 5800.0 854789.0 1473.8
Shell Scripts (8 concurrent) 6.0 1097.7 1829.5
=========
FINAL SCORE 991.3
== 2.6.26-rc2-mm1 + this set ==
Execl Throughput 3012.9 lps (29.9 secs, 3 samples)
C Compiler Throughput 981.0 lpm (60.0 secs, 3 samples)
Shell Scripts (1 concurrent) 5872.0 lpm (60.0 secs, 3 samples)
Shell Scripts (8 concurrent) 1120.3 lpm (60.0 secs, 3 samples)
Shell Scripts (16 concurrent) 578.0 lpm (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks 1003993.0 KBps (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks 550452.0 KBps (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks 347159.0 KBps (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks 314644.0 KBps (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks 151852.0 KBps (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks 101000.0 KBps (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks 2033256.0 KBps (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks 1611814.0 KBps (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks 847979.0 KBps (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places 128148.7 lpm (30.0 secs, 3 samples)
INDEX VALUES
TEST BASELINE RESULT INDEX
Execl Throughput 43.0 3012.9 700.7
File Copy 1024 bufsize 2000 maxblocks 3960.0 347159.0 876.7
File Copy 256 bufsize 500 maxblocks 1655.0 101000.0 610.3
File Copy 4096 bufsize 8000 maxblocks 5800.0 847979.0 1462.0
Shell Scripts (8 concurrent) 6.0 1120.3 1867.2
=========
FINAL SCORE 1004.6
This patch:
Remove refcnt from page_cgroup().
After this,
* A page is charged only when !page_mapped() && no page_cgroup is assigned.
* Anon page is newly mapped.
* File page is added to mapping->tree.
* A page is uncharged only when
* Anon page is fully unmapped.
* File page is removed from LRU.
There is no change in behavior from user's view.
This patch also removes unnecessary calls in rmap.c which was used only for
refcnt mangement.
[akpm@linux-foundation.org: fix warning]
[hugh@veritas.com: fix shmem_unuse_inode charging]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 16:47:14 +08:00
|
|
|
extern void mem_cgroup_uncharge_cache_page(struct page *page);
|
2009-05-01 06:08:19 +08:00
|
|
|
extern int mem_cgroup_shmem_charge_fallback(struct page *page,
|
2009-01-08 10:08:35 +08:00
|
|
|
struct mm_struct *mm, gfp_t gfp_mask);
|
2008-07-25 16:47:15 +08:00
|
|
|
|
2008-02-07 16:13:56 +08:00
|
|
|
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
|
|
|
|
struct list_head *dst,
|
|
|
|
unsigned long *scanned, int order,
|
|
|
|
int mode, struct zone *z,
|
|
|
|
struct mem_cgroup *mem_cont,
|
2008-10-19 11:26:32 +08:00
|
|
|
int active, int file);
|
2008-02-07 16:13:58 +08:00
|
|
|
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
|
2008-02-07 16:14:06 +08:00
|
|
|
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
|
2008-02-07 16:14:03 +08:00
|
|
|
|
cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and instead adds an owner.
This approach was suggested by Paul Menage. The advantage of this approach
is that, once the mm->owner is known, using the subsystem id, the cgroup
can be determined. It also allows several control groups that are
virtually grouped by mm_struct, to exist independent of the memory
controller i.e., without adding mem_cgroup's for each controller, to
mm_struct.
A new config option CONFIG_MM_OWNER is added and the memory resource
controller selects this config option.
This patch also adds cgroup callbacks to notify subsystems when mm->owner
changes. The mm_cgroup_changed callback is called with the task_lock() of
the new task held and is called just prior to changing the mm->owner.
I am indebted to Paul Menage for the several reviews of this patchset and
helping me make it lighter and simpler.
This patch was tested on a powerpc box, it was compiled with both the
MM_OWNER config turned on and off.
After the thread group leader exits, it's moved to init_css_state by
cgroup_exit(), thus all future charges from runnings threads would be
redirected to the init_css_set's subsystem.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: David Rientjes <rientjes@google.com>,
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 16:00:16 +08:00
|
|
|
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
|
|
|
|
|
2009-01-08 10:08:07 +08:00
|
|
|
static inline
|
|
|
|
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
|
|
|
|
{
|
|
|
|
struct mem_cgroup *mem;
|
|
|
|
rcu_read_lock();
|
2009-04-22 03:24:41 +08:00
|
|
|
mem = mem_cgroup_from_task(rcu_dereference((mm)->owner));
|
2009-01-08 10:08:07 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
return cgroup == mem;
|
|
|
|
}
|
2008-02-07 16:13:53 +08:00
|
|
|
|
2008-07-25 16:47:10 +08:00
|
|
|
extern int
|
2009-01-08 10:07:50 +08:00
|
|
|
mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
|
|
|
|
extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
|
|
|
|
struct page *oldpage, struct page *newpage);
|
2008-02-07 16:14:10 +08:00
|
|
|
|
2008-02-07 16:14:32 +08:00
|
|
|
/*
|
|
|
|
* For memory reclaim.
|
|
|
|
*/
|
2008-02-07 16:14:34 +08:00
|
|
|
extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
|
|
|
|
extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
|
|
|
|
int priority);
|
|
|
|
extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
|
|
|
|
int priority);
|
2009-01-08 10:08:25 +08:00
|
|
|
int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
|
2009-06-17 06:32:28 +08:00
|
|
|
int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
|
2009-01-08 10:08:19 +08:00
|
|
|
unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
|
|
|
|
struct zone *zone,
|
|
|
|
enum lru_list lru);
|
2009-01-08 10:08:20 +08:00
|
|
|
struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
|
|
|
|
struct zone *zone);
|
|
|
|
struct zone_reclaim_stat*
|
|
|
|
mem_cgroup_get_reclaim_stat_from_page(struct page *page);
|
2009-04-03 07:57:39 +08:00
|
|
|
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
|
|
|
|
struct task_struct *p);
|
2008-02-07 16:14:32 +08:00
|
|
|
|
2009-01-08 10:07:57 +08:00
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
|
|
|
|
extern int do_swap_account;
|
|
|
|
#endif
|
2009-01-08 10:08:02 +08:00
|
|
|
|
|
|
|
static inline bool mem_cgroup_disabled(void)
|
|
|
|
{
|
|
|
|
if (mem_cgroup_subsys.disabled)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-01-08 10:08:08 +08:00
|
|
|
extern bool mem_cgroup_oom_called(struct task_struct *task);
|
|
|
|
|
2008-10-19 11:28:16 +08:00
|
|
|
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
|
2009-01-08 10:07:48 +08:00
|
|
|
struct mem_cgroup;
|
|
|
|
|
|
|
|
static inline int mem_cgroup_newpage_charge(struct page *page,
|
2008-03-05 06:29:08 +08:00
|
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
2008-02-07 16:13:53 +08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-03-05 06:29:08 +08:00
|
|
|
static inline int mem_cgroup_cache_charge(struct page *page,
|
|
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
2008-02-07 16:13:53 +08:00
|
|
|
{
|
2008-03-05 06:29:08 +08:00
|
|
|
return 0;
|
2008-02-07 16:13:53 +08:00
|
|
|
}
|
|
|
|
|
2009-01-08 10:08:00 +08:00
|
|
|
static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
|
|
|
|
struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
|
2009-01-08 10:07:48 +08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_commit_charge_swapin(struct page *page,
|
|
|
|
struct mem_cgroup *ptr)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2008-02-07 16:13:53 +08:00
|
|
|
static inline void mem_cgroup_uncharge_page(struct page *page)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
memcg: remove refcnt from page_cgroup
memcg: performance improvements
Patch Description
1/5 ... remove refcnt fron page_cgroup patch (shmem handling is fixed)
2/5 ... swapcache handling patch
3/5 ... add helper function for shmem's memory reclaim patch
4/5 ... optimize by likely/unlikely ppatch
5/5 ... remove redundunt check patch (shmem handling is fixed.)
Unix bench result.
== 2.6.26-rc2-mm1 + memory resource controller
Execl Throughput 2915.4 lps (29.6 secs, 3 samples)
C Compiler Throughput 1019.3 lpm (60.0 secs, 3 samples)
Shell Scripts (1 concurrent) 5796.0 lpm (60.0 secs, 3 samples)
Shell Scripts (8 concurrent) 1097.7 lpm (60.0 secs, 3 samples)
Shell Scripts (16 concurrent) 565.3 lpm (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks 1022128.0 KBps (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks 544057.0 KBps (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks 346481.0 KBps (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks 319325.0 KBps (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks 148788.0 KBps (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks 99051.0 KBps (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks 2058917.0 KBps (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks 1606109.0 KBps (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks 854789.0 KBps (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places 126145.2 lpm (30.0 secs, 3 samples)
INDEX VALUES
TEST BASELINE RESULT INDEX
Execl Throughput 43.0 2915.4 678.0
File Copy 1024 bufsize 2000 maxblocks 3960.0 346481.0 875.0
File Copy 256 bufsize 500 maxblocks 1655.0 99051.0 598.5
File Copy 4096 bufsize 8000 maxblocks 5800.0 854789.0 1473.8
Shell Scripts (8 concurrent) 6.0 1097.7 1829.5
=========
FINAL SCORE 991.3
== 2.6.26-rc2-mm1 + this set ==
Execl Throughput 3012.9 lps (29.9 secs, 3 samples)
C Compiler Throughput 981.0 lpm (60.0 secs, 3 samples)
Shell Scripts (1 concurrent) 5872.0 lpm (60.0 secs, 3 samples)
Shell Scripts (8 concurrent) 1120.3 lpm (60.0 secs, 3 samples)
Shell Scripts (16 concurrent) 578.0 lpm (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks 1003993.0 KBps (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks 550452.0 KBps (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks 347159.0 KBps (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks 314644.0 KBps (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks 151852.0 KBps (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks 101000.0 KBps (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks 2033256.0 KBps (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks 1611814.0 KBps (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks 847979.0 KBps (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places 128148.7 lpm (30.0 secs, 3 samples)
INDEX VALUES
TEST BASELINE RESULT INDEX
Execl Throughput 43.0 3012.9 700.7
File Copy 1024 bufsize 2000 maxblocks 3960.0 347159.0 876.7
File Copy 256 bufsize 500 maxblocks 1655.0 101000.0 610.3
File Copy 4096 bufsize 8000 maxblocks 5800.0 847979.0 1462.0
Shell Scripts (8 concurrent) 6.0 1120.3 1867.2
=========
FINAL SCORE 1004.6
This patch:
Remove refcnt from page_cgroup().
After this,
* A page is charged only when !page_mapped() && no page_cgroup is assigned.
* Anon page is newly mapped.
* File page is added to mapping->tree.
* A page is uncharged only when
* Anon page is fully unmapped.
* File page is removed from LRU.
There is no change in behavior from user's view.
This patch also removes unnecessary calls in rmap.c which was used only for
refcnt mangement.
[akpm@linux-foundation.org: fix warning]
[hugh@veritas.com: fix shmem_unuse_inode charging]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 16:47:14 +08:00
|
|
|
static inline void mem_cgroup_uncharge_cache_page(struct page *page)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2009-05-01 06:08:19 +08:00
|
|
|
static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
|
2009-01-08 10:08:35 +08:00
|
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
2008-07-25 16:47:15 +08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 10:08:01 +08:00
|
|
|
static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
|
|
|
|
{
|
|
|
|
return ;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
|
|
|
|
{
|
|
|
|
return ;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_del_lru(struct page *page)
|
|
|
|
{
|
|
|
|
return ;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
|
2008-02-07 16:13:56 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2008-03-05 06:29:01 +08:00
|
|
|
static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
|
2008-02-07 16:14:01 +08:00
|
|
|
{
|
2008-02-09 16:10:15 +08:00
|
|
|
return 1;
|
2008-02-07 16:14:01 +08:00
|
|
|
}
|
|
|
|
|
2008-02-07 16:14:06 +08:00
|
|
|
static inline int task_in_mem_cgroup(struct task_struct *task,
|
|
|
|
const struct mem_cgroup *mem)
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2008-07-25 16:47:10 +08:00
|
|
|
static inline int
|
2009-01-08 10:07:50 +08:00
|
|
|
mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
|
2008-02-07 16:14:10 +08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-01-08 10:07:50 +08:00
|
|
|
static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
|
|
|
|
struct page *oldpage,
|
|
|
|
struct page *newpage)
|
2008-02-07 16:14:10 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2008-02-07 16:14:34 +08:00
|
|
|
static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
|
|
|
|
int priority)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
|
|
|
|
int priority)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2009-01-08 10:08:02 +08:00
|
|
|
static inline bool mem_cgroup_disabled(void)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
2009-01-08 10:08:08 +08:00
|
|
|
|
|
|
|
static inline bool mem_cgroup_oom_called(struct task_struct *task)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
2009-01-08 10:08:18 +08:00
|
|
|
|
|
|
|
static inline int
|
2009-01-08 10:08:25 +08:00
|
|
|
mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
|
2009-01-08 10:08:18 +08:00
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2009-06-17 06:32:28 +08:00
|
|
|
static inline int
|
|
|
|
mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2009-01-08 10:08:19 +08:00
|
|
|
static inline unsigned long
|
|
|
|
mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
|
|
|
|
enum lru_list lru)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-01-08 10:08:20 +08:00
|
|
|
static inline struct zone_reclaim_stat*
|
|
|
|
mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
|
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct zone_reclaim_stat*
|
|
|
|
mem_cgroup_get_reclaim_stat_from_page(struct page *page)
|
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-04-03 07:57:39 +08:00
|
|
|
static inline void
|
|
|
|
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2008-02-07 16:13:51 +08:00
|
|
|
#endif /* CONFIG_CGROUP_MEM_CONT */
|
|
|
|
|
2008-02-07 16:13:50 +08:00
|
|
|
#endif /* _LINUX_MEMCONTROL_H */
|
|
|
|
|