ksm: separate stable_node

Though we still do well to keep rmap_items in the unstable tree without a
separate tree_item at the node, for several reasons it becomes awkward to
keep rmap_items in the stable tree without a separate stable_node: lack of
space in the nicely-sized rmap_item, the need for an anchor as rmap_items
are removed, the need for a node even when temporarily no rmap_items are
attached to it.

So declare struct stable_node (rb_node to place it in the tree and
hlist_head for the rmap_items hanging off it), and convert stable tree
handling to use it: without yet taking advantage of it.  Note how one
stable_tree_insert() of a node now has _two_ stable_tree_append()s of the
two rmap_items being merged.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Hugh Dickins 2009-12-14 17:59:20 -08:00 committed by Linus Torvalds
parent 6514d511db
commit 7b6ba2c7d3
1 changed files with 101 additions and 79 deletions

172
mm/ksm.c
View File

@ -105,6 +105,16 @@ struct ksm_scan {
unsigned long seqnr; unsigned long seqnr;
}; };
/**
* struct stable_node - node of the stable rbtree
* @node: rb node of this ksm page in the stable tree
* @hlist: hlist head of rmap_items using this ksm page
*/
struct stable_node {
struct rb_node node;
struct hlist_head hlist;
};
/** /**
* struct rmap_item - reverse mapping item for virtual addresses * struct rmap_item - reverse mapping item for virtual addresses
* @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
@ -112,28 +122,28 @@ struct ksm_scan {
* @mm: the memory structure this rmap_item is pointing into * @mm: the memory structure this rmap_item is pointing into
* @address: the virtual address this rmap_item tracks (+ flags in low bits) * @address: the virtual address this rmap_item tracks (+ flags in low bits)
* @oldchecksum: previous checksum of the page at that virtual address * @oldchecksum: previous checksum of the page at that virtual address
* @node: rb_node of this rmap_item in either unstable or stable tree * @node: rb node of this rmap_item in the unstable tree
* @next: next rmap_item hanging off the same node of the stable tree * @head: pointer to stable_node heading this list in the stable tree
* @prev: previous rmap_item hanging off the same node of the stable tree * @hlist: link into hlist of rmap_items hanging off that stable_node
*/ */
struct rmap_item { struct rmap_item {
struct rmap_item *rmap_list; struct rmap_item *rmap_list;
unsigned long filler; unsigned long filler;
struct mm_struct *mm; struct mm_struct *mm;
unsigned long address; /* + low bits used for flags below */ unsigned long address; /* + low bits used for flags below */
union {
unsigned int oldchecksum; /* when unstable */ unsigned int oldchecksum; /* when unstable */
struct rmap_item *next; /* when stable */
};
union { union {
struct rb_node node; /* when tree node */ struct rb_node node; /* when node of unstable tree */
struct rmap_item *prev; /* in stable list */ struct { /* when listed from stable tree */
struct stable_node *head;
struct hlist_node hlist;
};
}; };
}; };
#define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */
#define NODE_FLAG 0x100 /* is a node of unstable or stable tree */ #define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */
#define STABLE_FLAG 0x200 /* is a node or list item of stable tree */ #define STABLE_FLAG 0x200 /* is listed from the stable tree */
/* The stable and unstable tree heads */ /* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT; static struct rb_root root_stable_tree = RB_ROOT;
@ -150,6 +160,7 @@ static struct ksm_scan ksm_scan = {
}; };
static struct kmem_cache *rmap_item_cache; static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache; static struct kmem_cache *mm_slot_cache;
/* The number of nodes in the stable tree */ /* The number of nodes in the stable tree */
@ -192,13 +203,19 @@ static int __init ksm_slab_init(void)
if (!rmap_item_cache) if (!rmap_item_cache)
goto out; goto out;
stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
if (!stable_node_cache)
goto out_free1;
mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0); mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
if (!mm_slot_cache) if (!mm_slot_cache)
goto out_free; goto out_free2;
return 0; return 0;
out_free: out_free2:
kmem_cache_destroy(stable_node_cache);
out_free1:
kmem_cache_destroy(rmap_item_cache); kmem_cache_destroy(rmap_item_cache);
out: out:
return -ENOMEM; return -ENOMEM;
@ -207,6 +224,7 @@ out:
static void __init ksm_slab_free(void) static void __init ksm_slab_free(void)
{ {
kmem_cache_destroy(mm_slot_cache); kmem_cache_destroy(mm_slot_cache);
kmem_cache_destroy(stable_node_cache);
kmem_cache_destroy(rmap_item_cache); kmem_cache_destroy(rmap_item_cache);
mm_slot_cache = NULL; mm_slot_cache = NULL;
} }
@ -228,6 +246,16 @@ static inline void free_rmap_item(struct rmap_item *rmap_item)
kmem_cache_free(rmap_item_cache, rmap_item); kmem_cache_free(rmap_item_cache, rmap_item);
} }
static inline struct stable_node *alloc_stable_node(void)
{
return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}
static inline void free_stable_node(struct stable_node *stable_node)
{
kmem_cache_free(stable_node_cache, stable_node);
}
static inline struct mm_slot *alloc_mm_slot(void) static inline struct mm_slot *alloc_mm_slot(void)
{ {
if (!mm_slot_cache) /* initialization failed */ if (!mm_slot_cache) /* initialization failed */
@ -429,36 +457,22 @@ static struct page *get_ksm_page(struct rmap_item *rmap_item)
*/ */
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{ {
if (in_stable_tree(rmap_item)) { if (rmap_item->address & STABLE_FLAG) {
struct rmap_item *next_item = rmap_item->next; struct stable_node *stable_node;
if (rmap_item->address & NODE_FLAG) { stable_node = rmap_item->head;
if (next_item) { hlist_del(&rmap_item->hlist);
rb_replace_node(&rmap_item->node, if (stable_node->hlist.first)
&next_item->node,
&root_stable_tree);
next_item->address |= NODE_FLAG;
ksm_pages_sharing--; ksm_pages_sharing--;
} else { else {
rb_erase(&rmap_item->node, &root_stable_tree); rb_erase(&stable_node->node, &root_stable_tree);
free_stable_node(stable_node);
ksm_pages_shared--; ksm_pages_shared--;
} }
} else {
struct rmap_item *prev_item = rmap_item->prev;
BUG_ON(prev_item->next != rmap_item);
prev_item->next = next_item;
if (next_item) {
BUG_ON(next_item->prev != rmap_item);
next_item->prev = rmap_item->prev;
}
ksm_pages_sharing--;
}
rmap_item->next = NULL;
rmap_item->address &= PAGE_MASK; rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & NODE_FLAG) { } else if (rmap_item->address & UNSTABLE_FLAG) {
unsigned char age; unsigned char age;
/* /*
* Usually ksmd can and must skip the rb_erase, because * Usually ksmd can and must skip the rb_erase, because
@ -859,31 +873,32 @@ up:
* This function checks if there is a page inside the stable tree * This function checks if there is a page inside the stable tree
* with identical content to the page that we are scanning right now. * with identical content to the page that we are scanning right now.
* *
* This function return rmap_item pointer to the identical item if found, * This function returns the stable tree node of identical content if found,
* NULL otherwise. * NULL otherwise.
*/ */
static struct rmap_item *stable_tree_search(struct page *page, static struct stable_node *stable_tree_search(struct page *page,
struct page **tree_pagep) struct page **tree_pagep)
{ {
struct rb_node *node = root_stable_tree.rb_node; struct rb_node *node = root_stable_tree.rb_node;
struct stable_node *stable_node;
while (node) { while (node) {
struct rmap_item *tree_rmap_item, *next_rmap_item; struct hlist_node *hlist, *hnext;
struct rmap_item *tree_rmap_item;
struct page *tree_page; struct page *tree_page;
int ret; int ret;
tree_rmap_item = rb_entry(node, struct rmap_item, node); stable_node = rb_entry(node, struct stable_node, node);
while (tree_rmap_item) { hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
&stable_node->hlist, hlist) {
BUG_ON(!in_stable_tree(tree_rmap_item)); BUG_ON(!in_stable_tree(tree_rmap_item));
cond_resched(); cond_resched();
tree_page = get_ksm_page(tree_rmap_item); tree_page = get_ksm_page(tree_rmap_item);
if (tree_page) if (tree_page)
break; break;
next_rmap_item = tree_rmap_item->next;
remove_rmap_item_from_tree(tree_rmap_item); remove_rmap_item_from_tree(tree_rmap_item);
tree_rmap_item = next_rmap_item;
} }
if (!tree_rmap_item) if (!hlist)
return NULL; return NULL;
ret = memcmp_pages(page, tree_page); ret = memcmp_pages(page, tree_page);
@ -896,7 +911,7 @@ static struct rmap_item *stable_tree_search(struct page *page,
node = node->rb_right; node = node->rb_right;
} else { } else {
*tree_pagep = tree_page; *tree_pagep = tree_page;
return tree_rmap_item; return stable_node;
} }
} }
@ -907,31 +922,32 @@ static struct rmap_item *stable_tree_search(struct page *page,
* stable_tree_insert - insert rmap_item pointing to new ksm page * stable_tree_insert - insert rmap_item pointing to new ksm page
* into the stable tree. * into the stable tree.
* *
* This function returns rmap_item if success, NULL otherwise. * This function returns the stable tree node just allocated on success,
* NULL otherwise.
*/ */
static struct rmap_item *stable_tree_insert(struct page *kpage, static struct stable_node *stable_tree_insert(struct page *kpage)
struct rmap_item *rmap_item)
{ {
struct rb_node **new = &root_stable_tree.rb_node; struct rb_node **new = &root_stable_tree.rb_node;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct stable_node *stable_node;
while (*new) { while (*new) {
struct rmap_item *tree_rmap_item, *next_rmap_item; struct hlist_node *hlist, *hnext;
struct rmap_item *tree_rmap_item;
struct page *tree_page; struct page *tree_page;
int ret; int ret;
tree_rmap_item = rb_entry(*new, struct rmap_item, node); stable_node = rb_entry(*new, struct stable_node, node);
while (tree_rmap_item) { hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
&stable_node->hlist, hlist) {
BUG_ON(!in_stable_tree(tree_rmap_item)); BUG_ON(!in_stable_tree(tree_rmap_item));
cond_resched(); cond_resched();
tree_page = get_ksm_page(tree_rmap_item); tree_page = get_ksm_page(tree_rmap_item);
if (tree_page) if (tree_page)
break; break;
next_rmap_item = tree_rmap_item->next;
remove_rmap_item_from_tree(tree_rmap_item); remove_rmap_item_from_tree(tree_rmap_item);
tree_rmap_item = next_rmap_item;
} }
if (!tree_rmap_item) if (!hlist)
return NULL; return NULL;
ret = memcmp_pages(kpage, tree_page); ret = memcmp_pages(kpage, tree_page);
@ -952,13 +968,16 @@ static struct rmap_item *stable_tree_insert(struct page *kpage,
} }
} }
rmap_item->address |= NODE_FLAG | STABLE_FLAG; stable_node = alloc_stable_node();
rmap_item->next = NULL; if (!stable_node)
rb_link_node(&rmap_item->node, parent, new); return NULL;
rb_insert_color(&rmap_item->node, &root_stable_tree);
ksm_pages_shared++; rb_link_node(&stable_node->node, parent, new);
return rmap_item; rb_insert_color(&stable_node->node, &root_stable_tree);
INIT_HLIST_HEAD(&stable_node->hlist);
return stable_node;
} }
/* /*
@ -1018,7 +1037,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
} }
} }
rmap_item->address |= NODE_FLAG; rmap_item->address |= UNSTABLE_FLAG;
rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
rb_link_node(&rmap_item->node, parent, new); rb_link_node(&rmap_item->node, parent, new);
rb_insert_color(&rmap_item->node, &root_unstable_tree); rb_insert_color(&rmap_item->node, &root_unstable_tree);
@ -1033,18 +1052,16 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
* the same ksm page. * the same ksm page.
*/ */
static void stable_tree_append(struct rmap_item *rmap_item, static void stable_tree_append(struct rmap_item *rmap_item,
struct rmap_item *tree_rmap_item) struct stable_node *stable_node)
{ {
rmap_item->next = tree_rmap_item->next; rmap_item->head = stable_node;
rmap_item->prev = tree_rmap_item;
if (tree_rmap_item->next)
tree_rmap_item->next->prev = rmap_item;
tree_rmap_item->next = rmap_item;
rmap_item->address |= STABLE_FLAG; rmap_item->address |= STABLE_FLAG;
hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
if (rmap_item->hlist.next)
ksm_pages_sharing++; ksm_pages_sharing++;
else
ksm_pages_shared++;
} }
/* /*
@ -1060,6 +1077,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{ {
struct rmap_item *tree_rmap_item; struct rmap_item *tree_rmap_item;
struct page *tree_page = NULL; struct page *tree_page = NULL;
struct stable_node *stable_node;
struct page *kpage; struct page *kpage;
unsigned int checksum; unsigned int checksum;
int err; int err;
@ -1067,8 +1085,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
remove_rmap_item_from_tree(rmap_item); remove_rmap_item_from_tree(rmap_item);
/* We first start with searching the page inside the stable tree */ /* We first start with searching the page inside the stable tree */
tree_rmap_item = stable_tree_search(page, &tree_page); stable_node = stable_tree_search(page, &tree_page);
if (tree_rmap_item) { if (stable_node) {
kpage = tree_page; kpage = tree_page;
if (page == kpage) /* forked */ if (page == kpage) /* forked */
err = 0; err = 0;
@ -1080,7 +1098,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
* The page was successfully merged: * The page was successfully merged:
* add its rmap_item to the stable tree. * add its rmap_item to the stable tree.
*/ */
stable_tree_append(rmap_item, tree_rmap_item); stable_tree_append(rmap_item, stable_node);
} }
put_page(kpage); put_page(kpage);
return; return;
@ -1121,19 +1139,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
if (kpage) { if (kpage) {
remove_rmap_item_from_tree(tree_rmap_item); remove_rmap_item_from_tree(tree_rmap_item);
stable_node = stable_tree_insert(kpage);
if (stable_node) {
stable_tree_append(tree_rmap_item, stable_node);
stable_tree_append(rmap_item, stable_node);
}
put_page(kpage);
/* /*
* If we fail to insert the page into the stable tree, * If we fail to insert the page into the stable tree,
* we will have 2 virtual addresses that are pointing * we will have 2 virtual addresses that are pointing
* to a ksm page left outside the stable tree, * to a ksm page left outside the stable tree,
* in which case we need to break_cow on both. * in which case we need to break_cow on both.
*/ */
if (stable_tree_insert(kpage, tree_rmap_item)) if (!stable_node) {
stable_tree_append(rmap_item, tree_rmap_item);
else {
break_cow(tree_rmap_item); break_cow(tree_rmap_item);
break_cow(rmap_item); break_cow(rmap_item);
} }
put_page(kpage);
} }
} }
} }