thp: introduce khugepaged_prealloc_page and khugepaged_alloc_page

They are used to abstract the difference between NUMA enabled and NUMA
disabled to make the code more readable

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Xiao Guangrong 2012-10-08 16:29:51 -07:00 committed by Linus Torvalds
parent 420256ef02
commit 26234f36ef
1 changed files with 108 additions and 78 deletions

View File

@ -1827,11 +1827,110 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
}
}
static void khugepaged_alloc_sleep(void)
{
wait_event_freezable_timeout(khugepaged_wait, false,
msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
}
#ifdef CONFIG_NUMA
static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
{
if (IS_ERR(*hpage)) {
if (!*wait)
return false;
*wait = false;
khugepaged_alloc_sleep();
} else if (*hpage) {
put_page(*hpage);
*hpage = NULL;
}
return true;
}
static struct page
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
int node)
{
VM_BUG_ON(*hpage);
/*
* Allocate the page while the vma is still valid and under
* the mmap_sem read mode so there is no memory allocation
* later when we take the mmap_sem in write mode. This is more
* friendly behavior (OTOH it may actually hide bugs) to
* filesystems in userland with daemons allocating memory in
* the userland I/O paths. Allocating memory with the
* mmap_sem in read mode is good idea also to allow greater
* scalability.
*/
*hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
node, __GFP_OTHER_NODE);
/*
* After allocating the hugepage, release the mmap_sem read lock in
* preparation for taking it in write mode.
*/
up_read(&mm->mmap_sem);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
return NULL;
}
count_vm_event(THP_COLLAPSE_ALLOC);
return *hpage;
}
#else
static struct page *khugepaged_alloc_hugepage(bool *wait)
{
struct page *hpage;
do {
hpage = alloc_hugepage(khugepaged_defrag());
if (!hpage) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
if (!*wait)
return NULL;
*wait = false;
khugepaged_alloc_sleep();
} else
count_vm_event(THP_COLLAPSE_ALLOC);
} while (unlikely(!hpage) && likely(khugepaged_enabled()));
return hpage;
}
static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
{
if (!*hpage)
*hpage = khugepaged_alloc_hugepage(wait);
if (unlikely(!*hpage))
return false;
return true;
}
static struct page
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
int node)
{
up_read(&mm->mmap_sem);
VM_BUG_ON(!*hpage);
return *hpage;
}
#endif
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
struct vm_area_struct *vma,
int node)
unsigned long address,
struct page **hpage,
struct vm_area_struct *vma,
int node)
{
pgd_t *pgd;
pud_t *pud;
@ -1844,38 +1943,11 @@ static void collapse_huge_page(struct mm_struct *mm,
unsigned long hstart, hend;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
#ifndef CONFIG_NUMA
up_read(&mm->mmap_sem);
VM_BUG_ON(!*hpage);
new_page = *hpage;
#else
VM_BUG_ON(*hpage);
/*
* Allocate the page while the vma is still valid and under
* the mmap_sem read mode so there is no memory allocation
* later when we take the mmap_sem in write mode. This is more
* friendly behavior (OTOH it may actually hide bugs) to
* filesystems in userland with daemons allocating memory in
* the userland I/O paths. Allocating memory with the
* mmap_sem in read mode is good idea also to allow greater
* scalability.
*/
new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
node, __GFP_OTHER_NODE);
/*
* After allocating the hugepage, release the mmap_sem read lock in
* preparation for taking it in write mode.
*/
up_read(&mm->mmap_sem);
if (unlikely(!new_page)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
/* release the mmap_sem read lock. */
new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
if (!new_page)
return;
}
*hpage = new_page;
count_vm_event(THP_COLLAPSE_ALLOC);
#endif
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
return;
@ -2215,34 +2287,6 @@ static int khugepaged_wait_event(void)
kthread_should_stop();
}
static void khugepaged_alloc_sleep(void)
{
wait_event_freezable_timeout(khugepaged_wait, false,
msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
}
#ifndef CONFIG_NUMA
static struct page *khugepaged_alloc_hugepage(bool *wait)
{
struct page *hpage;
do {
hpage = alloc_hugepage(khugepaged_defrag());
if (!hpage) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
if (!*wait)
return NULL;
*wait = false;
khugepaged_alloc_sleep();
} else
count_vm_event(THP_COLLAPSE_ALLOC);
} while (unlikely(!hpage) && likely(khugepaged_enabled()));
return hpage;
}
#endif
static void khugepaged_do_scan(void)
{
struct page *hpage = NULL;
@ -2253,23 +2297,9 @@ static void khugepaged_do_scan(void)
barrier(); /* write khugepaged_pages_to_scan to local stack */
while (progress < pages) {
#ifndef CONFIG_NUMA
if (!hpage)
hpage = khugepaged_alloc_hugepage(&wait);
if (unlikely(!hpage))
if (!khugepaged_prealloc_page(&hpage, &wait))
break;
#else
if (IS_ERR(hpage)) {
if (!wait)
break;
wait = false;
khugepaged_alloc_sleep();
} else if (hpage) {
put_page(hpage);
hpage = NULL;
}
#endif
cond_resched();
if (unlikely(kthread_should_stop() || freezing(current)))