mm: follow_hugetlb_page flags

follow_hugetlb_page() shouldn't be guessing about the coredump case
either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test.  The decision is easy,
though unlike the non-huge case - here vm_ops->fault is always set.
But we know that a fault would serve up zeroes, unless there's
already a hugetlbfs pagecache page to back the range.

(Alternatively, since hugetlb pages aren't swapped out under pressure,
you could save more dump space by arguing that a page not yet faulted
into this process cannot be relevant to the dump; but that would be
more surprising.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Hugh Dickins 2009-09-21 17:03:27 -07:00 committed by Linus Torvalds
parent 8e4b9a6071
commit 2a15efc953
3 changed files with 48 additions and 32 deletions

View File

@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, int *, int, unsigned int flags);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *);
void __unmap_hugepage_range(struct vm_area_struct *,

View File

@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
return find_lock_page(mapping, idx);
}
/* Return whether there is a pagecache page to back given address within VMA */
static bool hugetlbfs_backed(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
{
struct address_space *mapping;
pgoff_t idx;
struct page *page;
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
page = find_get_page(mapping, idx);
if (page)
put_page(page);
return page != NULL;
}
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, unsigned int flags)
{
@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
return NULL;
}
static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
{
if (!ptep || write || shared)
return 0;
else
return huge_pte_none(huge_ptep_get(ptep));
}
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i,
int write)
unsigned int flags)
{
unsigned long pfn_offset;
unsigned long vaddr = *position;
int remainder = *length;
struct hstate *h = hstate_vma(vma);
int zeropage_ok = 0;
int shared = vma->vm_flags & VM_SHARED;
spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
pte_t *pte;
int absent;
struct page *page;
/*
* Some archs (sparc64, sh*) have multiple pte_ts to
* each hugepage. We have to make * sure we get the
* each hugepage. We have to make sure we get the
* first, for the page indexing below to work.
*/
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
if (huge_zeropage_ok(pte, write, shared))
zeropage_ok = 1;
absent = !pte || huge_pte_none(huge_ptep_get(pte));
if (!pte ||
(huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
(write && !pte_write(huge_ptep_get(pte)))) {
/*
* When coredumping, it suits get_dump_page if we just return
* an error if there's a hole and no huge pagecache to back it.
*/
if (absent &&
((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
remainder = 0;
break;
}
if (absent ||
((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
int ret;
spin_unlock(&mm->page_table_lock);
ret = hugetlb_fault(mm, vma, vaddr, write);
ret = hugetlb_fault(mm, vma, vaddr,
(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
spin_lock(&mm->page_table_lock);
if (!(ret & VM_FAULT_ERROR))
continue;
remainder = 0;
if (!i)
i = -EFAULT;
break;
}
@ -2266,9 +2281,6 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = pte_page(huge_ptep_get(pte));
same_page:
if (pages) {
if (zeropage_ok)
pages[i] = ZERO_PAGE(0);
else
pages[i] = mem_map_offset(page, pfn_offset);
get_page(pages[i]);
}
@ -2293,7 +2305,7 @@ same_page:
*length = remainder;
*position = vaddr;
return i;
return i ? i : -EFAULT;
}
void hugetlb_change_protection(struct vm_area_struct *vma,

View File

@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
!(vm_flags & vma->vm_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i, write);
continue;
}
foll_flags = FOLL_TOUCH;
if (pages)
foll_flags |= FOLL_GET;
if (flags & GUP_FLAGS_DUMP)
foll_flags |= FOLL_DUMP;
if (write)
foll_flags |= FOLL_WRITE;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i, foll_flags);
continue;
}
do {
struct page *page;