mm: introduce fault_env
The idea is borrowed from Peter's patch from the patchset on speculative page faults[1]: instead of passing around the endless list of function arguments, replace the lot with a single structure so we can change context without endless function signature changes. The changes are mostly mechanical, with the exception of the faultaround code: filemap_map_pages() got reworked a bit. This patch is preparation for the next one. [1] http://lkml.kernel.org/r/20141020222841.302891540@infradead.org Link: http://lkml.kernel.org/r/1466021202-61880-9-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
dcddffd41d
commit
bae473a423
|
@ -548,13 +548,13 @@ subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
|
||||||
locked. The VM will unlock the page.
|
locked. The VM will unlock the page.
|
||||||
|
|
||||||
->map_pages() is called when VM asks to map easy accessible pages.
|
->map_pages() is called when VM asks to map easy accessible pages.
|
||||||
Filesystem should find and map pages associated with offsets from "pgoff"
|
Filesystem should find and map pages associated with offsets from "start_pgoff"
|
||||||
till "max_pgoff". ->map_pages() is called with page table locked and must
|
till "end_pgoff". ->map_pages() is called with page table locked and must
|
||||||
not block. If it's not possible to reach a page without blocking,
|
not block. If it's not possible to reach a page without blocking,
|
||||||
filesystem should skip it. Filesystem should use do_set_pte() to setup
|
filesystem should skip it. Filesystem should use do_set_pte() to setup
|
||||||
page table entry. Pointer to entry associated with offset "pgoff" is
|
page table entry. Pointer to entry associated with the page is passed in
|
||||||
passed in "pte" field in vm_fault structure. Pointers to entries for other
|
"pte" field in fault_env structure. Pointers to entries for other offsets
|
||||||
offsets should be calculated relative to "pte".
|
should be calculated relative to "pte".
|
||||||
|
|
||||||
->page_mkwrite() is called when a previously read-only pte is
|
->page_mkwrite() is called when a previously read-only pte is
|
||||||
about to become writeable. The filesystem again must ensure that there are
|
about to become writeable. The filesystem again must ensure that there are
|
||||||
|
|
|
@ -257,10 +257,9 @@ out:
|
||||||
* fatal_signal_pending()s, and the mmap_sem must be released before
|
* fatal_signal_pending()s, and the mmap_sem must be released before
|
||||||
* returning it.
|
* returning it.
|
||||||
*/
|
*/
|
||||||
int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
int handle_userfault(struct fault_env *fe, unsigned long reason)
|
||||||
unsigned int flags, unsigned long reason)
|
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = fe->vma->vm_mm;
|
||||||
struct userfaultfd_ctx *ctx;
|
struct userfaultfd_ctx *ctx;
|
||||||
struct userfaultfd_wait_queue uwq;
|
struct userfaultfd_wait_queue uwq;
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -269,7 +268,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
||||||
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
|
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
|
||||||
|
|
||||||
ret = VM_FAULT_SIGBUS;
|
ret = VM_FAULT_SIGBUS;
|
||||||
ctx = vma->vm_userfaultfd_ctx.ctx;
|
ctx = fe->vma->vm_userfaultfd_ctx.ctx;
|
||||||
if (!ctx)
|
if (!ctx)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@ -302,17 +301,17 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
||||||
* without first stopping userland access to the memory. For
|
* without first stopping userland access to the memory. For
|
||||||
* VM_UFFD_MISSING userfaults this is enough for now.
|
* VM_UFFD_MISSING userfaults this is enough for now.
|
||||||
*/
|
*/
|
||||||
if (unlikely(!(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
|
||||||
/*
|
/*
|
||||||
* Validate the invariant that nowait must allow retry
|
* Validate the invariant that nowait must allow retry
|
||||||
* to be sure not to return SIGBUS erroneously on
|
* to be sure not to return SIGBUS erroneously on
|
||||||
* nowait invocations.
|
* nowait invocations.
|
||||||
*/
|
*/
|
||||||
BUG_ON(flags & FAULT_FLAG_RETRY_NOWAIT);
|
BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
|
||||||
#ifdef CONFIG_DEBUG_VM
|
#ifdef CONFIG_DEBUG_VM
|
||||||
if (printk_ratelimit()) {
|
if (printk_ratelimit()) {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
"FAULT_FLAG_ALLOW_RETRY missing %x\n", flags);
|
"FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
|
||||||
dump_stack();
|
dump_stack();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -324,7 +323,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
||||||
* and wait.
|
* and wait.
|
||||||
*/
|
*/
|
||||||
ret = VM_FAULT_RETRY;
|
ret = VM_FAULT_RETRY;
|
||||||
if (flags & FAULT_FLAG_RETRY_NOWAIT)
|
if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* take the reference before dropping the mmap_sem */
|
/* take the reference before dropping the mmap_sem */
|
||||||
|
@ -332,10 +331,11 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
||||||
|
|
||||||
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
|
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
|
||||||
uwq.wq.private = current;
|
uwq.wq.private = current;
|
||||||
uwq.msg = userfault_msg(address, flags, reason);
|
uwq.msg = userfault_msg(fe->address, fe->flags, reason);
|
||||||
uwq.ctx = ctx;
|
uwq.ctx = ctx;
|
||||||
|
|
||||||
return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
|
return_to_userland =
|
||||||
|
(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
|
||||||
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
|
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
|
||||||
|
|
||||||
spin_lock(&ctx->fault_pending_wqh.lock);
|
spin_lock(&ctx->fault_pending_wqh.lock);
|
||||||
|
@ -353,7 +353,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
||||||
TASK_KILLABLE);
|
TASK_KILLABLE);
|
||||||
spin_unlock(&ctx->fault_pending_wqh.lock);
|
spin_unlock(&ctx->fault_pending_wqh.lock);
|
||||||
|
|
||||||
must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
|
must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
|
|
||||||
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
|
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
|
||||||
|
|
|
@ -1,20 +1,12 @@
|
||||||
#ifndef _LINUX_HUGE_MM_H
|
#ifndef _LINUX_HUGE_MM_H
|
||||||
#define _LINUX_HUGE_MM_H
|
#define _LINUX_HUGE_MM_H
|
||||||
|
|
||||||
extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
extern int do_huge_pmd_anonymous_page(struct fault_env *fe);
|
||||||
struct vm_area_struct *vma,
|
|
||||||
unsigned long address, pmd_t *pmd,
|
|
||||||
unsigned int flags);
|
|
||||||
extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||||
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
|
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
|
||||||
struct vm_area_struct *vma);
|
struct vm_area_struct *vma);
|
||||||
extern void huge_pmd_set_accessed(struct mm_struct *mm,
|
extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd);
|
||||||
struct vm_area_struct *vma,
|
extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd);
|
||||||
unsigned long address, pmd_t *pmd,
|
|
||||||
pmd_t orig_pmd, int dirty);
|
|
||||||
extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
||||||
unsigned long address, pmd_t *pmd,
|
|
||||||
pmd_t orig_pmd);
|
|
||||||
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
|
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
|
||||||
unsigned long addr,
|
unsigned long addr,
|
||||||
pmd_t *pmd,
|
pmd_t *pmd,
|
||||||
|
@ -134,8 +126,7 @@ static inline int hpage_nr_pages(struct page *page)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd);
|
||||||
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
|
|
||||||
|
|
||||||
extern struct page *huge_zero_page;
|
extern struct page *huge_zero_page;
|
||||||
|
|
||||||
|
@ -196,8 +187,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd)
|
||||||
unsigned long addr, pmd_t pmd, pmd_t *pmdp)
|
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -309,10 +309,27 @@ struct vm_fault {
|
||||||
* VM_FAULT_DAX_LOCKED and fill in
|
* VM_FAULT_DAX_LOCKED and fill in
|
||||||
* entry here.
|
* entry here.
|
||||||
*/
|
*/
|
||||||
/* for ->map_pages() only */
|
};
|
||||||
pgoff_t max_pgoff; /* map pages for offset from pgoff till
|
|
||||||
* max_pgoff inclusive */
|
/*
|
||||||
pte_t *pte; /* pte entry associated with ->pgoff */
|
* Page fault context: passes through page fault handler instead of endless list
|
||||||
|
* of function arguments.
|
||||||
|
*/
|
||||||
|
struct fault_env {
|
||||||
|
struct vm_area_struct *vma; /* Target VMA */
|
||||||
|
unsigned long address; /* Faulting virtual address */
|
||||||
|
unsigned int flags; /* FAULT_FLAG_xxx flags */
|
||||||
|
pmd_t *pmd; /* Pointer to pmd entry matching
|
||||||
|
* the 'address'
|
||||||
|
*/
|
||||||
|
pte_t *pte; /* Pointer to pte entry matching
|
||||||
|
* the 'address'. NULL if the page
|
||||||
|
* table hasn't been allocated.
|
||||||
|
*/
|
||||||
|
spinlock_t *ptl; /* Page table lock.
|
||||||
|
* Protects pte page table if 'pte'
|
||||||
|
* is not NULL, otherwise pmd.
|
||||||
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -327,7 +344,8 @@ struct vm_operations_struct {
|
||||||
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
|
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
|
||||||
int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
|
int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
|
||||||
pmd_t *, unsigned int flags);
|
pmd_t *, unsigned int flags);
|
||||||
void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
|
void (*map_pages)(struct fault_env *fe,
|
||||||
|
pgoff_t start_pgoff, pgoff_t end_pgoff);
|
||||||
|
|
||||||
/* notification that a previously read-only page is about to become
|
/* notification that a previously read-only page is about to become
|
||||||
* writable, if an error is returned it will cause a SIGBUS */
|
* writable, if an error is returned it will cause a SIGBUS */
|
||||||
|
@ -600,8 +618,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
|
||||||
return pte;
|
return pte;
|
||||||
}
|
}
|
||||||
|
|
||||||
void do_set_pte(struct vm_area_struct *vma, unsigned long address,
|
void do_set_pte(struct fault_env *fe, struct page *page);
|
||||||
struct page *page, pte_t *pte, bool write, bool anon);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2062,7 +2079,8 @@ extern void truncate_inode_pages_final(struct address_space *);
|
||||||
|
|
||||||
/* generic vm_area_ops exported for stackable file systems */
|
/* generic vm_area_ops exported for stackable file systems */
|
||||||
extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
|
extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
|
||||||
extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
|
extern void filemap_map_pages(struct fault_env *fe,
|
||||||
|
pgoff_t start_pgoff, pgoff_t end_pgoff);
|
||||||
extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
|
extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
|
||||||
|
|
||||||
/* mm/page-writeback.c */
|
/* mm/page-writeback.c */
|
||||||
|
|
|
@ -27,8 +27,7 @@
|
||||||
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
|
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
|
||||||
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
|
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
|
||||||
|
|
||||||
extern int handle_userfault(struct vm_area_struct *vma, unsigned long address,
|
extern int handle_userfault(struct fault_env *fe, unsigned long reason);
|
||||||
unsigned int flags, unsigned long reason);
|
|
||||||
|
|
||||||
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
|
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||||
unsigned long src_start, unsigned long len);
|
unsigned long src_start, unsigned long len);
|
||||||
|
@ -56,10 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
|
||||||
#else /* CONFIG_USERFAULTFD */
|
#else /* CONFIG_USERFAULTFD */
|
||||||
|
|
||||||
/* mm helpers */
|
/* mm helpers */
|
||||||
static inline int handle_userfault(struct vm_area_struct *vma,
|
static inline int handle_userfault(struct fault_env *fe, unsigned long reason)
|
||||||
unsigned long address,
|
|
||||||
unsigned int flags,
|
|
||||||
unsigned long reason)
|
|
||||||
{
|
{
|
||||||
return VM_FAULT_SIGBUS;
|
return VM_FAULT_SIGBUS;
|
||||||
}
|
}
|
||||||
|
|
28
mm/filemap.c
28
mm/filemap.c
|
@ -2128,22 +2128,27 @@ page_not_uptodate:
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(filemap_fault);
|
EXPORT_SYMBOL(filemap_fault);
|
||||||
|
|
||||||
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
|
void filemap_map_pages(struct fault_env *fe,
|
||||||
|
pgoff_t start_pgoff, pgoff_t end_pgoff)
|
||||||
{
|
{
|
||||||
struct radix_tree_iter iter;
|
struct radix_tree_iter iter;
|
||||||
void **slot;
|
void **slot;
|
||||||
struct file *file = vma->vm_file;
|
struct file *file = fe->vma->vm_file;
|
||||||
struct address_space *mapping = file->f_mapping;
|
struct address_space *mapping = file->f_mapping;
|
||||||
|
pgoff_t last_pgoff = start_pgoff;
|
||||||
loff_t size;
|
loff_t size;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long address = (unsigned long) vmf->virtual_address;
|
|
||||||
unsigned long addr;
|
|
||||||
pte_t *pte;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
|
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
|
||||||
if (iter.index > vmf->max_pgoff)
|
start_pgoff) {
|
||||||
|
if (iter.index > end_pgoff)
|
||||||
break;
|
break;
|
||||||
|
fe->pte += iter.index - last_pgoff;
|
||||||
|
fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
|
||||||
|
last_pgoff = iter.index;
|
||||||
|
if (!pte_none(*fe->pte))
|
||||||
|
goto next;
|
||||||
repeat:
|
repeat:
|
||||||
page = radix_tree_deref_slot(slot);
|
page = radix_tree_deref_slot(slot);
|
||||||
if (unlikely(!page))
|
if (unlikely(!page))
|
||||||
|
@ -2179,14 +2184,9 @@ repeat:
|
||||||
if (page->index >= size >> PAGE_SHIFT)
|
if (page->index >= size >> PAGE_SHIFT)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
pte = vmf->pte + page->index - vmf->pgoff;
|
|
||||||
if (!pte_none(*pte))
|
|
||||||
goto unlock;
|
|
||||||
|
|
||||||
if (file->f_ra.mmap_miss > 0)
|
if (file->f_ra.mmap_miss > 0)
|
||||||
file->f_ra.mmap_miss--;
|
file->f_ra.mmap_miss--;
|
||||||
addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
|
do_set_pte(fe, page);
|
||||||
do_set_pte(vma, addr, page, pte, false, false);
|
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
goto next;
|
goto next;
|
||||||
unlock:
|
unlock:
|
||||||
|
@ -2194,7 +2194,7 @@ unlock:
|
||||||
skip:
|
skip:
|
||||||
put_page(page);
|
put_page(page);
|
||||||
next:
|
next:
|
||||||
if (iter.index == vmf->max_pgoff)
|
if (iter.index == end_pgoff)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
278
mm/huge_memory.c
278
mm/huge_memory.c
|
@ -821,26 +821,23 @@ void prep_transhuge_page(struct page *page)
|
||||||
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
|
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
|
||||||
struct vm_area_struct *vma,
|
gfp_t gfp)
|
||||||
unsigned long address, pmd_t *pmd,
|
|
||||||
struct page *page, gfp_t gfp,
|
|
||||||
unsigned int flags)
|
|
||||||
{
|
{
|
||||||
|
struct vm_area_struct *vma = fe->vma;
|
||||||
struct mem_cgroup *memcg;
|
struct mem_cgroup *memcg;
|
||||||
pgtable_t pgtable;
|
pgtable_t pgtable;
|
||||||
spinlock_t *ptl;
|
unsigned long haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
unsigned long haddr = address & HPAGE_PMD_MASK;
|
|
||||||
|
|
||||||
VM_BUG_ON_PAGE(!PageCompound(page), page);
|
VM_BUG_ON_PAGE(!PageCompound(page), page);
|
||||||
|
|
||||||
if (mem_cgroup_try_charge(page, mm, gfp, &memcg, true)) {
|
if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
|
||||||
put_page(page);
|
put_page(page);
|
||||||
count_vm_event(THP_FAULT_FALLBACK);
|
count_vm_event(THP_FAULT_FALLBACK);
|
||||||
return VM_FAULT_FALLBACK;
|
return VM_FAULT_FALLBACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
pgtable = pte_alloc_one(mm, haddr);
|
pgtable = pte_alloc_one(vma->vm_mm, haddr);
|
||||||
if (unlikely(!pgtable)) {
|
if (unlikely(!pgtable)) {
|
||||||
mem_cgroup_cancel_charge(page, memcg, true);
|
mem_cgroup_cancel_charge(page, memcg, true);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
|
@ -855,12 +852,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
||||||
*/
|
*/
|
||||||
__SetPageUptodate(page);
|
__SetPageUptodate(page);
|
||||||
|
|
||||||
ptl = pmd_lock(mm, pmd);
|
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
|
||||||
if (unlikely(!pmd_none(*pmd))) {
|
if (unlikely(!pmd_none(*fe->pmd))) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
mem_cgroup_cancel_charge(page, memcg, true);
|
mem_cgroup_cancel_charge(page, memcg, true);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
pte_free(mm, pgtable);
|
pte_free(vma->vm_mm, pgtable);
|
||||||
} else {
|
} else {
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
|
|
||||||
|
@ -868,12 +865,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
||||||
if (userfaultfd_missing(vma)) {
|
if (userfaultfd_missing(vma)) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
mem_cgroup_cancel_charge(page, memcg, true);
|
mem_cgroup_cancel_charge(page, memcg, true);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
pte_free(mm, pgtable);
|
pte_free(vma->vm_mm, pgtable);
|
||||||
ret = handle_userfault(vma, address, flags,
|
ret = handle_userfault(fe, VM_UFFD_MISSING);
|
||||||
VM_UFFD_MISSING);
|
|
||||||
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
|
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -883,11 +879,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
||||||
page_add_new_anon_rmap(page, vma, haddr, true);
|
page_add_new_anon_rmap(page, vma, haddr, true);
|
||||||
mem_cgroup_commit_charge(page, memcg, false, true);
|
mem_cgroup_commit_charge(page, memcg, false, true);
|
||||||
lru_cache_add_active_or_unevictable(page, vma);
|
lru_cache_add_active_or_unevictable(page, vma);
|
||||||
pgtable_trans_huge_deposit(mm, pmd, pgtable);
|
pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable);
|
||||||
set_pmd_at(mm, haddr, pmd, entry);
|
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
|
||||||
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
|
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
|
||||||
atomic_long_inc(&mm->nr_ptes);
|
atomic_long_inc(&vma->vm_mm->nr_ptes);
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
count_vm_event(THP_FAULT_ALLOC);
|
count_vm_event(THP_FAULT_ALLOC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -937,13 +933,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
int do_huge_pmd_anonymous_page(struct fault_env *fe)
|
||||||
unsigned long address, pmd_t *pmd,
|
|
||||||
unsigned int flags)
|
|
||||||
{
|
{
|
||||||
|
struct vm_area_struct *vma = fe->vma;
|
||||||
gfp_t gfp;
|
gfp_t gfp;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long haddr = address & HPAGE_PMD_MASK;
|
unsigned long haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
|
|
||||||
if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
|
if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
|
||||||
return VM_FAULT_FALLBACK;
|
return VM_FAULT_FALLBACK;
|
||||||
|
@ -951,42 +946,40 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
return VM_FAULT_OOM;
|
return VM_FAULT_OOM;
|
||||||
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
|
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
|
||||||
return VM_FAULT_OOM;
|
return VM_FAULT_OOM;
|
||||||
if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
|
if (!(fe->flags & FAULT_FLAG_WRITE) &&
|
||||||
|
!mm_forbids_zeropage(vma->vm_mm) &&
|
||||||
transparent_hugepage_use_zero_page()) {
|
transparent_hugepage_use_zero_page()) {
|
||||||
spinlock_t *ptl;
|
|
||||||
pgtable_t pgtable;
|
pgtable_t pgtable;
|
||||||
struct page *zero_page;
|
struct page *zero_page;
|
||||||
bool set;
|
bool set;
|
||||||
int ret;
|
int ret;
|
||||||
pgtable = pte_alloc_one(mm, haddr);
|
pgtable = pte_alloc_one(vma->vm_mm, haddr);
|
||||||
if (unlikely(!pgtable))
|
if (unlikely(!pgtable))
|
||||||
return VM_FAULT_OOM;
|
return VM_FAULT_OOM;
|
||||||
zero_page = get_huge_zero_page();
|
zero_page = get_huge_zero_page();
|
||||||
if (unlikely(!zero_page)) {
|
if (unlikely(!zero_page)) {
|
||||||
pte_free(mm, pgtable);
|
pte_free(vma->vm_mm, pgtable);
|
||||||
count_vm_event(THP_FAULT_FALLBACK);
|
count_vm_event(THP_FAULT_FALLBACK);
|
||||||
return VM_FAULT_FALLBACK;
|
return VM_FAULT_FALLBACK;
|
||||||
}
|
}
|
||||||
ptl = pmd_lock(mm, pmd);
|
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
set = false;
|
set = false;
|
||||||
if (pmd_none(*pmd)) {
|
if (pmd_none(*fe->pmd)) {
|
||||||
if (userfaultfd_missing(vma)) {
|
if (userfaultfd_missing(vma)) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
ret = handle_userfault(vma, address, flags,
|
ret = handle_userfault(fe, VM_UFFD_MISSING);
|
||||||
VM_UFFD_MISSING);
|
|
||||||
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
|
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
|
||||||
} else {
|
} else {
|
||||||
set_huge_zero_page(pgtable, mm, vma,
|
set_huge_zero_page(pgtable, vma->vm_mm, vma,
|
||||||
haddr, pmd,
|
haddr, fe->pmd, zero_page);
|
||||||
zero_page);
|
spin_unlock(fe->ptl);
|
||||||
spin_unlock(ptl);
|
|
||||||
set = true;
|
set = true;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
if (!set) {
|
if (!set) {
|
||||||
pte_free(mm, pgtable);
|
pte_free(vma->vm_mm, pgtable);
|
||||||
put_huge_zero_page();
|
put_huge_zero_page();
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -998,8 +991,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
return VM_FAULT_FALLBACK;
|
return VM_FAULT_FALLBACK;
|
||||||
}
|
}
|
||||||
prep_transhuge_page(page);
|
prep_transhuge_page(page);
|
||||||
return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
|
return __do_huge_pmd_anonymous_page(fe, page, gfp);
|
||||||
flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
|
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||||
|
@ -1172,38 +1164,31 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void huge_pmd_set_accessed(struct mm_struct *mm,
|
void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd)
|
||||||
struct vm_area_struct *vma,
|
|
||||||
unsigned long address,
|
|
||||||
pmd_t *pmd, pmd_t orig_pmd,
|
|
||||||
int dirty)
|
|
||||||
{
|
{
|
||||||
spinlock_t *ptl;
|
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
unsigned long haddr;
|
unsigned long haddr;
|
||||||
|
|
||||||
ptl = pmd_lock(mm, pmd);
|
fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
|
||||||
if (unlikely(!pmd_same(*pmd, orig_pmd)))
|
if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
entry = pmd_mkyoung(orig_pmd);
|
entry = pmd_mkyoung(orig_pmd);
|
||||||
haddr = address & HPAGE_PMD_MASK;
|
haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
if (pmdp_set_access_flags(vma, haddr, pmd, entry, dirty))
|
if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
|
||||||
update_mmu_cache_pmd(vma, address, pmd);
|
fe->flags & FAULT_FLAG_WRITE))
|
||||||
|
update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
|
||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
|
static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
|
||||||
struct vm_area_struct *vma,
|
struct page *page)
|
||||||
unsigned long address,
|
|
||||||
pmd_t *pmd, pmd_t orig_pmd,
|
|
||||||
struct page *page,
|
|
||||||
unsigned long haddr)
|
|
||||||
{
|
{
|
||||||
|
struct vm_area_struct *vma = fe->vma;
|
||||||
|
unsigned long haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
struct mem_cgroup *memcg;
|
struct mem_cgroup *memcg;
|
||||||
spinlock_t *ptl;
|
|
||||||
pgtable_t pgtable;
|
pgtable_t pgtable;
|
||||||
pmd_t _pmd;
|
pmd_t _pmd;
|
||||||
int ret = 0, i;
|
int ret = 0, i;
|
||||||
|
@ -1220,11 +1205,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
|
||||||
|
|
||||||
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
||||||
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
|
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
|
||||||
__GFP_OTHER_NODE,
|
__GFP_OTHER_NODE, vma,
|
||||||
vma, address, page_to_nid(page));
|
fe->address, page_to_nid(page));
|
||||||
if (unlikely(!pages[i] ||
|
if (unlikely(!pages[i] ||
|
||||||
mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
|
mem_cgroup_try_charge(pages[i], vma->vm_mm,
|
||||||
&memcg, false))) {
|
GFP_KERNEL, &memcg, false))) {
|
||||||
if (pages[i])
|
if (pages[i])
|
||||||
put_page(pages[i]);
|
put_page(pages[i]);
|
||||||
while (--i >= 0) {
|
while (--i >= 0) {
|
||||||
|
@ -1250,41 +1235,41 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
|
||||||
|
|
||||||
mmun_start = haddr;
|
mmun_start = haddr;
|
||||||
mmun_end = haddr + HPAGE_PMD_SIZE;
|
mmun_end = haddr + HPAGE_PMD_SIZE;
|
||||||
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
|
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
|
||||||
|
|
||||||
ptl = pmd_lock(mm, pmd);
|
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
|
||||||
if (unlikely(!pmd_same(*pmd, orig_pmd)))
|
if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
|
||||||
goto out_free_pages;
|
goto out_free_pages;
|
||||||
VM_BUG_ON_PAGE(!PageHead(page), page);
|
VM_BUG_ON_PAGE(!PageHead(page), page);
|
||||||
|
|
||||||
pmdp_huge_clear_flush_notify(vma, haddr, pmd);
|
pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
|
||||||
/* leave pmd empty until pte is filled */
|
/* leave pmd empty until pte is filled */
|
||||||
|
|
||||||
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
|
pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd);
|
||||||
pmd_populate(mm, &_pmd, pgtable);
|
pmd_populate(vma->vm_mm, &_pmd, pgtable);
|
||||||
|
|
||||||
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
|
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
|
||||||
pte_t *pte, entry;
|
pte_t entry;
|
||||||
entry = mk_pte(pages[i], vma->vm_page_prot);
|
entry = mk_pte(pages[i], vma->vm_page_prot);
|
||||||
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
|
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
|
||||||
memcg = (void *)page_private(pages[i]);
|
memcg = (void *)page_private(pages[i]);
|
||||||
set_page_private(pages[i], 0);
|
set_page_private(pages[i], 0);
|
||||||
page_add_new_anon_rmap(pages[i], vma, haddr, false);
|
page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
|
||||||
mem_cgroup_commit_charge(pages[i], memcg, false, false);
|
mem_cgroup_commit_charge(pages[i], memcg, false, false);
|
||||||
lru_cache_add_active_or_unevictable(pages[i], vma);
|
lru_cache_add_active_or_unevictable(pages[i], vma);
|
||||||
pte = pte_offset_map(&_pmd, haddr);
|
fe->pte = pte_offset_map(&_pmd, haddr);
|
||||||
VM_BUG_ON(!pte_none(*pte));
|
VM_BUG_ON(!pte_none(*fe->pte));
|
||||||
set_pte_at(mm, haddr, pte, entry);
|
set_pte_at(vma->vm_mm, haddr, fe->pte, entry);
|
||||||
pte_unmap(pte);
|
pte_unmap(fe->pte);
|
||||||
}
|
}
|
||||||
kfree(pages);
|
kfree(pages);
|
||||||
|
|
||||||
smp_wmb(); /* make pte visible before pmd */
|
smp_wmb(); /* make pte visible before pmd */
|
||||||
pmd_populate(mm, pmd, pgtable);
|
pmd_populate(vma->vm_mm, fe->pmd, pgtable);
|
||||||
page_remove_rmap(page, true);
|
page_remove_rmap(page, true);
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
|
|
||||||
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
|
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
|
||||||
|
|
||||||
ret |= VM_FAULT_WRITE;
|
ret |= VM_FAULT_WRITE;
|
||||||
put_page(page);
|
put_page(page);
|
||||||
|
@ -1293,8 +1278,8 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
out_free_pages:
|
out_free_pages:
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
|
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
|
||||||
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
||||||
memcg = (void *)page_private(pages[i]);
|
memcg = (void *)page_private(pages[i]);
|
||||||
set_page_private(pages[i], 0);
|
set_page_private(pages[i], 0);
|
||||||
|
@ -1305,25 +1290,23 @@ out_free_pages:
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
|
||||||
unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
|
|
||||||
{
|
{
|
||||||
spinlock_t *ptl;
|
struct vm_area_struct *vma = fe->vma;
|
||||||
int ret = 0;
|
|
||||||
struct page *page = NULL, *new_page;
|
struct page *page = NULL, *new_page;
|
||||||
struct mem_cgroup *memcg;
|
struct mem_cgroup *memcg;
|
||||||
unsigned long haddr;
|
unsigned long haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
unsigned long mmun_start; /* For mmu_notifiers */
|
unsigned long mmun_start; /* For mmu_notifiers */
|
||||||
unsigned long mmun_end; /* For mmu_notifiers */
|
unsigned long mmun_end; /* For mmu_notifiers */
|
||||||
gfp_t huge_gfp; /* for allocation and charge */
|
gfp_t huge_gfp; /* for allocation and charge */
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
ptl = pmd_lockptr(mm, pmd);
|
fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd);
|
||||||
VM_BUG_ON_VMA(!vma->anon_vma, vma);
|
VM_BUG_ON_VMA(!vma->anon_vma, vma);
|
||||||
haddr = address & HPAGE_PMD_MASK;
|
|
||||||
if (is_huge_zero_pmd(orig_pmd))
|
if (is_huge_zero_pmd(orig_pmd))
|
||||||
goto alloc;
|
goto alloc;
|
||||||
spin_lock(ptl);
|
spin_lock(fe->ptl);
|
||||||
if (unlikely(!pmd_same(*pmd, orig_pmd)))
|
if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
page = pmd_page(orig_pmd);
|
page = pmd_page(orig_pmd);
|
||||||
|
@ -1336,13 +1319,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
entry = pmd_mkyoung(orig_pmd);
|
entry = pmd_mkyoung(orig_pmd);
|
||||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||||
if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1))
|
if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry, 1))
|
||||||
update_mmu_cache_pmd(vma, address, pmd);
|
update_mmu_cache_pmd(vma, fe->address, fe->pmd);
|
||||||
ret |= VM_FAULT_WRITE;
|
ret |= VM_FAULT_WRITE;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
get_page(page);
|
get_page(page);
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
alloc:
|
alloc:
|
||||||
if (transparent_hugepage_enabled(vma) &&
|
if (transparent_hugepage_enabled(vma) &&
|
||||||
!transparent_hugepage_debug_cow()) {
|
!transparent_hugepage_debug_cow()) {
|
||||||
|
@ -1355,13 +1338,12 @@ alloc:
|
||||||
prep_transhuge_page(new_page);
|
prep_transhuge_page(new_page);
|
||||||
} else {
|
} else {
|
||||||
if (!page) {
|
if (!page) {
|
||||||
split_huge_pmd(vma, pmd, address);
|
split_huge_pmd(vma, fe->pmd, fe->address);
|
||||||
ret |= VM_FAULT_FALLBACK;
|
ret |= VM_FAULT_FALLBACK;
|
||||||
} else {
|
} else {
|
||||||
ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
|
ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page);
|
||||||
pmd, orig_pmd, page, haddr);
|
|
||||||
if (ret & VM_FAULT_OOM) {
|
if (ret & VM_FAULT_OOM) {
|
||||||
split_huge_pmd(vma, pmd, address);
|
split_huge_pmd(vma, fe->pmd, fe->address);
|
||||||
ret |= VM_FAULT_FALLBACK;
|
ret |= VM_FAULT_FALLBACK;
|
||||||
}
|
}
|
||||||
put_page(page);
|
put_page(page);
|
||||||
|
@ -1370,14 +1352,12 @@ alloc:
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg,
|
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
|
||||||
true))) {
|
huge_gfp, &memcg, true))) {
|
||||||
put_page(new_page);
|
put_page(new_page);
|
||||||
if (page) {
|
split_huge_pmd(vma, fe->pmd, fe->address);
|
||||||
split_huge_pmd(vma, pmd, address);
|
if (page)
|
||||||
put_page(page);
|
put_page(page);
|
||||||
} else
|
|
||||||
split_huge_pmd(vma, pmd, address);
|
|
||||||
ret |= VM_FAULT_FALLBACK;
|
ret |= VM_FAULT_FALLBACK;
|
||||||
count_vm_event(THP_FAULT_FALLBACK);
|
count_vm_event(THP_FAULT_FALLBACK);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -1393,13 +1373,13 @@ alloc:
|
||||||
|
|
||||||
mmun_start = haddr;
|
mmun_start = haddr;
|
||||||
mmun_end = haddr + HPAGE_PMD_SIZE;
|
mmun_end = haddr + HPAGE_PMD_SIZE;
|
||||||
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
|
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
|
||||||
|
|
||||||
spin_lock(ptl);
|
spin_lock(fe->ptl);
|
||||||
if (page)
|
if (page)
|
||||||
put_page(page);
|
put_page(page);
|
||||||
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
|
if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
mem_cgroup_cancel_charge(new_page, memcg, true);
|
mem_cgroup_cancel_charge(new_page, memcg, true);
|
||||||
put_page(new_page);
|
put_page(new_page);
|
||||||
goto out_mn;
|
goto out_mn;
|
||||||
|
@ -1407,14 +1387,14 @@ alloc:
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
|
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
|
||||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||||
pmdp_huge_clear_flush_notify(vma, haddr, pmd);
|
pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
|
||||||
page_add_new_anon_rmap(new_page, vma, haddr, true);
|
page_add_new_anon_rmap(new_page, vma, haddr, true);
|
||||||
mem_cgroup_commit_charge(new_page, memcg, false, true);
|
mem_cgroup_commit_charge(new_page, memcg, false, true);
|
||||||
lru_cache_add_active_or_unevictable(new_page, vma);
|
lru_cache_add_active_or_unevictable(new_page, vma);
|
||||||
set_pmd_at(mm, haddr, pmd, entry);
|
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
|
||||||
update_mmu_cache_pmd(vma, address, pmd);
|
update_mmu_cache_pmd(vma, fe->address, fe->pmd);
|
||||||
if (!page) {
|
if (!page) {
|
||||||
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
|
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
|
||||||
put_huge_zero_page();
|
put_huge_zero_page();
|
||||||
} else {
|
} else {
|
||||||
VM_BUG_ON_PAGE(!PageHead(page), page);
|
VM_BUG_ON_PAGE(!PageHead(page), page);
|
||||||
|
@ -1423,13 +1403,13 @@ alloc:
|
||||||
}
|
}
|
||||||
ret |= VM_FAULT_WRITE;
|
ret |= VM_FAULT_WRITE;
|
||||||
}
|
}
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
out_mn:
|
out_mn:
|
||||||
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
|
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
out_unlock:
|
out_unlock:
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1489,13 +1469,12 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NUMA hinting page fault entry point for trans huge pmds */
|
/* NUMA hinting page fault entry point for trans huge pmds */
|
||||||
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
|
||||||
unsigned long addr, pmd_t pmd, pmd_t *pmdp)
|
|
||||||
{
|
{
|
||||||
spinlock_t *ptl;
|
struct vm_area_struct *vma = fe->vma;
|
||||||
struct anon_vma *anon_vma = NULL;
|
struct anon_vma *anon_vma = NULL;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long haddr = addr & HPAGE_PMD_MASK;
|
unsigned long haddr = fe->address & HPAGE_PMD_MASK;
|
||||||
int page_nid = -1, this_nid = numa_node_id();
|
int page_nid = -1, this_nid = numa_node_id();
|
||||||
int target_nid, last_cpupid = -1;
|
int target_nid, last_cpupid = -1;
|
||||||
bool page_locked;
|
bool page_locked;
|
||||||
|
@ -1506,8 +1485,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
/* A PROT_NONE fault should not end up here */
|
/* A PROT_NONE fault should not end up here */
|
||||||
BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
|
BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
|
||||||
|
|
||||||
ptl = pmd_lock(mm, pmdp);
|
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
|
||||||
if (unlikely(!pmd_same(pmd, *pmdp)))
|
if (unlikely(!pmd_same(pmd, *fe->pmd)))
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1515,9 +1494,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
* without disrupting NUMA hinting information. Do not relock and
|
* without disrupting NUMA hinting information. Do not relock and
|
||||||
* check_same as the page may no longer be mapped.
|
* check_same as the page may no longer be mapped.
|
||||||
*/
|
*/
|
||||||
if (unlikely(pmd_trans_migrating(*pmdp))) {
|
if (unlikely(pmd_trans_migrating(*fe->pmd))) {
|
||||||
page = pmd_page(*pmdp);
|
page = pmd_page(*fe->pmd);
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1550,7 +1529,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
|
|
||||||
/* Migration could have started since the pmd_trans_migrating check */
|
/* Migration could have started since the pmd_trans_migrating check */
|
||||||
if (!page_locked) {
|
if (!page_locked) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
page_nid = -1;
|
page_nid = -1;
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -1561,12 +1540,12 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
* to serialises splits
|
* to serialises splits
|
||||||
*/
|
*/
|
||||||
get_page(page);
|
get_page(page);
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
anon_vma = page_lock_anon_vma_read(page);
|
anon_vma = page_lock_anon_vma_read(page);
|
||||||
|
|
||||||
/* Confirm the PMD did not change while page_table_lock was released */
|
/* Confirm the PMD did not change while page_table_lock was released */
|
||||||
spin_lock(ptl);
|
spin_lock(fe->ptl);
|
||||||
if (unlikely(!pmd_same(pmd, *pmdp))) {
|
if (unlikely(!pmd_same(pmd, *fe->pmd))) {
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
page_nid = -1;
|
page_nid = -1;
|
||||||
|
@ -1584,9 +1563,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
* Migrate the THP to the requested node, returns with page unlocked
|
* Migrate the THP to the requested node, returns with page unlocked
|
||||||
* and access rights restored.
|
* and access rights restored.
|
||||||
*/
|
*/
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
migrated = migrate_misplaced_transhuge_page(mm, vma,
|
migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
|
||||||
pmdp, pmd, addr, page, target_nid);
|
fe->pmd, pmd, fe->address, page, target_nid);
|
||||||
if (migrated) {
|
if (migrated) {
|
||||||
flags |= TNF_MIGRATED;
|
flags |= TNF_MIGRATED;
|
||||||
page_nid = target_nid;
|
page_nid = target_nid;
|
||||||
|
@ -1601,18 +1580,18 @@ clear_pmdnuma:
|
||||||
pmd = pmd_mkyoung(pmd);
|
pmd = pmd_mkyoung(pmd);
|
||||||
if (was_writable)
|
if (was_writable)
|
||||||
pmd = pmd_mkwrite(pmd);
|
pmd = pmd_mkwrite(pmd);
|
||||||
set_pmd_at(mm, haddr, pmdp, pmd);
|
set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd);
|
||||||
update_mmu_cache_pmd(vma, addr, pmdp);
|
update_mmu_cache_pmd(vma, fe->address, fe->pmd);
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
spin_unlock(ptl);
|
spin_unlock(fe->ptl);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (anon_vma)
|
if (anon_vma)
|
||||||
page_unlock_anon_vma_read(anon_vma);
|
page_unlock_anon_vma_read(anon_vma);
|
||||||
|
|
||||||
if (page_nid != -1)
|
if (page_nid != -1)
|
||||||
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
|
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2413,20 +2392,23 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
|
||||||
struct vm_area_struct *vma,
|
struct vm_area_struct *vma,
|
||||||
unsigned long address, pmd_t *pmd)
|
unsigned long address, pmd_t *pmd)
|
||||||
{
|
{
|
||||||
unsigned long _address;
|
pte_t pteval;
|
||||||
pte_t *pte, pteval;
|
|
||||||
int swapped_in = 0, ret = 0;
|
int swapped_in = 0, ret = 0;
|
||||||
|
struct fault_env fe = {
|
||||||
|
.vma = vma,
|
||||||
|
.address = address,
|
||||||
|
.flags = FAULT_FLAG_ALLOW_RETRY,
|
||||||
|
.pmd = pmd,
|
||||||
|
};
|
||||||
|
|
||||||
pte = pte_offset_map(pmd, address);
|
fe.pte = pte_offset_map(pmd, address);
|
||||||
for (_address = address; _address < address + HPAGE_PMD_NR*PAGE_SIZE;
|
for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
|
||||||
pte++, _address += PAGE_SIZE) {
|
fe.pte++, fe.address += PAGE_SIZE) {
|
||||||
pteval = *pte;
|
pteval = *fe.pte;
|
||||||
if (!is_swap_pte(pteval))
|
if (!is_swap_pte(pteval))
|
||||||
continue;
|
continue;
|
||||||
swapped_in++;
|
swapped_in++;
|
||||||
ret = do_swap_page(mm, vma, _address, pte, pmd,
|
ret = do_swap_page(&fe, pteval);
|
||||||
FAULT_FLAG_ALLOW_RETRY,
|
|
||||||
pteval);
|
|
||||||
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
|
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
|
||||||
if (ret & VM_FAULT_RETRY) {
|
if (ret & VM_FAULT_RETRY) {
|
||||||
down_read(&mm->mmap_sem);
|
down_read(&mm->mmap_sem);
|
||||||
|
@ -2442,10 +2424,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* pte is unmapped now, we need to map it */
|
/* pte is unmapped now, we need to map it */
|
||||||
pte = pte_offset_map(pmd, _address);
|
fe.pte = pte_offset_map(pmd, fe.address);
|
||||||
}
|
}
|
||||||
pte--;
|
fe.pte--;
|
||||||
pte_unmap(pte);
|
pte_unmap(fe.pte);
|
||||||
trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
|
trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,9 +36,7 @@
|
||||||
/* Do not use these with a slab allocator */
|
/* Do not use these with a slab allocator */
|
||||||
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
|
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
|
||||||
|
|
||||||
extern int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
int do_swap_page(struct fault_env *fe, pte_t orig_pte);
|
||||||
unsigned long address, pte_t *page_table, pmd_t *pmd,
|
|
||||||
unsigned int flags, pte_t orig_pte);
|
|
||||||
|
|
||||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||||
unsigned long floor, unsigned long ceiling);
|
unsigned long floor, unsigned long ceiling);
|
||||||
|
|
582
mm/memory.c
582
mm/memory.c
File diff suppressed because it is too large
Load Diff
|
@ -1809,7 +1809,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(filemap_fault);
|
EXPORT_SYMBOL(filemap_fault);
|
||||||
|
|
||||||
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
|
void filemap_map_pages(struct fault_env *fe,
|
||||||
|
pgoff_t start_pgoff, pgoff_t end_pgoff)
|
||||||
{
|
{
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue