Merge branch 'for-next/faultaround' into for-next/core
Initialise prefaulted PTEs as 'old' for arm64 when hardware access-flag
updates are supported, which drastically improves vmscan performance.

* for-next/faultaround:
  mm: filemap: Fix microblaze build failure with 'mmu_defconfig'
  mm/nommu: Fix return type of filemap_map_pages()
  mm: Mark anonymous struct field of 'struct vm_fault' as 'const'
  mm: Use static initialisers for immutable fields of 'struct vm_fault'
  mm: Avoid modifying vmf.address in __collapse_huge_page_swapin()
  mm: Pass 'address' to map to do_set_pte() and drop FAULT_FLAG_PREFAULT
  mm: Move immutable fields of 'struct vm_fault' into anonymous struct
  arm64: mm: Implement arch_wants_old_prefaulted_pte()
  mm: Allow architectures to request 'old' entries when prefaulting
  mm: Cleanup faultaround and finish_fault() codepaths
commit 6b76c3aedb
arch/arm64/include/asm/pgtable.h

@@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void)
 	return !cpu_has_hw_af();
 }
-#define arch_faults_on_old_pte arch_faults_on_old_pte
+#define arch_faults_on_old_pte		arch_faults_on_old_pte
+
+/*
+ * Experimentally, it's cheap to set the access flag in hardware and we
+ * benefit from prefaulting mappings as 'old' to start with.
+ */
+static inline bool arch_wants_old_prefaulted_pte(void)
+{
+	return !arch_faults_on_old_pte();
+}
+#define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
 
 #endif /* !__ASSEMBLY__ */

fs/xfs/xfs_file.c

@@ -1319,17 +1319,19 @@ xfs_filemap_pfn_mkwrite(
 	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
 }
 
-static void
+static vm_fault_t
 xfs_filemap_map_pages(
 	struct vm_fault		*vmf,
 	pgoff_t			start_pgoff,
 	pgoff_t			end_pgoff)
 {
 	struct inode		*inode = file_inode(vmf->vma->vm_file);
+	vm_fault_t ret;
 
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	filemap_map_pages(vmf, start_pgoff, end_pgoff);
+	ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+	return ret;
 }
 
 static const struct vm_operations_struct xfs_file_vm_ops = {

include/linux/mm.h

@@ -514,11 +514,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
  * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
-	struct vm_area_struct *vma;	/* Target VMA */
-	unsigned int flags;		/* FAULT_FLAG_xxx flags */
-	gfp_t gfp_mask;			/* gfp mask to be used for allocations */
-	pgoff_t pgoff;			/* Logical page offset based on vma */
-	unsigned long address;		/* Faulting virtual address */
+	const struct {
+		struct vm_area_struct *vma;	/* Target VMA */
+		gfp_t gfp_mask;			/* gfp mask to be used for allocations */
+		pgoff_t pgoff;			/* Logical page offset based on vma */
+		unsigned long address;		/* Faulting virtual address */
+	};
+	unsigned int flags;		/* FAULT_FLAG_xxx flags
+					 * XXX: should really be 'const' */
 	pmd_t *pmd;			/* Pointer to pmd entry matching
 					 * the 'address' */
 	pud_t *pud;			/* Pointer to pud entry matching

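(Aside, not part of the diff: the shmem and swapfile hunks at the end of this page switch callers over to designated initialisers, which is what the const anonymous struct above requires. Below is a minimal, stand-alone C11 sketch of that language pattern, using a simplified, made-up field set rather than the kernel's struct.)

#include <stdio.h>

struct fault {
	const struct {			/* immutable after initialisation */
		unsigned long address;
		unsigned long pgoff;
	};
	unsigned int flags;		/* still assignable later */
};

int main(void)
{
	/* A designated initialiser sets the const members exactly once. */
	struct fault f = {
		.address = 0x1000,
		.pgoff	 = 1,
		.flags	 = 0,
	};

	f.flags = 2;			/* fine */
	/* f.address = 0x2000; would not compile: member of a const struct */

	printf("addr=%#lx pgoff=%lu flags=%u\n", f.address, f.pgoff, f.flags);
	return 0;
}
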
@@ -542,8 +545,8 @@ struct vm_fault {
 					 * is not NULL, otherwise pmd.
 					 */
 	pgtable_t prealloc_pte;		/* Pre-allocated pte page table.
-					 * vm_ops->map_pages() calls
-					 * alloc_set_pte() from atomic context.
+					 * vm_ops->map_pages() sets up a page
+					 * table from atomic context.
 					 * do_fault_around() pre-allocates
 					 * page table to avoid allocation from
 					 * atomic context.

@@ -578,7 +581,7 @@ struct vm_operations_struct {
 	vm_fault_t (*fault)(struct vm_fault *vmf);
 	vm_fault_t (*huge_fault)(struct vm_fault *vmf,
 			enum page_entry_size pe_size);
-	void (*map_pages)(struct vm_fault *vmf,
+	vm_fault_t (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 	unsigned long (*pagesize)(struct vm_area_struct * area);

@@ -988,7 +991,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
+
 vm_fault_t finish_fault(struct vm_fault *vmf);
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 #endif

@@ -2622,7 +2627,7 @@ extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern vm_fault_t filemap_fault(struct vm_fault *vmf);
-extern void filemap_map_pages(struct vm_fault *vmf,
+extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);

include/linux/pgtable.h

@@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
 #endif
 }
 
+/*
+ * the ordering of these checks is important for pmds with _page_devmap set.
+ * if we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 #ifndef CONFIG_NUMA_BALANCING
 /*
  * Technically a PTE can be PROTNONE even when not doing NUMA balancing but

mm/filemap.c

@@ -42,6 +42,8 @@
 #include <linux/psi.h>
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS

@@ -2911,74 +2913,163 @@ out_retry:
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_fault *vmf,
-		pgoff_t start_pgoff, pgoff_t end_pgoff)
+static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
 {
-	struct file *file = vmf->vma->vm_file;
+	struct mm_struct *mm = vmf->vma->vm_mm;
+
+	/* Huge page is mapped? No need to proceed. */
+	if (pmd_trans_huge(*vmf->pmd)) {
+		unlock_page(page);
+		put_page(page);
+		return true;
+	}
+
+	if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
+		vm_fault_t ret = do_set_pmd(vmf, page);
+		if (!ret) {
+			/* The page is mapped successfully, reference consumed. */
+			unlock_page(page);
+			return true;
+		}
+	}
+
+	if (pmd_none(*vmf->pmd)) {
+		vmf->ptl = pmd_lock(mm, vmf->pmd);
+		if (likely(pmd_none(*vmf->pmd))) {
+			mm_inc_nr_ptes(mm);
+			pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
+			vmf->prealloc_pte = NULL;
+		}
+		spin_unlock(vmf->ptl);
+	}
+
+	/* See comment in handle_pte_fault() */
+	if (pmd_devmap_trans_unstable(vmf->pmd)) {
+		unlock_page(page);
+		put_page(page);
+		return true;
+	}
+
+	return false;
+}
+
+static struct page *next_uptodate_page(struct page *page,
+				       struct address_space *mapping,
+				       struct xa_state *xas, pgoff_t end_pgoff)
+{
+	unsigned long max_idx;
+
+	do {
+		if (!page)
+			return NULL;
+		if (xas_retry(xas, page))
+			continue;
+		if (xa_is_value(page))
+			continue;
+		if (PageLocked(page))
+			continue;
+		if (!page_cache_get_speculative(page))
+			continue;
+		/* Has the page moved or been split? */
+		if (unlikely(page != xas_reload(xas)))
+			goto skip;
+		if (!PageUptodate(page) || PageReadahead(page))
+			goto skip;
+		if (PageHWPoison(page))
+			goto skip;
+		if (!trylock_page(page))
+			goto skip;
+		if (page->mapping != mapping)
+			goto unlock;
+		if (!PageUptodate(page))
+			goto unlock;
+		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+		if (xas->xa_index >= max_idx)
+			goto unlock;
+		return page;
+unlock:
+		unlock_page(page);
+skip:
+		put_page(page);
+	} while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
+
+	return NULL;
+}
+
+static inline struct page *first_map_page(struct address_space *mapping,
+					  struct xa_state *xas,
+					  pgoff_t end_pgoff)
+{
+	return next_uptodate_page(xas_find(xas, end_pgoff),
+				  mapping, xas, end_pgoff);
+}
+
+static inline struct page *next_map_page(struct address_space *mapping,
+					 struct xa_state *xas,
+					 pgoff_t end_pgoff)
+{
+	return next_uptodate_page(xas_next_entry(xas, end_pgoff),
+				  mapping, xas, end_pgoff);
+}
+
+vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+			     pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
-	unsigned long max_idx;
+	unsigned long addr;
 	XA_STATE(xas, &mapping->i_pages, start_pgoff);
 	struct page *head, *page;
 	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
+	vm_fault_t ret = 0;
 
 	rcu_read_lock();
-	xas_for_each(&xas, head, end_pgoff) {
-		if (xas_retry(&xas, head))
-			continue;
-		if (xa_is_value(head))
-			goto next;
+	head = first_map_page(mapping, &xas, end_pgoff);
+	if (!head)
+		goto out;
 
-		/*
-		 * Check for a locked page first, as a speculative
-		 * reference may adversely influence page migration.
-		 */
-		if (PageLocked(head))
-			goto next;
-		if (!page_cache_get_speculative(head))
-			goto next;
+	if (filemap_map_pmd(vmf, head)) {
+		ret = VM_FAULT_NOPAGE;
+		goto out;
+	}
 
-		/* Has the page moved or been split? */
-		if (unlikely(head != xas_reload(&xas)))
-			goto skip;
+	addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
+	do {
 		page = find_subpage(head, xas.xa_index);
-
-		if (!PageUptodate(head) ||
-				PageReadahead(page) ||
-				PageHWPoison(page))
-			goto skip;
-		if (!trylock_page(head))
-			goto skip;
-
-		if (head->mapping != mapping || !PageUptodate(head))
-			goto unlock;
-
-		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
-		if (xas.xa_index >= max_idx)
+		if (PageHWPoison(page))
 			goto unlock;
 
 		if (mmap_miss > 0)
 			mmap_miss--;
 
-		vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
-		if (vmf->pte)
-			vmf->pte += xas.xa_index - last_pgoff;
+		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
+		vmf->pte += xas.xa_index - last_pgoff;
 		last_pgoff = xas.xa_index;
-		if (alloc_set_pte(vmf, page))
+
+		if (!pte_none(*vmf->pte))
 			goto unlock;
+
+		/* We're about to handle the fault */
+		if (vmf->address == addr)
+			ret = VM_FAULT_NOPAGE;
+
+		do_set_pte(vmf, page, addr);
+		/* no need to invalidate: a not-present page won't be cached */
+		update_mmu_cache(vma, addr, vmf->pte);
 		unlock_page(head);
-		goto next;
+		continue;
 unlock:
 		unlock_page(head);
-skip:
 		put_page(head);
-next:
-		/* Huge page is mapped? No need to proceed. */
-		if (pmd_trans_huge(*vmf->pmd))
-			break;
-	}
+	} while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
+out:
 	rcu_read_unlock();
 	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
+	return ret;
 }
 EXPORT_SYMBOL(filemap_map_pages);

mm/khugepaged.c

@@ -991,38 +991,41 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 
 static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					struct vm_area_struct *vma,
-					unsigned long address, pmd_t *pmd,
+					unsigned long haddr, pmd_t *pmd,
 					int referenced)
 {
 	int swapped_in = 0;
 	vm_fault_t ret = 0;
-	struct vm_fault vmf = {
-		.vma = vma,
-		.address = address,
-		.flags = FAULT_FLAG_ALLOW_RETRY,
-		.pmd = pmd,
-		.pgoff = linear_page_index(vma, address),
-	};
+	unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
 
-	vmf.pte = pte_offset_map(pmd, address);
-	for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
-			vmf.pte++, vmf.address += PAGE_SIZE) {
+	for (address = haddr; address < end; address += PAGE_SIZE) {
+		struct vm_fault vmf = {
+			.vma = vma,
+			.address = address,
+			.pgoff = linear_page_index(vma, haddr),
+			.flags = FAULT_FLAG_ALLOW_RETRY,
+			.pmd = pmd,
+		};
+
+		vmf.pte = pte_offset_map(pmd, address);
 		vmf.orig_pte = *vmf.pte;
-		if (!is_swap_pte(vmf.orig_pte))
+		if (!is_swap_pte(vmf.orig_pte)) {
+			pte_unmap(vmf.pte);
 			continue;
+		}
 		swapped_in++;
 		ret = do_swap_page(&vmf);
 
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
 		if (ret & VM_FAULT_RETRY) {
 			mmap_read_lock(mm);
-			if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
+			if (hugepage_vma_revalidate(mm, haddr, &vma)) {
 				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
 			}
 			/* check if the pmd is still valid */
-			if (mm_find_pmd(mm, address) != pmd) {
+			if (mm_find_pmd(mm, haddr) != pmd) {
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
 			}

@@ -1031,11 +1034,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 			return false;
 		}
-		/* pte is unmapped now, we need to map it */
-		vmf.pte = pte_offset_map(pmd, vmf.address);
 	}
-	vmf.pte--;
-	pte_unmap(vmf.pte);
 
 	/* Drain LRU add pagevec to remove extra pin on the swapped in pages */
 	if (swapped_in)

mm/memory.c

@@ -134,6 +134,18 @@ static inline bool arch_faults_on_old_pte(void)
 }
 #endif
 
+#ifndef arch_wants_old_prefaulted_pte
+static inline bool arch_wants_old_prefaulted_pte(void)
+{
+	/*
+	 * Transitioning a PTE from 'old' to 'young' can be expensive on
+	 * some architectures, even if it's performed in hardware. By
+	 * default, "false" means prefaulted entries will be 'young'.
+	 */
+	return false;
+}
+#endif
+
 static int __init disable_randmaps(char *s)
 {
 	randomize_va_space = 0;

@@ -3503,7 +3515,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	if (pte_alloc(vma->vm_mm, vmf->pmd))
 		return VM_FAULT_OOM;
 
-	/* See the comment in pte_alloc_one_map() */
+	/* See comment in handle_pte_fault() */
 	if (unlikely(pmd_trans_unstable(vmf->pmd)))
 		return 0;

@@ -3643,66 +3655,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-/*
- * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
- * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
- * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
- * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
- */
-static int pmd_devmap_trans_unstable(pmd_t *pmd)
-{
-	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
-}
-
-static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-
-	if (!pmd_none(*vmf->pmd))
-		goto map_pte;
-	if (vmf->prealloc_pte) {
-		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-		if (unlikely(!pmd_none(*vmf->pmd))) {
-			spin_unlock(vmf->ptl);
-			goto map_pte;
-		}
-
-		mm_inc_nr_ptes(vma->vm_mm);
-		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
-		spin_unlock(vmf->ptl);
-		vmf->prealloc_pte = NULL;
-	} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
-		return VM_FAULT_OOM;
-	}
-map_pte:
-	/*
-	 * If a huge pmd materialized under us just retry later. Use
-	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
-	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
-	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
-	 * running immediately after a huge pmd fault in a different thread of
-	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
-	 * All we have to ensure is that it is a regular pmd that we can walk
-	 * with pte_offset_map() and we can do that through an atomic read in
-	 * C, which is what pmd_trans_unstable() provides.
-	 */
-	if (pmd_devmap_trans_unstable(vmf->pmd))
-		return VM_FAULT_NOPAGE;
-
-	/*
-	 * At this point we know that our vmf->pmd points to a page of ptes
-	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
-	 * for the duration of the fault. If a racing MADV_DONTNEED runs and
-	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
-	 * be valid and we will re-check to make sure the vmf->pte isn't
-	 * pte_none() under vmf->ptl protection when we return to
-	 * alloc_set_pte().
-	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
-	return 0;
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void deposit_prealloc_pte(struct vm_fault *vmf)
 {

@@ -3717,7 +3669,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	vmf->prealloc_pte = NULL;
 }
 
-static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;

@@ -3775,76 +3727,41 @@ out:
 	return ret;
 }
 #else
-static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	BUILD_BUG();
-	return 0;
+	return VM_FAULT_FALLBACK;
 }
 #endif
 
-/**
- * alloc_set_pte - setup new PTE entry for given page and add reverse page
- * mapping. If needed, the function allocates page table or use pre-allocated.
- *
- * @vmf: fault environment
- * @page: page to map
- *
- * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
- * return.
- *
- * Target users are page handler itself and implementations of
- * vm_ops->map_pages.
- *
- * Return: %0 on success, %VM_FAULT_ code in case of error.
- */
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
+	bool prefault = vmf->address != addr;
 	pte_t entry;
-	vm_fault_t ret;
 
-	if (pmd_none(*vmf->pmd) && PageTransCompound(page)) {
-		ret = do_set_pmd(vmf, page);
-		if (ret != VM_FAULT_FALLBACK)
-			return ret;
-	}
-
-	if (!vmf->pte) {
-		ret = pte_alloc_one_map(vmf);
-		if (ret)
-			return ret;
-	}
-
-	/* Re-check under ptl */
-	if (unlikely(!pte_none(*vmf->pte))) {
-		update_mmu_tlb(vma, vmf->address, vmf->pte);
-		return VM_FAULT_NOPAGE;
-	}
-
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
+
+	if (prefault && arch_wants_old_prefaulted_pte())
+		entry = pte_mkold(entry);
+	else
+		entry = pte_sw_mkyoung(entry);
+
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-		page_add_new_anon_rmap(page, vma, vmf->address, false);
+		page_add_new_anon_rmap(page, vma, addr, false);
 		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, false);
 	}
-	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
-
-	/* no need to invalidate: a not-present page won't be cached */
-	update_mmu_cache(vma, vmf->address, vmf->pte);
-
-	return 0;
+	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
 }
 
 /**
  * finish_fault - finish page fault once we have prepared the page to fault
  *

@@ -3862,12 +3779,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
  */
 vm_fault_t finish_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct page *page;
-	vm_fault_t ret = 0;
+	vm_fault_t ret;
 
 	/* Did we COW the page? */
-	if ((vmf->flags & FAULT_FLAG_WRITE) &&
-	    !(vmf->vma->vm_flags & VM_SHARED))
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
 		page = vmf->cow_page;
 	else
 		page = vmf->page;

@@ -3876,12 +3793,38 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 	 * check even for read faults because we might have lost our CoWed
 	 * page
 	 */
-	if (!(vmf->vma->vm_flags & VM_SHARED))
-		ret = check_stable_address_space(vmf->vma->vm_mm);
-	if (!ret)
-		ret = alloc_set_pte(vmf, page);
-	if (vmf->pte)
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
+	if (!(vma->vm_flags & VM_SHARED)) {
+		ret = check_stable_address_space(vma->vm_mm);
+		if (ret)
+			return ret;
+	}
+
+	if (pmd_none(*vmf->pmd)) {
+		if (PageTransCompound(page)) {
+			ret = do_set_pmd(vmf, page);
+			if (ret != VM_FAULT_FALLBACK)
+				return ret;
+		}
+
+		if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+			return VM_FAULT_OOM;
+	}
+
+	/* See comment in handle_pte_fault() */
+	if (pmd_devmap_trans_unstable(vmf->pmd))
+		return 0;
+
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				       vmf->address, &vmf->ptl);
+	ret = 0;
+	/* Re-check under ptl */
+	if (likely(pte_none(*vmf->pte)))
+		do_set_pte(vmf, page, vmf->address);
+	else
+		ret = VM_FAULT_NOPAGE;
+
+	update_mmu_tlb(vma, vmf->address, vmf->pte);
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return ret;
 }

@@ -3951,13 +3894,12 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 	pgoff_t start_pgoff = vmf->pgoff;
 	pgoff_t end_pgoff;
 	int off;
-	vm_fault_t ret = 0;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
 
-	vmf->address = max(address & mask, vmf->vma->vm_start);
-	off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+	address = max(address & mask, vmf->vma->vm_start);
+	off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 	start_pgoff -= off;
 
 	/*

@@ -3965,7 +3907,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 	 *  the vma or nr_pages from start_pgoff, depending what is nearest.
 	 */
 	end_pgoff = start_pgoff -
-		((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
 		PTRS_PER_PTE - 1;
 	end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
 			start_pgoff + nr_pages - 1);

@@ -3973,31 +3915,11 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 	if (pmd_none(*vmf->pmd)) {
 		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
-			goto out;
+			return VM_FAULT_OOM;
 		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
-	vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
-
-	/* Huge page is mapped? Page fault is solved */
-	if (pmd_trans_huge(*vmf->pmd)) {
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
-	/* ->map_pages() haven't done anything useful. Cold page cache? */
-	if (!vmf->pte)
-		goto out;
-
-	/* check if the page fault is solved */
-	vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
-	if (!pte_none(*vmf->pte))
-		ret = VM_FAULT_NOPAGE;
-	pte_unmap_unlock(vmf->pte, vmf->ptl);
-out:
-	vmf->address = address;
-	vmf->pte = NULL;
-	return ret;
+	return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
 }
 
 static vm_fault_t do_read_fault(struct vm_fault *vmf)

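(Aside, not part of the diff: the do_fault_around() hunks above keep the window arithmetic but drop the post-scan PTE walk, since ->map_pages() now reports the result itself. The stand-alone sketch below exercises just that arithmetic with assumed example values -- 4 KiB pages, 512 PTEs per page table, fault_around_bytes of 64 KiB and a made-up VMA -- so it illustrates the calculation rather than reproducing kernel code.)

#include <stdio.h>

#define PAGE_SHIFT	12UL
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PTRS_PER_PTE	512UL

static unsigned long min3ul(unsigned long a, unsigned long b, unsigned long c)
{
	unsigned long m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	unsigned long fault_around_bytes = 65536;	/* 16 pages */
	unsigned long vm_start = 0x400000, vm_pgoff = 0, vma_pages = 256;
	unsigned long fault_address = 0x403210;		/* stands in for vmf->address */

	unsigned long pgoff = ((fault_address - vm_start) >> PAGE_SHIFT) + vm_pgoff;
	unsigned long start_pgoff = pgoff, end_pgoff;
	unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;
	unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
	unsigned long address, off;

	/* Align the window start, but never below the start of the VMA. */
	address = fault_address & mask;
	if (address < vm_start)
		address = vm_start;

	/* Pull start_pgoff back by the same number of pages, within one PTE page. */
	off = ((fault_address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
	start_pgoff -= off;

	/* Clamp the end to the PTE page, the VMA and nr_pages. */
	end_pgoff = start_pgoff -
		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
		PTRS_PER_PTE - 1;
	end_pgoff = min3ul(end_pgoff, vma_pages + vm_pgoff - 1,
			   start_pgoff + nr_pages - 1);

	printf("map_pages(start_pgoff=%lu, end_pgoff=%lu) around pgoff %lu\n",
	       start_pgoff, end_pgoff, pgoff);
	return 0;
}

With these inputs it prints "map_pages(start_pgoff=0, end_pgoff=15) around pgoff 3": a 16-page window clamped to one page table and to the VMA.
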
@@ -4353,7 +4275,18 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		 */
 		vmf->pte = NULL;
 	} else {
-		/* See comment in pte_alloc_one_map() */
+		/*
+		 * If a huge pmd materialized under us just retry later. Use
+		 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
+		 * of pmd_trans_huge() to ensure the pmd didn't become
+		 * pmd_trans_huge under us and then back to pmd_none, as a
+		 * result of MADV_DONTNEED running immediately after a huge pmd
+		 * fault in a different thread of this mm, in turn leading to a
+		 * misleading pmd_trans_huge() retval. All we have to ensure is
+		 * that it is a regular pmd that we can walk with
+		 * pte_offset_map() and we can do that through an atomic read
+		 * in C, which is what pmd_trans_unstable() provides.
+		 */
 		if (pmd_devmap_trans_unstable(vmf->pmd))
 			return 0;
 		/*

mm/nommu.c

@@ -1668,10 +1668,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_fault *vmf,
+vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	BUG();
+	return 0;
 }
 EXPORT_SYMBOL(filemap_map_pages);

mm/shmem.c

@@ -1520,11 +1520,11 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 {
 	struct vm_area_struct pvma;
 	struct page *page;
-	struct vm_fault vmf;
+	struct vm_fault vmf = {
+		.vma = &pvma,
+	};
 
 	shmem_pseudo_vma_init(&pvma, info, index);
-	vmf.vma = &pvma;
-	vmf.address = 0;
 	page = swap_cluster_readahead(swap, gfp, &vmf);
 	shmem_pseudo_vma_destroy(&pvma);
 

mm/swapfile.c

@@ -1951,8 +1951,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	si = swap_info[type];
 	pte = pte_offset_map(pmd, addr);
 	do {
-		struct vm_fault vmf;
-
 		if (!is_swap_pte(*pte))
 			continue;
 

@@ -1968,9 +1966,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		swap_map = &si->swap_map[offset];
 		page = lookup_swap_cache(entry, vma, addr);
 		if (!page) {
-			vmf.vma = vma;
-			vmf.address = addr;
-			vmf.pmd = pmd;
+			struct vm_fault vmf = {
+				.vma = vma,
+				.address = addr,
+				.pmd = pmd,
+			};
+
 			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
 						&vmf);
 		}