mm/madvise: clean up pte_offset_map_lock() scans

Came here to make madvise's several pte_offset_map_lock() scans advance to
the next extent on failure, and remove superfluous pmd_trans_unstable() and
pmd_none_or_trans_huge_or_clear_bad() calls. But also did some nearby
cleanup.

swapin_walk_pmd_entry(): don't name an address "index"; don't drop the
lock after every pte, only when calling out to read_swap_cache_async().

madvise_cold_or_pageout_pte_range() and madvise_free_pte_range(): prefer
"start_pte" for pointer, orig_pte usually denotes a saved pte value; leave
lazy MMU mode before unlocking; merge the success and failure paths after
split_folio().

Link: https://lkml.kernel.org/r/cc4d9a88-9da6-362-50d9-6735c2b125c6@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Song Liu <song@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zack Rusin <zackr@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
a5be621ee2
commit
f3cd4ab0aa
122
mm/madvise.c
122
mm/madvise.c
|
@ -188,37 +188,43 @@ success:
|
|||
|
||||
#ifdef CONFIG_SWAP
|
||||
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
{
|
||||
struct vm_area_struct *vma = walk->private;
|
||||
unsigned long index;
|
||||
struct swap_iocb *splug = NULL;
|
||||
pte_t *ptep = NULL;
|
||||
spinlock_t *ptl;
|
||||
unsigned long addr;
|
||||
|
||||
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
return 0;
|
||||
|
||||
for (index = start; index != end; index += PAGE_SIZE) {
|
||||
for (addr = start; addr < end; addr += PAGE_SIZE) {
|
||||
pte_t pte;
|
||||
swp_entry_t entry;
|
||||
struct page *page;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
|
||||
ptep = pte_offset_map_lock(vma->vm_mm, pmd, index, &ptl);
|
||||
if (!ptep++) {
|
||||
ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
if (!ptep)
|
||||
break;
|
||||
}
|
||||
|
||||
pte = *ptep;
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
|
||||
if (!is_swap_pte(pte))
|
||||
continue;
|
||||
entry = pte_to_swp_entry(pte);
|
||||
if (unlikely(non_swap_entry(entry)))
|
||||
continue;
|
||||
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
ptep = NULL;
|
||||
|
||||
page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
|
||||
vma, index, false, &splug);
|
||||
vma, addr, false, &splug);
|
||||
if (page)
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
if (ptep)
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
swap_read_unplug(splug);
|
||||
cond_resched();
|
||||
|
||||
|
@ -340,7 +346,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
|
|||
bool pageout = private->pageout;
|
||||
struct mm_struct *mm = tlb->mm;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
pte_t *orig_pte, *pte, ptent;
|
||||
pte_t *start_pte, *pte, ptent;
|
||||
spinlock_t *ptl;
|
||||
struct folio *folio = NULL;
|
||||
LIST_HEAD(folio_list);
|
||||
|
@ -422,11 +428,11 @@ huge_unlock:
|
|||
}
|
||||
|
||||
regular_folio:
|
||||
if (pmd_trans_unstable(pmd))
|
||||
return 0;
|
||||
#endif
|
||||
tlb_change_page_size(tlb, PAGE_SIZE);
|
||||
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
if (!start_pte)
|
||||
return 0;
|
||||
flush_tlb_batched_pending(mm);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
for (; addr < end; pte++, addr += PAGE_SIZE) {
|
||||
|
@ -447,25 +453,28 @@ regular_folio:
|
|||
* are sure it's worth. Split it if we are only owner.
|
||||
*/
|
||||
if (folio_test_large(folio)) {
|
||||
int err;
|
||||
|
||||
if (folio_mapcount(folio) != 1)
|
||||
break;
|
||||
if (pageout_anon_only_filter && !folio_test_anon(folio))
|
||||
break;
|
||||
if (!folio_trylock(folio))
|
||||
break;
|
||||
folio_get(folio);
|
||||
if (!folio_trylock(folio)) {
|
||||
folio_put(folio);
|
||||
break;
|
||||
}
|
||||
pte_unmap_unlock(orig_pte, ptl);
|
||||
if (split_folio(folio)) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
break;
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(start_pte, ptl);
|
||||
start_pte = NULL;
|
||||
err = split_folio(folio);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (err)
|
||||
break;
|
||||
start_pte = pte =
|
||||
pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (!start_pte)
|
||||
break;
|
||||
arch_enter_lazy_mmu_mode();
|
||||
pte--;
|
||||
addr -= PAGE_SIZE;
|
||||
continue;
|
||||
|
@ -510,8 +519,10 @@ regular_folio:
|
|||
folio_deactivate(folio);
|
||||
}
|
||||
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(orig_pte, ptl);
|
||||
if (start_pte) {
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(start_pte, ptl);
|
||||
}
|
||||
if (pageout)
|
||||
reclaim_pages(&folio_list);
|
||||
cond_resched();
|
||||
|
@ -612,7 +623,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
|
|||
struct mm_struct *mm = tlb->mm;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
spinlock_t *ptl;
|
||||
pte_t *orig_pte, *pte, ptent;
|
||||
pte_t *start_pte, *pte, ptent;
|
||||
struct folio *folio;
|
||||
int nr_swap = 0;
|
||||
unsigned long next;
|
||||
|
@ -620,13 +631,12 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
|
|||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_trans_huge(*pmd))
|
||||
if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
|
||||
goto next;
|
||||
|
||||
if (pmd_trans_unstable(pmd))
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
tlb_change_page_size(tlb, PAGE_SIZE);
|
||||
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (!start_pte)
|
||||
return 0;
|
||||
flush_tlb_batched_pending(mm);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
for (; addr != end; pte++, addr += PAGE_SIZE) {
|
||||
|
@ -664,23 +674,26 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
|
|||
* deactivate all pages.
|
||||
*/
|
||||
if (folio_test_large(folio)) {
|
||||
int err;
|
||||
|
||||
if (folio_mapcount(folio) != 1)
|
||||
goto out;
|
||||
break;
|
||||
if (!folio_trylock(folio))
|
||||
break;
|
||||
folio_get(folio);
|
||||
if (!folio_trylock(folio)) {
|
||||
folio_put(folio);
|
||||
goto out;
|
||||
}
|
||||
pte_unmap_unlock(orig_pte, ptl);
|
||||
if (split_folio(folio)) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
goto out;
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(start_pte, ptl);
|
||||
start_pte = NULL;
|
||||
err = split_folio(folio);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (err)
|
||||
break;
|
||||
start_pte = pte =
|
||||
pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (!start_pte)
|
||||
break;
|
||||
arch_enter_lazy_mmu_mode();
|
||||
pte--;
|
||||
addr -= PAGE_SIZE;
|
||||
continue;
|
||||
|
@ -725,17 +738,18 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
|
|||
}
|
||||
folio_mark_lazyfree(folio);
|
||||
}
|
||||
out:
|
||||
|
||||
if (nr_swap) {
|
||||
if (current->mm == mm)
|
||||
sync_mm_rss(mm);
|
||||
|
||||
add_mm_counter(mm, MM_SWAPENTS, nr_swap);
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(orig_pte, ptl);
|
||||
if (start_pte) {
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(start_pte, ptl);
|
||||
}
|
||||
cond_resched();
|
||||
next:
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue