mm: remove zap_page_range and create zap_vma_pages
zap_page_range was originally designed to unmap pages within an address
range that could span multiple vmas.  While working on [1], it was
discovered that all callers of zap_page_range pass a range entirely within
a single vma.  In addition, the mmu notification call within zap_page_range
does not correctly handle ranges that span multiple vmas.  When crossing a
vma boundary, a new mmu_notifier_range_init/end call pair with the new vma
should be made.

Instead of fixing zap_page_range, do the following:

- Create a new routine zap_vma_pages() that will remove all pages within
  the passed vma.  Most users of zap_page_range pass the entire vma and
  can use this new routine.
- For callers of zap_page_range not passing the entire vma, instead call
  zap_page_range_single().
- Remove zap_page_range.

[1] https://lore.kernel.org/linux-mm/20221114235507.294320-2-mike.kravetz@oracle.com/
Link: https://lkml.kernel.org/r/20230104002732.232573-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Peter Xu <peterx@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>	[s390]
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
commit e9adcfecf5
parent bbc61844b4
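For reference, the shape of the conversion is sketched below.  The
zap_vma_pages() helper shown first is the one this patch adds to
include/linux/mm.h (see the hunk further down); the example_unmap_*()
callers are made up for illustration only and do not appear in the patch.

/* New helper as added by this patch (include/linux/mm.h). */
static inline void zap_vma_pages(struct vm_area_struct *vma)
{
	/* Single-vma zap: one mmu_notifier range, no zap_details. */
	zap_page_range_single(vma, vma->vm_start,
			      vma->vm_end - vma->vm_start, NULL);
}

/* Illustrative caller, before: compute the size and zap the whole vma. */
static void example_unmap_old(struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	zap_page_range(vma, vma->vm_start, size);
}

/* Illustrative caller, after: the helper takes only the vma. */
static void example_unmap_new(struct vm_area_struct *vma)
{
	zap_vma_pages(vma);
}

Callers that zap only part of a vma (binder, the s390 gmap code, TCP
zerocopy receive) instead call zap_page_range_single(vma, addr, size, NULL)
directly, which sets up a single mmu_notifier range for that one vma.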
@@ -138,13 +138,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_lock(mm);
 
 	for_each_vma(vmi, vma) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
 		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 #ifdef CONFIG_COMPAT_VDSO
 		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 #endif
 	}
 
@@ -120,10 +120,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 
 	mmap_read_lock(mm);
 	for_each_vma(vmi, vma) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
 		if (vma_is_special_mapping(vma, &vvar_spec))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 	}
 	mmap_read_unlock(mm);
 
@@ -414,7 +414,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
 	/*
 	 * When the LPAR lost credits due to core removal or during
 	 * migration, invalidate the existing mapping for the current
-	 * paste addresses and set windows in-active (zap_page_range in
+	 * paste addresses and set windows in-active (zap_vma_pages in
 	 * reconfig_close_windows()).
 	 * New mapping will be done later after migration or new credits
 	 * available. So continue to receive faults if the user space
@@ -760,8 +760,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
 		 * is done before the original mmap() and after the ioctl.
 		 */
 		if (vma)
-			zap_page_range(vma, vma->vm_start,
-					vma->vm_end - vma->vm_start);
+			zap_vma_pages(vma);
 
 		mmap_write_unlock(task_ref->mm);
 		mutex_unlock(&task_ref->mmap_mutex);
@@ -124,13 +124,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_lock(mm);
 
 	for_each_vma(vmi, vma) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
 		if (vma_is_special_mapping(vma, vdso_info.dm))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 #ifdef CONFIG_COMPAT
 		if (vma_is_special_mapping(vma, compat_vdso_info.dm))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 #endif
 	}
 
@@ -59,11 +59,9 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 
 	mmap_read_lock(mm);
 	for_each_vma(vmi, vma) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
 		if (!vma_is_special_mapping(vma, &vvar_mapping))
 			continue;
-		zap_page_range(vma, vma->vm_start, size);
+		zap_vma_pages(vma);
 		break;
 	}
 	mmap_read_unlock(mm);
@@ -722,7 +722,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
 		if (is_vm_hugetlb_page(vma))
 			continue;
 		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range(vma, vmaddr, size);
+		zap_page_range_single(vma, vmaddr, size, NULL);
 	}
 	mmap_read_unlock(gmap->mm);
 }
@@ -113,10 +113,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 
 	mmap_read_lock(mm);
 	for_each_vma(vmi, vma) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
 		if (vma_is_special_mapping(vma, &vvar_mapping))
-			zap_page_range(vma, vma->vm_start, size);
+			zap_vma_pages(vma);
 	}
 	mmap_read_unlock(mm);
 
@@ -1019,7 +1019,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
 		if (vma) {
 			trace_binder_unmap_user_start(alloc, index);
 
-			zap_page_range(vma, page_addr, PAGE_SIZE);
+			zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);
 
 			trace_binder_unmap_user_end(alloc, index);
 		}
@@ -1977,10 +1977,13 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		    unsigned long size);
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
 			   unsigned long size, struct zap_details *details);
+static inline void zap_vma_pages(struct vm_area_struct *vma)
+{
+	zap_page_range_single(vma, vma->vm_start,
+			      vma->vm_end - vma->vm_start, NULL);
+}
 void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
 		struct vm_area_struct *start_vma, unsigned long start,
 		unsigned long end);
mm/memory.c
@@ -1693,36 +1693,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
 	mmu_notifier_invalidate_range_end(&range);
 }
 
-/**
- * zap_page_range - remove user pages in a given range
- * @vma: vm_area_struct holding the applicable pages
- * @start: starting address of pages to zap
- * @size: number of bytes to zap
- *
- * Caller must protect the VMA list
- */
-void zap_page_range(struct vm_area_struct *vma, unsigned long start,
-		unsigned long size)
-{
-	struct maple_tree *mt = &vma->vm_mm->mm_mt;
-	unsigned long end = start + size;
-	struct mmu_notifier_range range;
-	struct mmu_gather tlb;
-	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
-
-	lru_add_drain();
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-				start, start + size);
-	tlb_gather_mmu(&tlb, vma->vm_mm);
-	update_hiwater_rss(vma->vm_mm);
-	mmu_notifier_invalidate_range_start(&range);
-	do {
-		unmap_single_vma(&tlb, vma, start, range.end, NULL);
-	} while ((vma = mas_find(&mas, end - 1)) != NULL);
-	mmu_notifier_invalidate_range_end(&range);
-	tlb_finish_mmu(&tlb);
-}
-
 /**
  * zap_page_range_single - remove user pages in a given range
  * @vma: vm_area_struct holding the applicable pages
@@ -2690,7 +2690,7 @@ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
  *
  * The caller must hold lock_page_memcg().  Most callers have the folio
  * locked.  A few have the folio blocked from truncation through other
- * means (eg zap_page_range() has it mapped and is holding the page table
+ * means (eg zap_vma_pages() has it mapped and is holding the page table
  * lock).  This can also be called from mark_buffer_dirty(), which I
  * cannot prove is always protected against truncate.
  */
@@ -2092,7 +2092,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
 		maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
 				*length +	      /* Mapped or pending */
 				(pages_remaining * PAGE_SIZE); /* Failed map. */
-		zap_page_range(vma, *address, maybe_zap_len);
+		zap_page_range_single(vma, *address, maybe_zap_len, NULL);
 		err = 0;
 	}
 
@@ -2100,7 +2100,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
 		unsigned long leftover_pages = pages_remaining;
 		int bytes_mapped;
 
-		/* We called zap_page_range, try to reinsert. */
+		/* We called zap_page_range_single, try to reinsert. */
 		err = vm_insert_pages(vma, *address,
 				      pending_pages,
 				      &pages_remaining);
@@ -2234,7 +2234,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
 	if (total_bytes_to_map) {
 		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
-			zap_page_range(vma, address, total_bytes_to_map);
+			zap_page_range_single(vma, address, total_bytes_to_map,
+					      NULL);
 		zc->length = total_bytes_to_map;
 		zc->recv_skip_hint = 0;
 	} else {