From c9c1ee20ee84b1a827437ca6dba2f06fe475d9b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 8 Jun 2023 18:41:31 -0700 Subject: [PATCH] mm/huge_memory: split huge pmd under one pte_offset_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __split_huge_zero_page_pmd() use a single pte_offset_map() to sweep the extent: it's already under pmd_lock(), so this is no worse for latency; and since it's supposed to have full control of the just-withdrawn page table, here choose to VM_BUG_ON if it were to fail. And please don't increment haddr by PAGE_SIZE, that should remain huge aligned: declare a separate addr (not a bugfix, but it was deceptive). __split_huge_pmd_locked() likewise (but it had declared a separate addr); and change its BUG_ON(!pte_none) to VM_BUG_ON, for consistency with zero (those deposited page tables are sometimes victims of random corruption). Link: https://lkml.kernel.org/r/90cbed7f-90d9-b779-4a46-d2485baf9595@google.com Signed-off-by: Hugh Dickins Reviewed-by: Yang Shi Cc: Alistair Popple Cc: Anshuman Khandual Cc: Axel Rasmussen Cc: Christophe Leroy Cc: Christoph Hellwig Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Kirill A. Shutemov Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Mel Gorman Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Pavel Tatashin Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ralph Campbell Cc: Ryan Roberts Cc: SeongJae Park Cc: Song Liu Cc: Steven Price Cc: Suren Baghdasaryan Cc: Thomas Hellström Cc: Will Deacon Cc: Yu Zhao Cc: Zack Rusin Signed-off-by: Andrew Morton --- mm/huge_memory.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e21b3e3eb994..31bc8fa768e3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2037,6 +2037,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; pgtable_t pgtable; pmd_t _pmd, old_pmd; + unsigned long addr; + pte_t *pte; int i; /* @@ -2052,17 +2054,20 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); - for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { - pte_t *pte, entry; - entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot); + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); + for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) { + pte_t entry; + + entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot); entry = pte_mkspecial(entry); if (pmd_uffd_wp(old_pmd)) entry = pte_mkuffd_wp(entry); - pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte_none(*pte)); - set_pte_at(mm, haddr, pte, entry); - pte_unmap(pte); + set_pte_at(mm, addr, pte, entry); + pte++; } + pte_unmap(pte - 1); smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); } @@ -2077,6 +2082,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false; bool anon_exclusive = false, dirty = false; unsigned long addr; + pte_t *pte; int i; VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); @@ -2205,8 +2211,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) { - pte_t entry, *pte; + pte_t entry; /* * Note that NUMA hinting access restrictions are not * transferred to avoid any possibility of altering @@ -2249,11 +2257,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_mkuffd_wp(entry); page_add_anon_rmap(page + i, vma, addr, false); } - pte = pte_offset_map(&_pmd, addr); - BUG_ON(!pte_none(*pte)); + VM_BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); - pte_unmap(pte); + pte++; } + pte_unmap(pte - 1); if (!pmd_migration) page_remove_rmap(page, vma, true);