Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "11 patches.

  Subsystems affected by this patch series: mm (memcg, memory-failure,
  oom-kill, secretmem, vmalloc, hugetlb, damon, and tools), and ocfs2"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer
  mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()'
  mm: khugepaged: skip huge page collapse for special files
  mm, thp: bail out early in collapse_file for writeback page
  mm/vmalloc: fix numa spreading for large hash tables
  mm/secretmem: avoid letting secretmem_users drop to zero
  ocfs2: fix race between searching chunks and release journal_head from buffer_head
  mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
  mm: filemap: check if THP has hwpoisoned subpage for PMD page fault
  mm: hwpoison: remove the unnecessary THP check
  memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT
This commit is contained in:
Linus Torvalds 2021-10-29 10:03:07 -07:00
commit 2c04d67ec1
12 changed files with 110 additions and 54 deletions

View File

@@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
{ {
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh; struct journal_head *jh;
int ret; int ret = 1;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0; return 0;
@@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
if (!buffer_jbd(bg_bh)) if (!buffer_jbd(bg_bh))
return 1; return 1;
jh = bh2jh(bg_bh); jbd_lock_bh_journal_head(bg_bh);
spin_lock(&jh->b_state_lock); if (buffer_jbd(bg_bh)) {
bg = (struct ocfs2_group_desc *) jh->b_committed_data; jh = bh2jh(bg_bh);
if (bg) spin_lock(&jh->b_state_lock);
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); bg = (struct ocfs2_group_desc *) jh->b_committed_data;
else if (bg)
ret = 1; ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
spin_unlock(&jh->b_state_lock); else
ret = 1;
spin_unlock(&jh->b_state_lock);
}
jbd_unlock_bh_journal_head(bg_bh);
return ret; return ret;
} }

View File

@@ -171,6 +171,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */ /* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset, PG_double_map = PG_workingset,
#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
* Indicates that at least one subpage is hwpoisoned in the
* THP.
*/
PG_has_hwpoisoned = PG_mappedtodisk,
#endif
/* non-lru isolated movable page */ /* non-lru isolated movable page */
PG_isolated = PG_reclaim, PG_isolated = PG_reclaim,
@@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap)
TESTSCFLAG_FALSE(DoubleMap) TESTSCFLAG_FALSE(DoubleMap)
#endif #endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
* PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
* compound page.
*
* This flag is set by hwpoison handler. Cleared by THP split or free page.
*/
PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
#else
PAGEFLAG_FALSE(HasHWPoisoned)
TESTSCFLAG_FALSE(HasHWPoisoned)
#endif
/* /*
* Check if a page is currently marked HWPoisoned. Note that this check is * Check if a page is currently marked HWPoisoned. Note that this check is
* best effort only and inherently racy: there is no way to synchronize with * best effort only and inherently racy: there is no way to synchronize with

View File

@@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test)
r = damon_new_region(0, 22); r = damon_new_region(0, 22);
damon_add_region(r, t); damon_add_region(r, t);
damon_split_regions_of(c, t, 2); damon_split_regions_of(c, t, 2);
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u); KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
damon_free_target(t); damon_free_target(t);
t = damon_new_target(42); t = damon_new_target(42);
r = damon_new_region(0, 220); r = damon_new_region(0, 220);
damon_add_region(r, t); damon_add_region(r, t);
damon_split_regions_of(c, t, 4); damon_split_regions_of(c, t, 4);
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u); KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
damon_free_target(t); damon_free_target(t);
damon_destroy_ctx(c); damon_destroy_ctx(c);
} }

View File

@@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* lock lru list/PageCompound, ref frozen by page_ref_freeze */ /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
lruvec = lock_page_lruvec(head); lruvec = lock_page_lruvec(head);
ClearPageHasHWPoisoned(head);
for (i = nr - 1; i >= 1; i--) { for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list); __split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond EOF: drop them from page cache */ /* Some pages can be beyond EOF: drop them from page cache */

View File

@@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
if (!transhuge_vma_enabled(vma, vm_flags)) if (!transhuge_vma_enabled(vma, vm_flags))
return false; return false;
if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
vma->vm_pgoff, HPAGE_PMD_NR))
return false;
/* Enabled via shmem mount options or sysfs settings. */ /* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { if (shmem_file(vma->vm_file))
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, return shmem_huge_enabled(vma);
HPAGE_PMD_NR);
}
/* THP settings require madvise. */ /* THP settings require madvise. */
if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
return false; return false;
/* Read-only file mappings need to be aligned for THP to work. */ /* Only regular file is valid */
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) &&
(vm_flags & VM_EXEC)) { (vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, struct inode *inode = vma->vm_file->f_inode;
HPAGE_PMD_NR);
return !inode_is_open_for_write(inode) &&
S_ISREG(inode->i_mode);
} }
if (!vma->anon_vma || vma->vm_ops) if (!vma->anon_vma || vma->vm_ops)
@@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm,
filemap_flush(mapping); filemap_flush(mapping);
result = SCAN_FAIL; result = SCAN_FAIL;
goto xa_unlocked; goto xa_unlocked;
} else if (PageWriteback(page)) {
xas_unlock_irq(&xas);
result = SCAN_FAIL;
goto xa_unlocked;
} else if (trylock_page(page)) { } else if (trylock_page(page)) {
get_page(page); get_page(page);
xas_unlock_irq(&xas); xas_unlock_irq(&xas);
@@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock; goto out_unlock;
} }
if (!is_shmem && PageDirty(page)) { if (!is_shmem && (PageDirty(page) ||
PageWriteback(page))) {
/* /*
* khugepaged only works on read-only fd, so this * khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed * page is dirty because it hasn't been flushed

View File

@@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page)
if (!HWPoisonHandlable(head)) if (!HWPoisonHandlable(head))
return -EBUSY; return -EBUSY;
if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
* This should be better than triggering BUG_ON when kernel
* tries to touch the "partially handled" page.
*/
if (!PageAnon(head)) {
pr_err("Memory failure: %#lx: non anonymous thp\n",
page_to_pfn(page));
return 0;
}
}
if (get_page_unless_zero(head)) { if (get_page_unless_zero(head)) {
if (head == compound_head(page)) if (head == compound_head(page))
return 1; return 1;
@@ -1708,6 +1694,20 @@ try_again:
} }
if (PageTransHuge(hpage)) { if (PageTransHuge(hpage)) {
/*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
* And the flag can't be set in get_hwpoison_page() since
* it is called by soft offline too and it is just called
* for !MF_COUNT_INCREASE. So here seems to be the best
* place.
*
* Don't need care about the above error handling paths for
* get_hwpoison_page() since they handle either free page
* or unhandlable page. The refcount is bumped iff the
* page is a valid handlable page.
*/
SetPageHasHWPoisoned(hpage);
if (try_to_split_thp_page(p, "Memory Failure") < 0) { if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
res = -EBUSY; res = -EBUSY;

View File

@@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
if (compound_order(page) != HPAGE_PMD_ORDER) if (compound_order(page) != HPAGE_PMD_ORDER)
return ret; return ret;
/*
* Just backoff if any subpage of a THP is corrupted otherwise
* the corrupted page may mapped by PMD silently to escape the
* check. This kind of THP just can be PTE mapped. Access to
* the corrupted subpage should trigger SIGBUS as expected.
*/
if (unlikely(PageHasHWPoisoned(page)))
return ret;
/* /*
* Archs like ppc64 need additional space to store information * Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that. * related to pte entry. Use the preallocated table for that.

View File

@@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
struct task_struct *task; struct task_struct *task;
struct task_struct *p; struct task_struct *p;
unsigned int f_flags; unsigned int f_flags;
bool reap = true; bool reap = false;
struct pid *pid; struct pid *pid;
long ret = 0; long ret = 0;
@@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task; goto put_task;
} }
mm = p->mm; if (mmget_not_zero(p->mm)) {
mmgrab(mm); mm = p->mm;
if (task_will_free_mem(p))
/* If the work has been done already, just exit with success */ reap = true;
if (test_bit(MMF_OOM_SKIP, &mm->flags)) else {
reap = false; /* Error only if the work has not been done already */
else if (!task_will_free_mem(p)) { if (!test_bit(MMF_OOM_SKIP, &mm->flags))
reap = false; ret = -EINVAL;
ret = -EINVAL; }
} }
task_unlock(p); task_unlock(p);
@@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mmap_read_unlock(mm); mmap_read_unlock(mm);
drop_mm: drop_mm:
mmdrop(mm); if (mm)
mmput(mm);
put_task: put_task:
put_task_struct(task); put_task_struct(task);
put_pid: put_pid:

View File

@@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
if (compound) if (compound) {
ClearPageDoubleMap(page); ClearPageDoubleMap(page);
ClearPageHasHWPoisoned(page);
}
for (i = 1; i < (1 << order); i++) { for (i = 1; i < (1 << order); i++) {
if (compound) if (compound)
bad += free_tail_pages_check(page, page + i); bad += free_tail_pages_check(page, page + i);
@@ -5223,6 +5225,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(page_array && nr_pages - nr_populated == 0)) if (unlikely(page_array && nr_pages - nr_populated == 0))
goto out; goto out;
/* Bulk allocator does not support memcg accounting. */
if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
goto failed;
/* Use the single page allocator for one page. */ /* Use the single page allocator for one page. */
if (nr_pages - nr_populated == 1) if (nr_pages - nr_populated == 1)
goto failed; goto failed;

View File

@@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
file->f_flags |= O_LARGEFILE; file->f_flags |= O_LARGEFILE;
fd_install(fd, file);
atomic_inc(&secretmem_users); atomic_inc(&secretmem_users);
fd_install(fd, file);
return fd; return fd;
err_put_fd: err_put_fd:

View File

@@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
unsigned int order, unsigned int nr_pages, struct page **pages) unsigned int order, unsigned int nr_pages, struct page **pages)
{ {
unsigned int nr_allocated = 0; unsigned int nr_allocated = 0;
struct page *page;
int i;
/* /*
* For order-0 pages we make use of bulk allocator, if * For order-0 pages we make use of bulk allocator, if
@@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* to fails, fallback to a single page allocator that is * to fails, fallback to a single page allocator that is
* more permissive. * more permissive.
*/ */
if (!order) { if (!order && nid != NUMA_NO_NODE) {
while (nr_allocated < nr_pages) { while (nr_allocated < nr_pages) {
unsigned int nr, nr_pages_request; unsigned int nr, nr_pages_request;
@@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request) if (nr != nr_pages_request)
break; break;
} }
} else } else if (order)
/* /*
* Compound pages required for remap_vmalloc_page if * Compound pages required for remap_vmalloc_page if
* high-order pages. * high-order pages.
@@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
gfp |= __GFP_COMP; gfp |= __GFP_COMP;
/* High-order pages or fallback path if "bulk" fails. */ /* High-order pages or fallback path if "bulk" fails. */
while (nr_allocated < nr_pages) {
struct page *page;
int i;
page = alloc_pages_node(nid, gfp, order); while (nr_allocated < nr_pages) {
if (nid == NUMA_NO_NODE)
page = alloc_pages(gfp, order);
else
page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page)) if (unlikely(!page))
break; break;

View File

@@ -341,7 +341,7 @@ void split_file_backed_thp(void)
} }
/* write something to the file, so a file-backed THP can be allocated */ /* write something to the file, so a file-backed THP can be allocated */
num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
close(fd); close(fd);
if (num_written < 1) { if (num_written < 1) {