mm,thp: add read-only THP support for (non-shmem) FS
This patch is (hopefully) the first step to enable THP for non-shmem filesystems. This patch enables an application to put part of its text sections to THP via madvise, for example: madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE); We tried to reuse the logic for THP on tmpfs. Currently, write is not supported for non-shmem THP. khugepaged will only process vma with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests (see ksys_mmap_pgoff). The only way to create vma with VM_DENYWRITE is execve(). This requirement limits non-shmem THP to text sections. The next patch will handle writes, which would only happen when the all the vmas with VM_DENYWRITE are unmapped. An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this feature. [songliubraving@fb.com: fix build without CONFIG_SHMEM] Link: http://lkml.kernel.org/r/F53407FB-96CC-42E8-9862-105C92CC2B98@fb.com [songliubraving@fb.com: fix double unlock in collapse_file()] Link: http://lkml.kernel.org/r/B960CBFA-8EFC-4DA4-ABC5-1977FFF2CA57@fb.com Link: http://lkml.kernel.org/r/20190801184244.3169074-7-songliubraving@fb.com Signed-off-by: Song Liu <songliubraving@fb.com> Acked-by: Rik van Riel <riel@surriel.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Dan Carpenter <dan.carpenter@oracle.com> Cc: Hillf Danton <hdanton@sina.com> Cc: Hugh Dickins <hughd@google.com> Cc: William Kucharski <william.kucharski@oracle.com> Cc: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
579c571e2e
commit
99cb0dbd47
11
mm/Kconfig
11
mm/Kconfig
|
@ -712,6 +712,17 @@ config GUP_BENCHMARK
|
|||
config GUP_GET_PTE_LOW_HIGH
|
||||
bool
|
||||
|
||||
config READ_ONLY_THP_FOR_FS
|
||||
bool "Read-only THP for filesystems (EXPERIMENTAL)"
|
||||
depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
|
||||
|
||||
help
|
||||
Allow khugepaged to put read-only file-backed pages in THP.
|
||||
|
||||
This is marked experimental because it is a new feature. Write
|
||||
support of file THPs will be developed in the next few release
|
||||
cycles.
|
||||
|
||||
config ARCH_HAS_PTE_SPECIAL
|
||||
bool
|
||||
|
||||
|
|
|
@ -203,8 +203,8 @@ static void unaccount_page_cache_page(struct address_space *mapping,
|
|||
__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
|
||||
if (PageTransHuge(page))
|
||||
__dec_node_page_state(page, NR_SHMEM_THPS);
|
||||
} else {
|
||||
VM_BUG_ON_PAGE(PageTransHuge(page), page);
|
||||
} else if (PageTransHuge(page)) {
|
||||
__dec_node_page_state(page, NR_FILE_THPS);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
149
mm/khugepaged.c
149
mm/khugepaged.c
|
@ -48,6 +48,7 @@ enum scan_result {
|
|||
SCAN_CGROUP_CHARGE_FAIL,
|
||||
SCAN_EXCEED_SWAP_PTE,
|
||||
SCAN_TRUNCATED,
|
||||
SCAN_PAGE_HAS_PRIVATE,
|
||||
};
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
@ -404,7 +405,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
|
|||
(vm_flags & VM_NOHUGEPAGE) ||
|
||||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
|
||||
return false;
|
||||
if (shmem_file(vma->vm_file)) {
|
||||
|
||||
if (shmem_file(vma->vm_file) ||
|
||||
(IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
|
||||
vma->vm_file &&
|
||||
(vm_flags & VM_DENYWRITE))) {
|
||||
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
|
||||
return false;
|
||||
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
|
||||
|
@ -456,8 +461,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
|
|||
unsigned long hstart, hend;
|
||||
|
||||
/*
|
||||
* khugepaged does not yet work on non-shmem files or special
|
||||
* mappings. And file-private shmem THP is not supported.
|
||||
* khugepaged only supports read-only files for non-shmem files.
|
||||
* khugepaged does not yet work on special mappings. And
|
||||
* file-private shmem THP is not supported.
|
||||
*/
|
||||
if (!hugepage_vma_check(vma, vm_flags))
|
||||
return 0;
|
||||
|
@ -1287,12 +1293,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|||
}
|
||||
|
||||
/**
|
||||
* collapse_file - collapse small tmpfs/shmem pages into huge one.
|
||||
* collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
|
||||
*
|
||||
* Basic scheme is simple, details are more complex:
|
||||
* - allocate and lock a new huge page;
|
||||
* - scan page cache replacing old pages with the new one
|
||||
* + swap in pages if necessary;
|
||||
* + swap/gup in pages if necessary;
|
||||
* + fill in gaps;
|
||||
* + keep old pages around in case rollback is required;
|
||||
* - if replacing succeeds:
|
||||
|
@ -1316,7 +1322,9 @@ static void collapse_file(struct mm_struct *mm,
|
|||
LIST_HEAD(pagelist);
|
||||
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
|
||||
int nr_none = 0, result = SCAN_SUCCEED;
|
||||
bool is_shmem = shmem_file(file);
|
||||
|
||||
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
|
||||
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
||||
|
||||
/* Only allocate from the target node */
|
||||
|
@ -1348,7 +1356,8 @@ static void collapse_file(struct mm_struct *mm,
|
|||
} while (1);
|
||||
|
||||
__SetPageLocked(new_page);
|
||||
__SetPageSwapBacked(new_page);
|
||||
if (is_shmem)
|
||||
__SetPageSwapBacked(new_page);
|
||||
new_page->index = start;
|
||||
new_page->mapping = mapping;
|
||||
|
||||
|
@ -1363,41 +1372,75 @@ static void collapse_file(struct mm_struct *mm,
|
|||
struct page *page = xas_next(&xas);
|
||||
|
||||
VM_BUG_ON(index != xas.xa_index);
|
||||
if (!page) {
|
||||
/*
|
||||
* Stop if extent has been truncated or hole-punched,
|
||||
* and is now completely empty.
|
||||
*/
|
||||
if (index == start) {
|
||||
if (!xas_next_entry(&xas, end - 1)) {
|
||||
result = SCAN_TRUNCATED;
|
||||
if (is_shmem) {
|
||||
if (!page) {
|
||||
/*
|
||||
* Stop if extent has been truncated or
|
||||
* hole-punched, and is now completely
|
||||
* empty.
|
||||
*/
|
||||
if (index == start) {
|
||||
if (!xas_next_entry(&xas, end - 1)) {
|
||||
result = SCAN_TRUNCATED;
|
||||
goto xa_locked;
|
||||
}
|
||||
xas_set(&xas, index);
|
||||
}
|
||||
if (!shmem_charge(mapping->host, 1)) {
|
||||
result = SCAN_FAIL;
|
||||
goto xa_locked;
|
||||
}
|
||||
xas_set(&xas, index);
|
||||
xas_store(&xas, new_page);
|
||||
nr_none++;
|
||||
continue;
|
||||
}
|
||||
if (!shmem_charge(mapping->host, 1)) {
|
||||
result = SCAN_FAIL;
|
||||
|
||||
if (xa_is_value(page) || !PageUptodate(page)) {
|
||||
xas_unlock_irq(&xas);
|
||||
/* swap in or instantiate fallocated page */
|
||||
if (shmem_getpage(mapping->host, index, &page,
|
||||
SGP_NOHUGE)) {
|
||||
result = SCAN_FAIL;
|
||||
goto xa_unlocked;
|
||||
}
|
||||
} else if (trylock_page(page)) {
|
||||
get_page(page);
|
||||
xas_unlock_irq(&xas);
|
||||
} else {
|
||||
result = SCAN_PAGE_LOCK;
|
||||
goto xa_locked;
|
||||
}
|
||||
xas_store(&xas, new_page);
|
||||
nr_none++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xa_is_value(page) || !PageUptodate(page)) {
|
||||
xas_unlock_irq(&xas);
|
||||
/* swap in or instantiate fallocated page */
|
||||
if (shmem_getpage(mapping->host, index, &page,
|
||||
SGP_NOHUGE)) {
|
||||
} else { /* !is_shmem */
|
||||
if (!page || xa_is_value(page)) {
|
||||
xas_unlock_irq(&xas);
|
||||
page_cache_sync_readahead(mapping, &file->f_ra,
|
||||
file, index,
|
||||
PAGE_SIZE);
|
||||
/* drain pagevecs to help isolate_lru_page() */
|
||||
lru_add_drain();
|
||||
page = find_lock_page(mapping, index);
|
||||
if (unlikely(page == NULL)) {
|
||||
result = SCAN_FAIL;
|
||||
goto xa_unlocked;
|
||||
}
|
||||
} else if (!PageUptodate(page)) {
|
||||
xas_unlock_irq(&xas);
|
||||
wait_on_page_locked(page);
|
||||
if (!trylock_page(page)) {
|
||||
result = SCAN_PAGE_LOCK;
|
||||
goto xa_unlocked;
|
||||
}
|
||||
get_page(page);
|
||||
} else if (PageDirty(page)) {
|
||||
result = SCAN_FAIL;
|
||||
goto xa_unlocked;
|
||||
goto xa_locked;
|
||||
} else if (trylock_page(page)) {
|
||||
get_page(page);
|
||||
xas_unlock_irq(&xas);
|
||||
} else {
|
||||
result = SCAN_PAGE_LOCK;
|
||||
goto xa_locked;
|
||||
}
|
||||
} else if (trylock_page(page)) {
|
||||
get_page(page);
|
||||
xas_unlock_irq(&xas);
|
||||
} else {
|
||||
result = SCAN_PAGE_LOCK;
|
||||
goto xa_locked;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1426,6 +1469,12 @@ static void collapse_file(struct mm_struct *mm,
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (page_has_private(page) &&
|
||||
!try_to_release_page(page, GFP_KERNEL)) {
|
||||
result = SCAN_PAGE_HAS_PRIVATE;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (page_mapped(page))
|
||||
unmap_mapping_pages(mapping, index, 1, false);
|
||||
|
||||
|
@ -1463,12 +1512,18 @@ out_unlock:
|
|||
goto xa_unlocked;
|
||||
}
|
||||
|
||||
__inc_node_page_state(new_page, NR_SHMEM_THPS);
|
||||
if (is_shmem)
|
||||
__inc_node_page_state(new_page, NR_SHMEM_THPS);
|
||||
else
|
||||
__inc_node_page_state(new_page, NR_FILE_THPS);
|
||||
|
||||
if (nr_none) {
|
||||
struct zone *zone = page_zone(new_page);
|
||||
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
|
||||
if (is_shmem)
|
||||
__mod_node_page_state(zone->zone_pgdat,
|
||||
NR_SHMEM, nr_none);
|
||||
}
|
||||
|
||||
xa_locked:
|
||||
|
@ -1506,10 +1561,15 @@ xa_unlocked:
|
|||
|
||||
SetPageUptodate(new_page);
|
||||
page_ref_add(new_page, HPAGE_PMD_NR - 1);
|
||||
set_page_dirty(new_page);
|
||||
mem_cgroup_commit_charge(new_page, memcg, false, true);
|
||||
|
||||
if (is_shmem) {
|
||||
set_page_dirty(new_page);
|
||||
lru_cache_add_anon(new_page);
|
||||
} else {
|
||||
lru_cache_add_file(new_page);
|
||||
}
|
||||
count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
|
||||
lru_cache_add_anon(new_page);
|
||||
|
||||
/*
|
||||
* Remove pte page tables, so we can re-fault the page as huge.
|
||||
|
@ -1524,7 +1584,9 @@ xa_unlocked:
|
|||
/* Something went wrong: roll back page cache changes */
|
||||
xas_lock_irq(&xas);
|
||||
mapping->nrpages -= nr_none;
|
||||
shmem_uncharge(mapping->host, nr_none);
|
||||
|
||||
if (is_shmem)
|
||||
shmem_uncharge(mapping->host, nr_none);
|
||||
|
||||
xas_set(&xas, start);
|
||||
xas_for_each(&xas, page, end - 1) {
|
||||
|
@ -1607,7 +1669,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
|
|||
break;
|
||||
}
|
||||
|
||||
if (page_count(page) != 1 + page_mapcount(page)) {
|
||||
if (page_count(page) !=
|
||||
1 + page_mapcount(page) + page_has_private(page)) {
|
||||
result = SCAN_PAGE_COUNT;
|
||||
break;
|
||||
}
|
||||
|
@ -1713,11 +1776,13 @@ skip:
|
|||
VM_BUG_ON(khugepaged_scan.address < hstart ||
|
||||
khugepaged_scan.address + HPAGE_PMD_SIZE >
|
||||
hend);
|
||||
if (shmem_file(vma->vm_file)) {
|
||||
if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
|
||||
struct file *file;
|
||||
pgoff_t pgoff = linear_page_index(vma,
|
||||
khugepaged_scan.address);
|
||||
if (!shmem_huge_enabled(vma))
|
||||
|
||||
if (shmem_file(vma->vm_file)
|
||||
&& !shmem_huge_enabled(vma))
|
||||
goto skip;
|
||||
file = get_file(vma->vm_file);
|
||||
up_read(&mm->mmap_sem);
|
||||
|
|
12
mm/rmap.c
12
mm/rmap.c
|
@ -1189,8 +1189,10 @@ void page_add_file_rmap(struct page *page, bool compound)
|
|||
}
|
||||
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
|
||||
goto out;
|
||||
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
|
||||
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
if (PageSwapBacked(page))
|
||||
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
else
|
||||
__inc_node_page_state(page, NR_FILE_PMDMAPPED);
|
||||
} else {
|
||||
if (PageTransCompound(page) && page_mapping(page)) {
|
||||
VM_WARN_ON_ONCE(!PageLocked(page));
|
||||
|
@ -1229,8 +1231,10 @@ static void page_remove_file_rmap(struct page *page, bool compound)
|
|||
}
|
||||
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
|
||||
goto out;
|
||||
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
|
||||
__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
if (PageSwapBacked(page))
|
||||
__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
else
|
||||
__dec_node_page_state(page, NR_FILE_PMDMAPPED);
|
||||
} else {
|
||||
if (!atomic_add_negative(-1, &page->_mapcount))
|
||||
goto out;
|
||||
|
|
Loading…
Reference in New Issue