Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "11 fixes". Mostly VM fixes, one psi polling fix, and one parisc build fix.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/kasan: fix false positive invalid-free reports with CONFIG_KASAN_SW_TAGS=y
  mm/zsmalloc.c: fix race condition in zs_destroy_pool
  mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely
  mm, page_owner: handle THP splits correctly
  userfaultfd_release: always remove uffd flags and clear vm_userfaultfd_ctx
  psi: get poll_work to run when calling poll syscall next time
  mm: memcontrol: flush percpu vmevents before releasing memcg
  mm: memcontrol: flush percpu vmstats before releasing memcg
  parisc: fix compilation errrors
  mm, page_alloc: move_freepages should not examine struct page of reserved memory
  mm/z3fold.c: fix race between migration and destruction
commit f47edb59bb
arch/parisc/include/asm/pgtable.h

@@ -2,6 +2,7 @@
 #ifndef _PARISC_PGTABLE_H
 #define _PARISC_PGTABLE_H
 
+#include <asm/page.h>
 #include <asm-generic/4level-fixup.h>
 
 #include <asm/fixmap.h>
@@ -98,8 +99,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 
 #endif /* !__ASSEMBLY__ */
 
-#include <asm/page.h>
-
 #define pte_ERROR(e) \
         printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
fs/userfaultfd.c

@@ -880,6 +880,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
         /* len == 0 means wake all */
         struct userfaultfd_wake_range range = { .len = 0, };
         unsigned long new_flags;
+        bool still_valid;
 
         WRITE_ONCE(ctx->released, true);
 
@@ -895,8 +896,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
          * taking the mmap_sem for writing.
          */
         down_write(&mm->mmap_sem);
-        if (!mmget_still_valid(mm))
-                goto skip_mm;
+        still_valid = mmget_still_valid(mm);
         prev = NULL;
         for (vma = mm->mmap; vma; vma = vma->vm_next) {
                 cond_resched();
@@ -907,19 +907,20 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
                         continue;
                 }
                 new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
-                prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
-                                 new_flags, vma->anon_vma,
-                                 vma->vm_file, vma->vm_pgoff,
-                                 vma_policy(vma),
-                                 NULL_VM_UFFD_CTX);
-                if (prev)
-                        vma = prev;
-                else
-                        prev = vma;
+                if (still_valid) {
+                        prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+                                         new_flags, vma->anon_vma,
+                                         vma->vm_file, vma->vm_pgoff,
+                                         vma_policy(vma),
+                                         NULL_VM_UFFD_CTX);
+                        if (prev)
+                                vma = prev;
+                        else
+                                prev = vma;
+                }
                 vma->vm_flags = new_flags;
                 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
         }
-skip_mm:
         up_write(&mm->mmap_sem);
         mmput(mm);
 wakeup:
kernel/sched/psi.c

@@ -1131,7 +1131,15 @@ static void psi_trigger_destroy(struct kref *ref)
          * deadlock while waiting for psi_poll_work to acquire trigger_lock
          */
         if (kworker_to_destroy) {
+                /*
+                 * After the RCU grace period has expired, the worker
+                 * can no longer be found through group->poll_kworker.
+                 * But it might have been already scheduled before
+                 * that - deschedule it cleanly before destroying it.
+                 */
                 kthread_cancel_delayed_work_sync(&group->poll_work);
+                atomic_set(&group->poll_scheduled, 0);
+
                 kthread_destroy_worker(kworker_to_destroy);
         }
         kfree(t);
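The hunk above works because scheduling of psi poll work is gated by a poll_scheduled flag: if the flag were left set after the pending work is cancelled, later poll() calls could never get the worker scheduled again. What follows is a minimal userspace sketch of that guard pattern; the names (work_scheduled, schedule_poll_work, cancel_poll_work) are invented for illustration and the actual queueing/cancelling machinery is elided.

/*
 * Standalone illustration (not kernel code) of why the "scheduled" guard
 * must be reset after cancelling pending work: if it stays set, every
 * later attempt to schedule the work is silently skipped.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int work_scheduled;       /* analogue of group->poll_scheduled */

/* Try to queue the (hypothetical) poll work exactly once. */
static bool schedule_poll_work(void)
{
        int expected = 0;

        /* Only the first caller actually schedules; others back off. */
        if (!atomic_compare_exchange_strong(&work_scheduled, &expected, 1))
                return false;
        printf("poll work queued\n");
        return true;
}

/* Cancel any pending work and allow future scheduling again. */
static void cancel_poll_work(void)
{
        /* ... cancel/flush the pending work here ... */
        atomic_store(&work_scheduled, 0);   /* the step the fix adds */
}

int main(void)
{
        schedule_poll_work();               /* queued */
        cancel_poll_work();                 /* cancelled, guard reset */
        if (!schedule_poll_work())          /* without the reset this would fail */
                printf("BUG: poll work can never run again\n");
        return 0;
}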
mm/huge_memory.c

@@ -32,6 +32,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/page_owner.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -2516,6 +2517,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
         }
 
         ClearPageCompound(head);
+
+        split_page_owner(head, HPAGE_PMD_ORDER);
+
         /* See comment in __split_huge_page_tail() */
         if (PageAnon(head)) {
                 /* Additional pin to swap cache */
mm/kasan/common.c

@@ -407,8 +407,14 @@ static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
         if (IS_ENABLED(CONFIG_KASAN_GENERIC))
                 return shadow_byte < 0 ||
                         shadow_byte >= KASAN_SHADOW_SCALE_SIZE;
-        else
-                return tag != (u8)shadow_byte;
+
+        /* else CONFIG_KASAN_SW_TAGS: */
+        if ((u8)shadow_byte == KASAN_TAG_INVALID)
+                return true;
+        if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte))
+                return true;
+
+        return false;
 }
 
 static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
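As a reference for the new software-tags branch, here is a small userspace model of the check. The constants mirror the values mainline KASAN uses for KASAN_TAG_KERNEL (0xFF, the native tag that must always be accepted) and KASAN_TAG_INVALID (0xFE, the shadow value for freed memory), but treat the exact numbers as assumptions; this is illustration only, not the kernel implementation.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TAG_KERNEL  0xFFu   /* native pointer tag, must always be accepted */
#define TAG_INVALID 0xFEu   /* shadow value for freed/invalid memory */

/* Userspace model of the SW_TAGS branch of shadow_invalid() above. */
static bool shadow_invalid(uint8_t tag, uint8_t shadow_byte)
{
        if (shadow_byte == TAG_INVALID)
                return true;                        /* freeing already-freed memory */
        if (tag != TAG_KERNEL && tag != shadow_byte)
                return true;                        /* mismatched pointer tag */
        return false;
}

int main(void)
{
        assert(!shadow_invalid(0xAB, 0xAB));        /* matching tag: ok */
        assert(!shadow_invalid(TAG_KERNEL, 0xAB));  /* untagged kernel pointer: ok */
        assert(shadow_invalid(0xAB, TAG_INVALID));  /* invalid-free detected */
        assert(shadow_invalid(0xAB, 0xCD));         /* wrong tag detected */
        return 0;
}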
mm/memcontrol.c

@@ -3260,6 +3260,60 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
         }
 }
 
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
+{
+        unsigned long stat[MEMCG_NR_STAT];
+        struct mem_cgroup *mi;
+        int node, cpu, i;
+
+        for (i = 0; i < MEMCG_NR_STAT; i++)
+                stat[i] = 0;
+
+        for_each_online_cpu(cpu)
+                for (i = 0; i < MEMCG_NR_STAT; i++)
+                        stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]);
+
+        for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+                for (i = 0; i < MEMCG_NR_STAT; i++)
+                        atomic_long_add(stat[i], &mi->vmstats[i]);
+
+        for_each_node(node) {
+                struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+                struct mem_cgroup_per_node *pi;
+
+                for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+                        stat[i] = 0;
+
+                for_each_online_cpu(cpu)
+                        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+                                stat[i] += raw_cpu_read(
+                                                pn->lruvec_stat_cpu->count[i]);
+
+                for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+                        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+                                atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+        }
+}
+
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+        unsigned long events[NR_VM_EVENT_ITEMS];
+        struct mem_cgroup *mi;
+        int cpu, i;
+
+        for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                events[i] = 0;
+
+        for_each_online_cpu(cpu)
+                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                        events[i] += raw_cpu_read(
+                                        memcg->vmstats_percpu->events[i]);
+
+        for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+                        atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
@@ -4682,6 +4736,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
         int node;
 
+        /*
+         * Flush percpu vmstats and vmevents to guarantee the value correctness
+         * on parent's and all ancestor levels.
+         */
+        memcg_flush_percpu_vmstats(memcg);
+        memcg_flush_percpu_vmevents(memcg);
         for_each_node(node)
                 free_mem_cgroup_per_node_info(memcg, node);
         free_percpu(memcg->vmstats_percpu);
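The two helpers above follow the same shape: zero an accumulator, fold in every CPU's counters, then charge the total at each level of the parent chain. A self-contained userspace sketch of that pattern, with invented structures and names, is below.

#include <stdio.h>

#define NR_CPUS  4
#define NR_STATS 2

struct group {
        struct group *parent;
        long stat[NR_STATS];                    /* aggregated counters */
        long percpu_stat[NR_CPUS][NR_STATS];    /* per-CPU deltas not yet folded in */
};

static void flush_percpu_stats(struct group *g)
{
        long sum[NR_STATS] = { 0 };
        struct group *mi;
        int cpu, i;

        /* Fold the per-CPU deltas into one total per counter. */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                for (i = 0; i < NR_STATS; i++)
                        sum[i] += g->percpu_stat[cpu][i];

        /* Charge the total to this group and every ancestor. */
        for (mi = g; mi; mi = mi->parent)
                for (i = 0; i < NR_STATS; i++)
                        mi->stat[i] += sum[i];
}

int main(void)
{
        struct group root = { 0 }, child = { .parent = &root };

        child.percpu_stat[0][0] = 3;
        child.percpu_stat[2][0] = 4;
        flush_percpu_stats(&child);
        printf("child=%ld root=%ld\n", child.stat[0], root.stat[0]);  /* 7 7 */
        return 0;
}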
mm/page_alloc.c

@@ -2238,27 +2238,12 @@ static int move_freepages(struct zone *zone,
         unsigned int order;
         int pages_moved = 0;
 
-#ifndef CONFIG_HOLES_IN_ZONE
-        /*
-         * page_zone is not safe to call in this context when
-         * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
-         * anyway as we check zone boundaries in move_freepages_block().
-         * Remove at a later date when no bug reports exist related to
-         * grouping pages by mobility
-         */
-        VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
-                  pfn_valid(page_to_pfn(end_page)) &&
-                  page_zone(start_page) != page_zone(end_page));
-#endif
         for (page = start_page; page <= end_page;) {
                 if (!pfn_valid_within(page_to_pfn(page))) {
                         page++;
                         continue;
                 }
 
-                /* Make sure we are not inadvertently changing nodes */
-                VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
-
                 if (!PageBuddy(page)) {
                         /*
                          * We assume that pages that could be isolated for
@@ -2273,6 +2258,10 @@ static int move_freepages(struct zone *zone,
                         continue;
                 }
 
+                /* Make sure we are not inadvertently changing nodes */
+                VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
+                VM_BUG_ON_PAGE(page_zone(page) != zone, page);
+
                 order = page_order(page);
                 move_to_free_area(page, &zone->free_area[order], migratetype);
                 page += 1 << order;
mm/z3fold.c

@@ -41,6 +41,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 
@@ -145,6 +146,8 @@ struct z3fold_header {
  * @release_wq: workqueue for safe page release
  * @work: work_struct for safe page release
  * @inode: inode for z3fold pseudo filesystem
+ * @destroying: bool to stop migration once we start destruction
+ * @isolated: int to count the number of pages currently in isolation
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -163,8 +166,11 @@ struct z3fold_pool {
         const struct zpool_ops *zpool_ops;
         struct workqueue_struct *compact_wq;
         struct workqueue_struct *release_wq;
+        struct wait_queue_head isolate_wait;
         struct work_struct work;
         struct inode *inode;
+        bool destroying;
+        int isolated;
 };
 
 /*
@@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
                 goto out_c;
         spin_lock_init(&pool->lock);
         spin_lock_init(&pool->stale_lock);
+        init_waitqueue_head(&pool->isolate_wait);
         pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
         if (!pool->unbuddied)
                 goto out_pool;
@@ -808,6 +815,15 @@ out:
         return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+        bool ret;
+
+        spin_lock(&pool->lock);
+        ret = pool->isolated == 0;
+        spin_unlock(&pool->lock);
+        return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool: the z3fold pool to be destroyed
@@ -817,6 +833,22 @@ out:
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
         kmem_cache_destroy(pool->c_handle);
+        /*
+         * We set pool->destroying under lock to ensure that
+         * z3fold_page_isolate() sees any changes to destroying. This way we
+         * avoid the need for any memory barriers.
+         */
+
+        spin_lock(&pool->lock);
+        pool->destroying = true;
+        spin_unlock(&pool->lock);
+
+        /*
+         * We need to ensure that no pages are being migrated while we destroy
+         * these workqueues, as migration can queue work on either of the
+         * workqueues.
+         */
+        wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
 
         /*
          * We need to destroy pool->compact_wq before pool->release_wq,
@@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
         return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+        assert_spin_locked(&pool->lock);
+        VM_BUG_ON(pool->isolated <= 0);
+        pool->isolated--;
+
+        /*
+         * If we have no more isolated pages, we have to see if
+         * z3fold_destroy_pool() is waiting for a signal.
+         */
+        if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
+                wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+        pool->isolated++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
         struct z3fold_header *zhdr;
@@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
                 spin_lock(&pool->lock);
                 if (!list_empty(&page->lru))
                         list_del(&page->lru);
+                /*
+                 * We need to check for destruction while holding pool->lock, as
+                 * otherwise destruction could see 0 isolated pages, and
+                 * proceed.
+                 */
+                if (unlikely(pool->destroying)) {
+                        spin_unlock(&pool->lock);
+                        /*
+                         * If this page isn't stale, somebody else holds a
+                         * reference to it. Let's drop our refcount so that they
+                         * can call the release logic.
+                         */
+                        if (unlikely(kref_put(&zhdr->refcount,
+                                              release_z3fold_page_locked))) {
+                                /*
+                                 * If we get here we have kref problems, so we
+                                 * should freak out.
+                                 */
+                                WARN(1, "Z3fold is experiencing kref problems\n");
+                                return false;
+                        }
+                        z3fold_page_unlock(zhdr);
+                        return false;
+                }
+
+
+                z3fold_inc_isolated(pool);
                 spin_unlock(&pool->lock);
                 z3fold_page_unlock(zhdr);
                 return true;
@@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
 
         queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+        spin_lock(&pool->lock);
+        z3fold_dec_isolated(pool);
+        spin_unlock(&pool->lock);
+
         page_mapcount_reset(page);
         put_page(page);
         return 0;
@@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page)
         INIT_LIST_HEAD(&page->lru);
         if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
                 atomic64_dec(&pool->pages_nr);
+                spin_lock(&pool->lock);
+                z3fold_dec_isolated(pool);
+                spin_unlock(&pool->lock);
                 return;
         }
         spin_lock(&pool->lock);
         list_add(&page->lru, &pool->lru);
+        z3fold_dec_isolated(pool);
         spin_unlock(&pool->lock);
         z3fold_page_unlock(zhdr);
 }
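The z3fold hunks combine three pieces: an isolated counter updated under pool->lock, a destroying flag that makes new isolations fail, and a wait queue that lets z3fold_destroy_pool() sleep until the counter drains. Below is a userspace pthread analogue of that shutdown handshake; the names are invented and it is a sketch of the pattern, not the kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool {
        pthread_mutex_t lock;
        pthread_cond_t  drained;
        int  isolated;
        bool destroying;
};

static bool pool_isolate(struct pool *p)
{
        bool ok;

        pthread_mutex_lock(&p->lock);
        ok = !p->destroying;            /* refuse new isolations during teardown */
        if (ok)
                p->isolated++;
        pthread_mutex_unlock(&p->lock);
        return ok;
}

static void pool_put_isolated(struct pool *p)
{
        pthread_mutex_lock(&p->lock);
        if (--p->isolated == 0)
                pthread_cond_broadcast(&p->drained);  /* wake a waiting destroyer */
        pthread_mutex_unlock(&p->lock);
}

static void pool_destroy(struct pool *p)
{
        pthread_mutex_lock(&p->lock);
        p->destroying = true;           /* set under the same lock isolation takes */
        while (p->isolated > 0)
                pthread_cond_wait(&p->drained, &p->lock);
        pthread_mutex_unlock(&p->lock);
        /* now safe to tear down work queues / free structures */
}

int main(void)
{
        struct pool p = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };

        if (pool_isolate(&p))
                pool_put_isolated(&p);
        pool_destroy(&p);
        printf("pool destroyed with no isolations in flight\n");
        return 0;
}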
mm/zsmalloc.c

@@ -54,6 +54,7 @@
 #include <linux/mount.h>
 #include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
+#include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
 
@@ -268,6 +269,10 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
         struct inode *inode;
         struct work_struct free_work;
+        /* A wait queue for when migration races with async_free_zspage() */
+        struct wait_queue_head migration_wait;
+        atomic_long_t isolated_pages;
+        bool destroying;
 #endif
 };
 
@@ -1862,6 +1867,31 @@ static void dec_zspage_isolation(struct zspage *zspage)
         zspage->isolated--;
 }
 
+static void putback_zspage_deferred(struct zs_pool *pool,
+                                    struct size_class *class,
+                                    struct zspage *zspage)
+{
+        enum fullness_group fg;
+
+        fg = putback_zspage(class, zspage);
+        if (fg == ZS_EMPTY)
+                schedule_work(&pool->free_work);
+
+}
+
+static inline void zs_pool_dec_isolated(struct zs_pool *pool)
+{
+        VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
+        atomic_long_dec(&pool->isolated_pages);
+        /*
+         * There's no possibility of racing, since wait_for_isolated_drain()
+         * checks the isolated count under &class->lock after enqueuing
+         * on migration_wait.
+         */
+        if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
+                wake_up_all(&pool->migration_wait);
+}
+
 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
                                 struct page *newpage, struct page *oldpage)
 {
@@ -1931,6 +1961,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
          */
         if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
                 get_zspage_mapping(zspage, &class_idx, &fullness);
+                atomic_long_inc(&pool->isolated_pages);
                 remove_zspage(class, zspage, fullness);
         }
 
@@ -2030,8 +2061,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
          * Page migration is done so let's putback isolated zspage to
          * the list if @page is final isolated subpage in the zspage.
          */
-        if (!is_zspage_isolated(zspage))
-                putback_zspage(class, zspage);
+        if (!is_zspage_isolated(zspage)) {
+                /*
+                 * We cannot race with zs_destroy_pool() here because we wait
+                 * for isolation to hit zero before we start destroying.
+                 * Also, we ensure that everyone can see pool->destroying before
+                 * we start waiting.
+                 */
+                putback_zspage_deferred(pool, class, zspage);
+                zs_pool_dec_isolated(pool);
+        }
 
         reset_page(page);
         put_page(page);
@@ -2077,13 +2116,12 @@ static void zs_page_putback(struct page *page)
         spin_lock(&class->lock);
         dec_zspage_isolation(zspage);
         if (!is_zspage_isolated(zspage)) {
-                fg = putback_zspage(class, zspage);
                 /*
                  * Due to page_lock, we cannot free zspage immediately
                  * so let's defer.
                  */
-                if (fg == ZS_EMPTY)
-                        schedule_work(&pool->free_work);
+                putback_zspage_deferred(pool, class, zspage);
+                zs_pool_dec_isolated(pool);
         }
         spin_unlock(&class->lock);
 }
@@ -2107,8 +2145,36 @@ static int zs_register_migration(struct zs_pool *pool)
         return 0;
 }
 
+static bool pool_isolated_are_drained(struct zs_pool *pool)
+{
+        return atomic_long_read(&pool->isolated_pages) == 0;
+}
+
+/* Function for resolving migration */
+static void wait_for_isolated_drain(struct zs_pool *pool)
+{
+
+        /*
+         * We're in the process of destroying the pool, so there are no
+         * active allocations. zs_page_isolate() fails for completely free
+         * zspages, so we need only wait for the zs_pool's isolated
+         * count to hit zero.
+         */
+        wait_event(pool->migration_wait,
+                   pool_isolated_are_drained(pool));
+}
+
 static void zs_unregister_migration(struct zs_pool *pool)
 {
+        pool->destroying = true;
+        /*
+         * We need a memory barrier here to ensure global visibility of
+         * pool->destroying. Thus pool->isolated pages will either be 0 in which
+         * case we don't care, or it will be > 0 and pool->destroying will
+         * ensure that we wake up once isolation hits 0.
+         */
+        smp_mb();
+        wait_for_isolated_drain(pool); /* This can block */
         flush_work(&pool->free_work);
         iput(pool->inode);
 }
@@ -2346,6 +2412,8 @@ struct zs_pool *zs_create_pool(const char *name)
         if (!pool->name)
                 goto err;
 
+        init_waitqueue_head(&pool->migration_wait);
+
         if (create_cache(pool))
                 goto err;
 
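zsmalloc solves the same shutdown race as z3fold, but with an atomic isolated_pages counter, a destroying flag and smp_mb() instead of a lock. The C11 sketch below only traces the two decision points of that handshake; it prints where the real code would sleep or wake, the names are invented, and seq_cst atomics stand in for the kernel's explicit barrier.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long isolated_pages;      /* analogue of pool->isolated_pages */
static atomic_bool destroying;          /* analogue of pool->destroying */

/* Migration path: an isolated page is released. */
static void dec_isolated(void)
{
        atomic_fetch_sub(&isolated_pages, 1);
        /* seq_cst ordering: this load cannot move before the decrement */
        if (atomic_load(&isolated_pages) == 0 && atomic_load(&destroying))
                printf("wake up the destroyer\n");
}

/* Teardown path. */
static void unregister_migration(void)
{
        atomic_store(&destroying, true);
        /* the seq_cst store/load pair plays the role of smp_mb() in the patch */
        if (atomic_load(&isolated_pages) == 0)
                printf("nothing isolated, proceed with teardown\n");
        else
                printf("would sleep until the last dec_isolated() wakes us\n");
}

int main(void)
{
        atomic_fetch_add(&isolated_pages, 1);   /* one page currently isolated */
        unregister_migration();                 /* sees the count, decides to wait */
        dec_isolated();                         /* sees destroying, issues the wakeup */
        return 0;
}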