Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "31 patches. Subsystems affected by this patch series: hotfixes, mm/pagealloc, kexec, ocfs2, lib, mm/slab, mm/slab, mm/slub, mm/swap, mm/pagemap, mm/vmalloc, mm/memcg, mm/gup, mm/thp, mm/vmscan, x86, mm/memory-hotplug, MAINTAINERS" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits) MAINTAINERS: update info for sparse mm/memory_hotplug.c: fix false softlockup during pfn range removal mm: remove vmalloc_exec arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page x86/hyperv: allocate the hypercall page with only read and execute bits mm/memory: fix IO cost for anonymous page mm/swap: fix for "mm: workingset: age nonresident information alongside anonymous pages" mm: workingset: age nonresident information alongside anonymous pages doc: THP CoW fault no longer allocate THP docs: mm/gup: minor documentation update mm/memcontrol.c: prevent missed memory.low load tears mm/memcontrol.c: add missed css_put() mm: memcontrol: handle div0 crash race condition in memory.low mm/vmalloc.c: fix a warning while make xmldocs media: omap3isp: remove cacheflush.h make asm-generic/cacheflush.h more standalone mm/debug_vm_pgtable: fix build failure with powerpc 8xx mm/memory.c: properly pte_offset_map_lock/unlock in vm_insert_pages() mm: fix swap cache node allocation mask slub: cure list_slab_objects() from double fix ...
This commit is contained in:
commit
7c902e2730
|
@ -1356,8 +1356,8 @@ PAGE_SIZE multiple when read back.
|
|||
|
||||
thp_fault_alloc
|
||||
Number of transparent hugepages which were allocated to satisfy
|
||||
a page fault, including COW faults. This counter is not present
|
||||
when CONFIG_TRANSPARENT_HUGEPAGE is not set.
|
||||
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
|
||||
is not set.
|
||||
|
||||
thp_collapse_alloc
|
||||
Number of transparent hugepages which were allocated to allow
|
||||
|
|
|
@ -305,8 +305,7 @@ monitor how successfully the system is providing huge pages for use.
|
|||
|
||||
thp_fault_alloc
|
||||
is incremented every time a huge page is successfully
|
||||
allocated to handle a page fault. This applies to both the
|
||||
first time a page is faulted and for COW faults.
|
||||
allocated to handle a page fault.
|
||||
|
||||
thp_collapse_alloc
|
||||
is incremented by khugepaged when it has found
|
||||
|
|
|
@ -33,7 +33,7 @@ all combinations of get*(), pin*(), FOLL_LONGTERM, and more. Also, the
|
|||
pin_user_pages*() APIs are clearly distinct from the get_user_pages*() APIs, so
|
||||
that's a natural dividing line, and a good point to make separate wrapper calls.
|
||||
In other words, use pin_user_pages*() for DMA-pinned pages, and
|
||||
get_user_pages*() for other cases. There are four cases described later on in
|
||||
get_user_pages*() for other cases. There are five cases described later on in
|
||||
this document, to further clarify that concept.
|
||||
|
||||
FOLL_PIN and FOLL_GET are mutually exclusive for a given gup call. However,
|
||||
|
|
|
@ -16058,8 +16058,10 @@ SPARSE CHECKER
|
|||
M: "Luc Van Oostenryck" <luc.vanoostenryck@gmail.com>
|
||||
L: linux-sparse@vger.kernel.org
|
||||
S: Maintained
|
||||
W: https://sparse.wiki.kernel.org/
|
||||
W: https://sparse.docs.kernel.org/
|
||||
T: git git://git.kernel.org/pub/scm/devel/sparse/sparse.git
|
||||
Q: https://patchwork.kernel.org/project/linux-sparse/list/
|
||||
B: https://bugzilla.kernel.org/enter_bug.cgi?component=Sparse&product=Tools
|
||||
F: include/linux/compiler.h
|
||||
|
||||
SPEAR CLOCK FRAMEWORK SUPPORT
|
||||
|
|
|
@ -120,15 +120,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
|
|||
|
||||
void *alloc_insn_page(void)
|
||||
{
|
||||
void *page;
|
||||
|
||||
page = vmalloc_exec(PAGE_SIZE);
|
||||
if (page) {
|
||||
set_memory_ro((unsigned long)page, 1);
|
||||
set_vm_flush_reset_perms(page);
|
||||
}
|
||||
|
||||
return page;
|
||||
return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
|
||||
GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
|
||||
NUMA_NO_NODE, __func__);
|
||||
}
|
||||
|
||||
/* arm kprobe: install breakpoint in text */
|
||||
|
|
|
@ -74,8 +74,11 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size)
|
|||
* We need to iterate through the pages, clearing the dcache for
|
||||
* them and setting the cache-inhibit bit.
|
||||
*/
|
||||
mmap_read_lock(&init_mm);
|
||||
error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
|
||||
NULL);
|
||||
mmap_read_unlock(&init_mm);
|
||||
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
return cpu_addr;
|
||||
|
@ -85,9 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
|
|||
{
|
||||
unsigned long va = (unsigned long)cpu_addr;
|
||||
|
||||
mmap_read_lock(&init_mm);
|
||||
/* walk_page_range shouldn't be able to fail here */
|
||||
WARN_ON(walk_page_range(&init_mm, va, va + size,
|
||||
&clear_nocache_walk_ops, NULL));
|
||||
mmap_read_unlock(&init_mm);
|
||||
}
|
||||
|
||||
void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
|
||||
|
|
|
@ -375,7 +375,9 @@ void __init hyperv_init(void)
|
|||
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
|
||||
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
|
||||
|
||||
hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
|
||||
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
|
||||
VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
|
||||
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __func__);
|
||||
if (hv_hypercall_pg == NULL) {
|
||||
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
|
||||
goto remove_cpuhp_state;
|
||||
|
|
|
@ -194,6 +194,7 @@ enum page_cache_mode {
|
|||
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
|
||||
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
|
||||
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
|
||||
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
|
||||
|
@ -219,6 +220,7 @@ enum page_cache_mode {
|
|||
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
|
||||
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
|
||||
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
|
||||
#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC)
|
||||
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
|
||||
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
|
||||
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
* Troy Laramy <t-laramy@ti.com>
|
||||
*/
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
#include <linux/clk.h>
|
||||
#include <linux/clkdev.h>
|
||||
#include <linux/delay.h>
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
#include <media/v4l2-dev.h>
|
||||
#include <media/v4l2-ioctl.h>
|
||||
|
|
|
@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
|
|||
&ocfs2_nfs_sync_lops, osb);
|
||||
}
|
||||
|
||||
static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
|
||||
{
|
||||
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
|
||||
init_rwsem(&osb->nfs_sync_rwlock);
|
||||
}
|
||||
|
||||
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
|
||||
{
|
||||
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
|
||||
|
@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
|
|||
if (ocfs2_is_hard_readonly(osb))
|
||||
return -EROFS;
|
||||
|
||||
if (ex)
|
||||
down_write(&osb->nfs_sync_rwlock);
|
||||
else
|
||||
down_read(&osb->nfs_sync_rwlock);
|
||||
|
||||
if (ocfs2_mount_local(osb))
|
||||
return 0;
|
||||
|
||||
|
@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
|
|||
if (!ocfs2_mount_local(osb))
|
||||
ocfs2_cluster_unlock(osb, lockres,
|
||||
ex ? LKM_EXMODE : LKM_PRMODE);
|
||||
if (ex)
|
||||
up_write(&osb->nfs_sync_rwlock);
|
||||
else
|
||||
up_read(&osb->nfs_sync_rwlock);
|
||||
}
|
||||
|
||||
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
|
||||
|
@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
|
|||
local:
|
||||
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
|
||||
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
|
||||
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
|
||||
ocfs2_nfs_sync_lock_init(osb);
|
||||
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
|
||||
|
||||
osb->cconn = conn;
|
||||
|
|
|
@ -395,6 +395,7 @@ struct ocfs2_super
|
|||
struct ocfs2_lock_res osb_super_lockres;
|
||||
struct ocfs2_lock_res osb_rename_lockres;
|
||||
struct ocfs2_lock_res osb_nfs_sync_lockres;
|
||||
struct rw_semaphore nfs_sync_rwlock;
|
||||
struct ocfs2_lock_res osb_trim_fs_lockres;
|
||||
struct mutex obs_trim_fs_mutex;
|
||||
struct ocfs2_dlm_debug *osb_dlm_debug;
|
||||
|
|
|
@ -290,7 +290,7 @@
|
|||
#define OCFS2_MAX_SLOTS 255
|
||||
|
||||
/* Slot map indicator for an empty slot */
|
||||
#define OCFS2_INVALID_SLOT -1
|
||||
#define OCFS2_INVALID_SLOT ((u16)-1)
|
||||
|
||||
#define OCFS2_VOL_UUID_LEN 16
|
||||
#define OCFS2_MAX_VOL_LABEL_LEN 64
|
||||
|
@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
|
|||
enum {
|
||||
BAD_BLOCK_SYSTEM_INODE = 0,
|
||||
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
|
||||
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
|
||||
SLOT_MAP_SYSTEM_INODE,
|
||||
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
|
||||
HEARTBEAT_SYSTEM_INODE,
|
||||
GLOBAL_BITMAP_SYSTEM_INODE,
|
||||
USER_QUOTA_SYSTEM_INODE,
|
||||
|
|
|
@ -2825,9 +2825,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
|
|||
goto bail;
|
||||
}
|
||||
|
||||
inode_alloc_inode =
|
||||
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
|
||||
suballoc_slot);
|
||||
if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
|
||||
inode_alloc_inode = ocfs2_get_system_file_inode(osb,
|
||||
GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
|
||||
else
|
||||
inode_alloc_inode = ocfs2_get_system_file_inode(osb,
|
||||
INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
|
||||
if (!inode_alloc_inode) {
|
||||
/* the error code could be inaccurate, but we are not able to
|
||||
* get the correct one. */
|
||||
|
|
|
@ -2,6 +2,11 @@
|
|||
#ifndef _ASM_GENERIC_CACHEFLUSH_H
|
||||
#define _ASM_GENERIC_CACHEFLUSH_H
|
||||
|
||||
struct mm_struct;
|
||||
struct vm_area_struct;
|
||||
struct page;
|
||||
struct address_space;
|
||||
|
||||
/*
|
||||
* The cache doesn't need to be flushed when TLB entries change when
|
||||
* the cache is mapped to physical memory, not virtual memory
|
||||
|
|
|
@ -257,8 +257,8 @@ struct lruvec {
|
|||
*/
|
||||
unsigned long anon_cost;
|
||||
unsigned long file_cost;
|
||||
/* Evictions & activations on the inactive file list */
|
||||
atomic_long_t inactive_age;
|
||||
/* Non-resident age, driven by LRU movement */
|
||||
atomic_long_t nonresident_age;
|
||||
/* Refaults at the time of last reclaim cycle */
|
||||
unsigned long refaults;
|
||||
/* Various lruvec state flags (enum lruvec_flags) */
|
||||
|
|
|
@ -313,6 +313,7 @@ struct vma_swap_readahead {
|
|||
};
|
||||
|
||||
/* linux/mm/workingset.c */
|
||||
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
|
||||
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
|
||||
void workingset_refault(struct page *page, void *shadow);
|
||||
void workingset_activation(struct page *page);
|
||||
|
|
|
@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size);
|
|||
extern void *vmalloc_user(unsigned long size);
|
||||
extern void *vmalloc_node(unsigned long size, int node);
|
||||
extern void *vzalloc_node(unsigned long size, int node);
|
||||
extern void *vmalloc_exec(unsigned long size);
|
||||
extern void *vmalloc_32(unsigned long size);
|
||||
extern void *vmalloc_32_user(unsigned long size);
|
||||
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);
|
||||
|
|
|
@ -181,34 +181,19 @@ void kimage_file_post_load_cleanup(struct kimage *image)
|
|||
static int
|
||||
kimage_validate_signature(struct kimage *image)
|
||||
{
|
||||
const char *reason;
|
||||
int ret;
|
||||
|
||||
ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
|
||||
image->kernel_buf_len);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
break;
|
||||
if (ret) {
|
||||
|
||||
/* Certain verification errors are non-fatal if we're not
|
||||
* checking errors, provided we aren't mandating that there
|
||||
* must be a valid signature.
|
||||
*/
|
||||
case -ENODATA:
|
||||
reason = "kexec of unsigned image";
|
||||
goto decide;
|
||||
case -ENOPKG:
|
||||
reason = "kexec of image with unsupported crypto";
|
||||
goto decide;
|
||||
case -ENOKEY:
|
||||
reason = "kexec of image with unavailable key";
|
||||
decide:
|
||||
if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
|
||||
pr_notice("%s rejected\n", reason);
|
||||
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* If IMA is guaranteed to appraise a signature on the kexec
|
||||
/*
|
||||
* If IMA is guaranteed to appraise a signature on the kexec
|
||||
* image, permit it even if the kernel is otherwise locked
|
||||
* down.
|
||||
*/
|
||||
|
@ -216,17 +201,10 @@ kimage_validate_signature(struct kimage *image)
|
|||
security_locked_down(LOCKDOWN_KEXEC))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
|
||||
/* All other errors are fatal, including nomem, unparseable
|
||||
* signatures and signature check failures - even if signatures
|
||||
* aren't required.
|
||||
*/
|
||||
default:
|
||||
pr_notice("kernel signature verification failed (%d).\n", ret);
|
||||
pr_debug("kernel signature verification failed (%d).\n", ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -2783,7 +2783,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
|
|||
|
||||
void * __weak module_alloc(unsigned long size)
|
||||
{
|
||||
return vmalloc_exec(size);
|
||||
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
|
||||
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
|
||||
NUMA_NO_NODE, __func__);
|
||||
}
|
||||
|
||||
bool __weak module_init_section(const char *name)
|
||||
|
|
|
@ -520,8 +520,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
|
|||
err_free:
|
||||
kfree(devmem);
|
||||
err_release:
|
||||
release_mem_region(devmem->pagemap.res.start,
|
||||
resource_size(&devmem->pagemap.res));
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
err:
|
||||
mutex_unlock(&mdevice->devmem_lock);
|
||||
return false;
|
||||
|
|
|
@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
|
|||
.page = NULL,
|
||||
};
|
||||
|
||||
current->capture_control = &capc;
|
||||
/*
|
||||
* Make sure the structs are really initialized before we expose the
|
||||
* capture control, in case we are interrupted and the interrupt handler
|
||||
* frees a page.
|
||||
*/
|
||||
barrier();
|
||||
WRITE_ONCE(current->capture_control, &capc);
|
||||
|
||||
ret = compact_zone(&cc, &capc);
|
||||
|
||||
VM_BUG_ON(!list_empty(&cc.freepages));
|
||||
VM_BUG_ON(!list_empty(&cc.migratepages));
|
||||
|
||||
*capture = capc.page;
|
||||
current->capture_control = NULL;
|
||||
/*
|
||||
* Make sure we hide capture control first before we read the captured
|
||||
* page pointer, otherwise an interrupt could free and capture a page
|
||||
* and we would leak it.
|
||||
*/
|
||||
WRITE_ONCE(current->capture_control, NULL);
|
||||
*capture = READ_ONCE(capc.page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -246,13 +246,13 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
|
|||
static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
|
||||
unsigned long vaddr)
|
||||
{
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
pte_t pte = ptep_get(ptep);
|
||||
|
||||
pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
|
||||
set_pte_at(mm, vaddr, ptep, pte);
|
||||
barrier();
|
||||
pte_clear(mm, vaddr, ptep);
|
||||
pte = READ_ONCE(*ptep);
|
||||
pte = ptep_get(ptep);
|
||||
WARN_ON(!pte_none(pte));
|
||||
}
|
||||
|
||||
|
|
|
@ -2772,8 +2772,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
|
|||
return;
|
||||
|
||||
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (!cw)
|
||||
if (!cw) {
|
||||
css_put(&memcg->css);
|
||||
return;
|
||||
}
|
||||
|
||||
cw->memcg = memcg;
|
||||
cw->cachep = cachep;
|
||||
|
@ -6360,11 +6362,16 @@ static unsigned long effective_protection(unsigned long usage,
|
|||
* We're using unprotected memory for the weight so that if
|
||||
* some cgroups DO claim explicit protection, we don't protect
|
||||
* the same bytes twice.
|
||||
*
|
||||
* Check both usage and parent_usage against the respective
|
||||
* protected values. One should imply the other, but they
|
||||
* aren't read atomically - make sure the division is sane.
|
||||
*/
|
||||
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
|
||||
return ep;
|
||||
|
||||
if (parent_effective > siblings_protected && usage > protected) {
|
||||
if (parent_effective > siblings_protected &&
|
||||
parent_usage > siblings_protected &&
|
||||
usage > protected) {
|
||||
unsigned long unclaimed;
|
||||
|
||||
unclaimed = parent_effective - siblings_protected;
|
||||
|
@ -6416,7 +6423,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
|
|||
|
||||
if (parent == root) {
|
||||
memcg->memory.emin = READ_ONCE(memcg->memory.min);
|
||||
memcg->memory.elow = memcg->memory.low;
|
||||
memcg->memory.elow = READ_ONCE(memcg->memory.low);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -6428,7 +6435,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
|
|||
atomic_long_read(&parent->memory.children_min_usage)));
|
||||
|
||||
WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
|
||||
memcg->memory.low, READ_ONCE(parent->memory.elow),
|
||||
READ_ONCE(memcg->memory.low),
|
||||
READ_ONCE(parent->memory.elow),
|
||||
atomic_long_read(&parent->memory.children_low_usage)));
|
||||
|
||||
out:
|
||||
|
|
33
mm/memory.c
33
mm/memory.c
|
@ -1498,7 +1498,7 @@ out:
|
|||
}
|
||||
|
||||
#ifdef pte_index
|
||||
static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
|
||||
static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
|
||||
unsigned long addr, struct page *page, pgprot_t prot)
|
||||
{
|
||||
int err;
|
||||
|
@ -1506,8 +1506,9 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
|
|||
if (!page_count(page))
|
||||
return -EINVAL;
|
||||
err = validate_page_before_insert(page);
|
||||
return err ? err : insert_page_into_pte_locked(
|
||||
mm, pte_offset_map(pmd, addr), addr, page, prot);
|
||||
if (err)
|
||||
return err;
|
||||
return insert_page_into_pte_locked(mm, pte, addr, page, prot);
|
||||
}
|
||||
|
||||
/* insert_pages() amortizes the cost of spinlock operations
|
||||
|
@ -1517,7 +1518,8 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
|
|||
struct page **pages, unsigned long *num, pgprot_t prot)
|
||||
{
|
||||
pmd_t *pmd = NULL;
|
||||
spinlock_t *pte_lock = NULL;
|
||||
pte_t *start_pte, *pte;
|
||||
spinlock_t *pte_lock;
|
||||
struct mm_struct *const mm = vma->vm_mm;
|
||||
unsigned long curr_page_idx = 0;
|
||||
unsigned long remaining_pages_total = *num;
|
||||
|
@ -1536,18 +1538,17 @@ more:
|
|||
ret = -ENOMEM;
|
||||
if (pte_alloc(mm, pmd))
|
||||
goto out;
|
||||
pte_lock = pte_lockptr(mm, pmd);
|
||||
|
||||
while (pages_to_write_in_pmd) {
|
||||
int pte_idx = 0;
|
||||
const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
|
||||
|
||||
spin_lock(pte_lock);
|
||||
for (; pte_idx < batch_size; ++pte_idx) {
|
||||
int err = insert_page_in_batch_locked(mm, pmd,
|
||||
start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
|
||||
for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
|
||||
int err = insert_page_in_batch_locked(mm, pte,
|
||||
addr, pages[curr_page_idx], prot);
|
||||
if (unlikely(err)) {
|
||||
spin_unlock(pte_lock);
|
||||
pte_unmap_unlock(start_pte, pte_lock);
|
||||
ret = err;
|
||||
remaining_pages_total -= pte_idx;
|
||||
goto out;
|
||||
|
@ -1555,7 +1556,7 @@ more:
|
|||
addr += PAGE_SIZE;
|
||||
++curr_page_idx;
|
||||
}
|
||||
spin_unlock(pte_lock);
|
||||
pte_unmap_unlock(start_pte, pte_lock);
|
||||
pages_to_write_in_pmd -= batch_size;
|
||||
remaining_pages_total -= batch_size;
|
||||
}
|
||||
|
@ -3140,8 +3141,18 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
|||
err = mem_cgroup_charge(page, vma->vm_mm,
|
||||
GFP_KERNEL);
|
||||
ClearPageSwapCache(page);
|
||||
if (err)
|
||||
if (err) {
|
||||
ret = VM_FAULT_OOM;
|
||||
goto out_page;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: Move to lru_cache_add() when it
|
||||
* supports new vs putback
|
||||
*/
|
||||
spin_lock_irq(&page_pgdat(page)->lru_lock);
|
||||
lru_note_cost_page(page);
|
||||
spin_unlock_irq(&page_pgdat(page)->lru_lock);
|
||||
|
||||
lru_cache_add(page);
|
||||
swap_readpage(page, true);
|
||||
|
|
|
@ -471,11 +471,20 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
|
|||
unsigned long start_pfn,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
const unsigned long end_pfn = start_pfn + nr_pages;
|
||||
struct pglist_data *pgdat = zone->zone_pgdat;
|
||||
unsigned long flags;
|
||||
unsigned long pfn, cur_nr_pages, flags;
|
||||
|
||||
/* Poison struct pages because they are now uninitialized again. */
|
||||
page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
|
||||
for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
|
||||
cond_resched();
|
||||
|
||||
/* Select all remaining pages up to the next section boundary */
|
||||
cur_nr_pages =
|
||||
min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
|
||||
page_init_poison(pfn_to_page(pfn),
|
||||
sizeof(struct page) * cur_nr_pages);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ZONE_DEVICE
|
||||
/*
|
||||
|
|
17
mm/nommu.c
17
mm/nommu.c
|
@ -290,23 +290,6 @@ void *vzalloc_node(unsigned long size, int node)
|
|||
}
|
||||
EXPORT_SYMBOL(vzalloc_node);
|
||||
|
||||
/**
|
||||
* vmalloc_exec - allocate virtually contiguous, executable memory
|
||||
* @size: allocation size
|
||||
*
|
||||
* Kernel-internal function to allocate enough pages to cover @size
|
||||
* the page level allocator and map them into contiguous and
|
||||
* executable kernel virtual space.
|
||||
*
|
||||
* For tight control over page level allocator and protection flags
|
||||
* use __vmalloc() instead.
|
||||
*/
|
||||
|
||||
void *vmalloc_exec(unsigned long size)
|
||||
{
|
||||
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
|
||||
}
|
||||
|
||||
/**
|
||||
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
|
||||
* @size: allocation size
|
||||
|
|
|
@ -348,7 +348,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
|
|||
gfp_t gfp, int order,
|
||||
struct kmem_cache *s)
|
||||
{
|
||||
unsigned int nr_pages = 1 << order;
|
||||
int nr_pages = 1 << order;
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
int ret;
|
||||
|
@ -388,7 +388,7 @@ out:
|
|||
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
|
||||
struct kmem_cache *s)
|
||||
{
|
||||
unsigned int nr_pages = 1 << order;
|
||||
int nr_pages = 1 << order;
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
|
|
|
@ -1726,7 +1726,7 @@ void kzfree(const void *p)
|
|||
if (unlikely(ZERO_OR_NULL_PTR(mem)))
|
||||
return;
|
||||
ks = ksize(mem);
|
||||
memset(mem, 0, ks);
|
||||
memzero_explicit(mem, ks);
|
||||
kfree(mem);
|
||||
}
|
||||
EXPORT_SYMBOL(kzfree);
|
||||
|
|
19
mm/slub.c
19
mm/slub.c
|
@ -3766,15 +3766,13 @@ error:
|
|||
}
|
||||
|
||||
static void list_slab_objects(struct kmem_cache *s, struct page *page,
|
||||
const char *text, unsigned long *map)
|
||||
const char *text)
|
||||
{
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
void *addr = page_address(page);
|
||||
unsigned long *map;
|
||||
void *p;
|
||||
|
||||
if (!map)
|
||||
return;
|
||||
|
||||
slab_err(s, page, text, s->name);
|
||||
slab_lock(page);
|
||||
|
||||
|
@ -3786,6 +3784,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
|
|||
print_tracking(s, p);
|
||||
}
|
||||
}
|
||||
put_map(map);
|
||||
slab_unlock(page);
|
||||
#endif
|
||||
}
|
||||
|
@ -3799,11 +3798,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
|
|||
{
|
||||
LIST_HEAD(discard);
|
||||
struct page *page, *h;
|
||||
unsigned long *map = NULL;
|
||||
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
|
||||
#endif
|
||||
|
||||
BUG_ON(irqs_disabled());
|
||||
spin_lock_irq(&n->list_lock);
|
||||
|
@ -3813,16 +3807,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
|
|||
list_add(&page->slab_list, &discard);
|
||||
} else {
|
||||
list_slab_objects(s, page,
|
||||
"Objects remaining in %s on __kmem_cache_shutdown()",
|
||||
map);
|
||||
"Objects remaining in %s on __kmem_cache_shutdown()");
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&n->list_lock);
|
||||
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
bitmap_free(map);
|
||||
#endif
|
||||
|
||||
list_for_each_entry_safe(page, h, &discard, slab_list)
|
||||
discard_slab(s, page);
|
||||
}
|
||||
|
|
|
@ -443,8 +443,7 @@ void mark_page_accessed(struct page *page)
|
|||
else
|
||||
__lru_cache_activate_page(page);
|
||||
ClearPageReferenced(page);
|
||||
if (page_is_file_lru(page))
|
||||
workingset_activation(page);
|
||||
workingset_activation(page);
|
||||
}
|
||||
if (page_is_idle(page))
|
||||
clear_page_idle(page);
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/swap_slots.h>
|
||||
#include <linux/huge_mm.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* swapper_space is a fiction, retained to simplify the path through
|
||||
|
@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
|
|||
__SetPageSwapBacked(page);
|
||||
|
||||
/* May fail (-ENOMEM) if XArray node allocation failed. */
|
||||
if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
|
||||
if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
|
||||
put_swap_page(page, entry);
|
||||
goto fail_unlock;
|
||||
}
|
||||
|
|
21
mm/vmalloc.c
21
mm/vmalloc.c
|
@ -1862,7 +1862,6 @@ EXPORT_SYMBOL(vm_unmap_ram);
|
|||
* @pages: an array of pointers to the pages to be mapped
|
||||
* @count: number of pages
|
||||
* @node: prefer to allocate data structures on this node
|
||||
* @prot: memory protection to use. PAGE_KERNEL for regular RAM
|
||||
*
|
||||
* If you use this function for less than VMAP_MAX_ALLOC pages, it could be
|
||||
* faster than vmap so it's good. But if you mix long-life and short-life
|
||||
|
@ -2696,26 +2695,6 @@ void *vzalloc_node(unsigned long size, int node)
|
|||
}
|
||||
EXPORT_SYMBOL(vzalloc_node);
|
||||
|
||||
/**
|
||||
* vmalloc_exec - allocate virtually contiguous, executable memory
|
||||
* @size: allocation size
|
||||
*
|
||||
* Kernel-internal function to allocate enough pages to cover @size
|
||||
* the page level allocator and map them into contiguous and
|
||||
* executable kernel virtual space.
|
||||
*
|
||||
* For tight control over page level allocator and protection flags
|
||||
* use __vmalloc() instead.
|
||||
*
|
||||
* Return: pointer to the allocated memory or %NULL on error
|
||||
*/
|
||||
void *vmalloc_exec(unsigned long size)
|
||||
{
|
||||
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
|
||||
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
|
||||
NUMA_NO_NODE, __builtin_return_address(0));
|
||||
}
|
||||
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
|
||||
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
|
||||
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
|
||||
|
|
|
@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
|||
__delete_from_swap_cache(page, swap);
|
||||
xa_unlock_irqrestore(&mapping->i_pages, flags);
|
||||
put_swap_page(page, swap);
|
||||
workingset_eviction(page, target_memcg);
|
||||
} else {
|
||||
void (*freepage)(struct page *);
|
||||
void *shadow = NULL;
|
||||
|
@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
|
|||
list_add(&page->lru, &pages_to_free);
|
||||
} else {
|
||||
nr_moved += nr_pages;
|
||||
if (PageActive(page))
|
||||
workingset_age_nonresident(lruvec, nr_pages);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -156,8 +156,8 @@
|
|||
*
|
||||
* Implementation
|
||||
*
|
||||
* For each node's file LRU lists, a counter for inactive evictions
|
||||
* and activations is maintained (node->inactive_age).
|
||||
* For each node's LRU lists, a counter for inactive evictions and
|
||||
* activations is maintained (node->nonresident_age).
|
||||
*
|
||||
* On eviction, a snapshot of this counter (along with some bits to
|
||||
* identify the node) is stored in the now empty page cache
|
||||
|
@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
|
|||
*workingsetp = workingset;
|
||||
}
|
||||
|
||||
static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
|
||||
/**
|
||||
* workingset_age_nonresident - age non-resident entries as LRU ages
|
||||
* @lruvec: the lruvec that was aged
|
||||
* @nr_pages: the number of pages to count
|
||||
*
|
||||
* As in-memory pages are aged, non-resident pages need to be aged as
|
||||
* well, in order for the refault distances later on to be comparable
|
||||
* to the in-memory dimensions. This function allows reclaim and LRU
|
||||
* operations to drive the non-resident aging along in parallel.
|
||||
*/
|
||||
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
|
||||
{
|
||||
/*
|
||||
* Reclaiming a cgroup means reclaiming all its children in a
|
||||
|
@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
|
|||
* the root cgroup's, age as well.
|
||||
*/
|
||||
do {
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
atomic_long_inc(&lruvec->inactive_age);
|
||||
} while (memcg && (memcg = parent_mem_cgroup(memcg)));
|
||||
atomic_long_add(nr_pages, &lruvec->nonresident_age);
|
||||
} while ((lruvec = parent_lruvec(lruvec)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
|
|||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
|
||||
advance_inactive_age(page_memcg(page), pgdat);
|
||||
|
||||
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
|
||||
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
|
||||
/* XXX: target_memcg can be NULL, go through lruvec */
|
||||
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
|
||||
eviction = atomic_long_read(&lruvec->inactive_age);
|
||||
eviction = atomic_long_read(&lruvec->nonresident_age);
|
||||
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
|
||||
}
|
||||
|
||||
|
@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
|
|||
if (!mem_cgroup_disabled() && !eviction_memcg)
|
||||
goto out;
|
||||
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
|
||||
refault = atomic_long_read(&eviction_lruvec->inactive_age);
|
||||
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
|
||||
|
||||
/*
|
||||
* Calculate the refault distance
|
||||
*
|
||||
* The unsigned subtraction here gives an accurate distance
|
||||
* across inactive_age overflows in most cases. There is a
|
||||
* across nonresident_age overflows in most cases. There is a
|
||||
* special case: usually, shadow entries have a short lifetime
|
||||
* and are either refaulted or reclaimed along with the inode
|
||||
* before they get too old. But it is not impossible for the
|
||||
* inactive_age to lap a shadow entry in the field, which can
|
||||
* then result in a false small refault distance, leading to a
|
||||
* false activation should this old entry actually refault
|
||||
* again. However, earlier kernels used to deactivate
|
||||
* nonresident_age to lap a shadow entry in the field, which
|
||||
* can then result in a false small refault distance, leading
|
||||
* to a false activation should this old entry actually
|
||||
* refault again. However, earlier kernels used to deactivate
|
||||
* unconditionally with *every* reclaim invocation for the
|
||||
* longest time, so the occasional inappropriate activation
|
||||
* leading to pressure on the active list is not a problem.
|
||||
|
@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
|
|||
goto out;
|
||||
|
||||
SetPageActive(page);
|
||||
advance_inactive_age(memcg, pgdat);
|
||||
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
|
||||
inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
|
||||
|
||||
/* Page was active prior to eviction */
|
||||
|
@ -382,6 +388,7 @@ out:
|
|||
void workingset_activation(struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
|
@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
|
|||
memcg = page_memcg_rcu(page);
|
||||
if (!mem_cgroup_disabled() && !memcg)
|
||||
goto out;
|
||||
advance_inactive_age(memcg, page_pgdat(page));
|
||||
lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
|
||||
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue