From 89a8f0b629af6e892fe0541ed2d10fc5dc5daefc Mon Sep 17 00:00:00 2001 From: Liu Hua Date: Thu, 16 Nov 2023 18:25:07 +0800 Subject: [PATCH] vm: isolate max_map_count by pid namespace Upstream: no Signed-off-by: zgpeng Signed-off-by: Liu Hua Signed-off-by: katrinzhou Signed-off-by: Kairui Song --- include/linux/pid_namespace.h | 3 +++ kernel/pid.c | 1 + kernel/pid_namespace.c | 2 ++ kernel/sysctl.c | 20 +++++++++++++++++++- mm/mmap.c | 16 ++++++++++++++++ mm/mremap.c | 8 ++++++++ mm/nommu.c | 4 ++++ 7 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..213a0a0183e3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -41,6 +41,9 @@ struct pid_namespace { #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) int memfd_noexec_scope; #endif +#ifdef CONFIG_PID_NS + int max_map_count; +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; diff --git a/kernel/pid.c b/kernel/pid.c index 6500ef956f2f..89fce12c59be 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -82,6 +82,7 @@ struct pid_namespace init_pid_ns = { .ns.inum = PROC_PID_INIT_INO, #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, + .max_map_count = DEFAULT_MAX_MAP_COUNT, #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 619972c78774..15269a933d0b 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -113,6 +113,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns); #endif + ns->max_map_count = parent_pid_ns->max_map_count; + return ns; out_free_idr: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b3e89608dd71..3728226f9a78 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ 
-875,6 +875,24 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, &param); } +int proc_dointvec_max_map_count(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + + struct do_proc_dointvec_minmax_conv_param param = { + .min = (int *) table->extra1, + .max = (int *) table->extra2, + }; + +#ifdef CONFIG_PID_NS + table->data = &task_active_pid_ns(current)->max_map_count; +#endif + + return do_proc_dointvec(table, write, buffer, lenp, ppos, + do_proc_dointvec_minmax_conv, &param); +} + + /** * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure * @min: pointer to minimum allowable value @@ -2165,7 +2183,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_max_map_count, .maxlen = sizeof(sysctl_max_map_count), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_max_map_count, .extra1 = SYSCTL_ZERO, }, #else diff --git a/mm/mmap.c b/mm/mmap.c index 9e018d8dd7d6..8747cb17859f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1241,7 +1241,11 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EOVERFLOW; /* Too many mappings? */ +#ifdef CONFIG_PID_NS + if (mm->map_count > task_active_pid_ns(current)->max_map_count) +#else if (mm->map_count > sysctl_max_map_count) +#endif return -ENOMEM; /* Obtain the address to map to.
we verify (or select) it and ensure @@ -2428,7 +2432,11 @@ out_free_vma: int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, int new_below) { +#ifdef CONFIG_PID_NS + if (vma->vm_mm->map_count >= task_active_pid_ns(current)->max_map_count) +#else if (vma->vm_mm->map_count >= sysctl_max_map_count) +#endif return -ENOMEM; return __split_vma(vmi, vma, addr, new_below); @@ -2478,7 +2486,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, * not exceed its limit; but let map_count go just above * its limit temporarily, to help free resources as expected. */ +#ifdef CONFIG_PID_NS + if (end < vma->vm_end && mm->map_count >= task_active_pid_ns(current)->max_map_count) +#else if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) +#endif goto map_count_exceeded; error = __split_vma(vmi, vma, start, 1); @@ -3080,7 +3092,11 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) return -ENOMEM; +#ifdef CONFIG_PID_NS + if (mm->map_count > task_active_pid_ns(current)->max_map_count) +#else if (mm->map_count > sysctl_max_map_count) +#endif return -ENOMEM; if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) diff --git a/mm/mremap.c b/mm/mremap.c index 382e81c33fc4..504f1013089b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -603,7 +603,11 @@ static unsigned long move_vma(struct vm_area_struct *vma, * We'd prefer to avoid failure later on in do_munmap: * which may split one vma into three before unmapping. 
*/ +#ifdef CONFIG_PID_NS + if (mm->map_count >= task_active_pid_ns(current)->max_map_count - 3) +#else if (mm->map_count >= sysctl_max_map_count - 3) +#endif return -ENOMEM; if (unlikely(flags & MREMAP_DONTUNMAP)) @@ -832,7 +836,11 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, * Check whether current map count plus 2 still leads us to 4 maps below * the threshold, otherwise return -ENOMEM here to be more safe. */ +#ifdef CONFIG_PID_NS + if ((mm->map_count + 2) >= task_active_pid_ns(current)->max_map_count - 3) +#else if ((mm->map_count + 2) >= sysctl_max_map_count - 3) +#endif return -ENOMEM; if (flags & MREMAP_FIXED) { diff --git a/mm/nommu.c b/mm/nommu.c index 7f9e9e5a0e12..36eba8bf1e22 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1319,7 +1319,11 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return -ENOMEM; mm = vma->vm_mm; +#ifdef CONFIG_PID_NS + if (mm->map_count >= task_active_pid_ns(current)->max_map_count) +#else if (mm->map_count >= sysctl_max_map_count) +#endif return -ENOMEM; region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);