vm: isolate max_map_count by pid namespace

Upstream: no

Signed-off-by: zgpeng <zgpeng@tencent.com>
Signed-off-by: Liu Hua <shookliu@tencent.com>
Signed-off-by: katrinzhou <katrinzhou@tencent.com>
Signed-off-by: Kairui Song <kasong@tencent.com>
This commit is contained in:
Liu Hua 2023-11-16 18:25:07 +08:00 committed by Kairui Song
parent 525005220c
commit 89a8f0b629
7 changed files with 53 additions and 1 deletions

View File

@ -41,6 +41,9 @@ struct pid_namespace {
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
int memfd_noexec_scope; int memfd_noexec_scope;
#endif #endif
#ifdef CONFIG_PID_NS
int max_map_count;
#endif
} __randomize_layout; } __randomize_layout;
extern struct pid_namespace init_pid_ns; extern struct pid_namespace init_pid_ns;

View File

@ -82,6 +82,7 @@ struct pid_namespace init_pid_ns = {
.ns.inum = PROC_PID_INIT_INO, .ns.inum = PROC_PID_INIT_INO,
#ifdef CONFIG_PID_NS #ifdef CONFIG_PID_NS
.ns.ops = &pidns_operations, .ns.ops = &pidns_operations,
.max_map_count = DEFAULT_MAX_MAP_COUNT,
#endif #endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,

View File

@ -113,6 +113,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns); ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
#endif #endif
ns->max_map_count = parent_pid_ns->max_map_count;
return ns; return ns;
out_free_idr: out_free_idr:

View File

@ -875,6 +875,24 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
do_proc_dointvec_minmax_conv, &param); do_proc_dointvec_minmax_conv, &param);
} }
int proc_dointvec_max_map_count(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_dointvec_minmax_conv_param param = {
.min = (int *) table->extra1,
.max = (int *) table->extra2,
};
#ifdef CONFIG_PID_NS
table->data = &task_active_pid_ns(current)->max_map_count;
#endif
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_minmax_conv, &param);
}
/** /**
* struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
* @min: pointer to minimum allowable value * @min: pointer to minimum allowable value
@ -2165,7 +2183,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_max_map_count, .data = &sysctl_max_map_count,
.maxlen = sizeof(sysctl_max_map_count), .maxlen = sizeof(sysctl_max_map_count),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_max_map_count,
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
}, },
#else #else

View File

@ -1241,7 +1241,11 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
return -EOVERFLOW; return -EOVERFLOW;
/* Too many mappings? */ /* Too many mappings? */
#ifdef CONFIG_PID_NS
if (mm->map_count > task_active_pid_ns(current)->max_map_count)
#else
if (mm->map_count > sysctl_max_map_count) if (mm->map_count > sysctl_max_map_count)
#endif
return -ENOMEM; return -ENOMEM;
/* Obtain the address to map to. we verify (or select) it and ensure /* Obtain the address to map to. we verify (or select) it and ensure
@ -2428,7 +2432,11 @@ out_free_vma:
int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, int new_below) unsigned long addr, int new_below)
{ {
#ifdef CONFIG_PID_NS
if (vma->vm_mm->map_count >= task_active_pid_ns(current)->max_map_count)
#else
if (vma->vm_mm->map_count >= sysctl_max_map_count) if (vma->vm_mm->map_count >= sysctl_max_map_count)
#endif
return -ENOMEM; return -ENOMEM;
return __split_vma(vmi, vma, addr, new_below); return __split_vma(vmi, vma, addr, new_below);
@ -2478,7 +2486,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
* not exceed its limit; but let map_count go just above * not exceed its limit; but let map_count go just above
* its limit temporarily, to help free resources as expected. * its limit temporarily, to help free resources as expected.
*/ */
#ifdef CONFIG_PID_NS
if (end < vma->vm_end && mm->map_count >= task_active_pid_ns(current)->max_map_count)
#else
if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
#endif
goto map_count_exceeded; goto map_count_exceeded;
error = __split_vma(vmi, vma, start, 1); error = __split_vma(vmi, vma, start, 1);
@ -3080,7 +3092,11 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM; return -ENOMEM;
#ifdef CONFIG_PID_NS
if (mm->map_count > task_active_pid_ns(current)->max_map_count)
#else
if (mm->map_count > sysctl_max_map_count) if (mm->map_count > sysctl_max_map_count)
#endif
return -ENOMEM; return -ENOMEM;
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))

View File

@ -603,7 +603,11 @@ static unsigned long move_vma(struct vm_area_struct *vma,
* We'd prefer to avoid failure later on in do_munmap: * We'd prefer to avoid failure later on in do_munmap:
* which may split one vma into three before unmapping. * which may split one vma into three before unmapping.
*/ */
#ifdef CONFIG_PID_NS
if (mm->map_count >= task_active_pid_ns(current)->max_map_count - 3)
#else
if (mm->map_count >= sysctl_max_map_count - 3) if (mm->map_count >= sysctl_max_map_count - 3)
#endif
return -ENOMEM; return -ENOMEM;
if (unlikely(flags & MREMAP_DONTUNMAP)) if (unlikely(flags & MREMAP_DONTUNMAP))
@ -832,7 +836,11 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
* Check whether current map count plus 2 still leads us to 4 maps below * Check whether current map count plus 2 still leads us to 4 maps below
* the threshold, otherwise return -ENOMEM here to be more safe. * the threshold, otherwise return -ENOMEM here to be more safe.
*/ */
#ifdef CONFIG_PID_NS
if ((mm->map_count + 2) >= task_active_pid_ns(current)->max_map_count - 3)
#else
if ((mm->map_count + 2) >= sysctl_max_map_count - 3) if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
#endif
return -ENOMEM; return -ENOMEM;
if (flags & MREMAP_FIXED) { if (flags & MREMAP_FIXED) {

View File

@ -1319,7 +1319,11 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
return -ENOMEM; return -ENOMEM;
mm = vma->vm_mm; mm = vma->vm_mm;
#ifdef CONFIG_PID_NS
if (mm->map_count >= task_active_pid_ns(current)->max_map_count)
#else
if (mm->map_count >= sysctl_max_map_count) if (mm->map_count >= sysctl_max_map_count)
#endif
return -ENOMEM; return -ENOMEM;
region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);