mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the CPU bitmap) and the size of the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Tested-by: Song Liu <songliubraving@fb.com> Signed-off-by: Rik van Riel <riel@surriel.com> Signed-off-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Rik van Riel <riel@surriel.com> Acked-by: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kernel-team@fb.com Cc: luto@kernel.org Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
37c45b2354
commit
c1a2f7f0c0
|
@ -82,6 +82,7 @@ struct mm_struct efi_mm = {
|
||||||
.mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
|
.mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
|
||||||
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
|
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
|
||||||
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
|
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
|
||||||
|
.cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool disable_runtime;
|
static bool disable_runtime;
|
||||||
|
|
|
@ -335,6 +335,7 @@ struct core_state {
|
||||||
|
|
||||||
struct kioctx_table;
|
struct kioctx_table;
|
||||||
struct mm_struct {
|
struct mm_struct {
|
||||||
|
struct {
|
||||||
struct vm_area_struct *mmap; /* list of VMAs */
|
struct vm_area_struct *mmap; /* list of VMAs */
|
||||||
struct rb_root mm_rb;
|
struct rb_root mm_rb;
|
||||||
u32 vmacache_seqnum; /* per-thread vmacache */
|
u32 vmacache_seqnum; /* per-thread vmacache */
|
||||||
|
@ -357,11 +358,11 @@ struct mm_struct {
|
||||||
/**
|
/**
|
||||||
* @mm_users: The number of users including userspace.
|
* @mm_users: The number of users including userspace.
|
||||||
*
|
*
|
||||||
* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
|
* Use mmget()/mmget_not_zero()/mmput() to modify. When this
|
||||||
* to 0 (i.e. when the task exits and there are no other temporary
|
* drops to 0 (i.e. when the task exits and there are no other
|
||||||
* reference holders), we also release a reference on @mm_count
|
* temporary reference holders), we also release a reference on
|
||||||
* (which may then free the &struct mm_struct if @mm_count also
|
* @mm_count (which may then free the &struct mm_struct if
|
||||||
* drops to 0).
|
* @mm_count also drops to 0).
|
||||||
*/
|
*/
|
||||||
atomic_t mm_users;
|
atomic_t mm_users;
|
||||||
|
|
||||||
|
@ -379,11 +380,14 @@ struct mm_struct {
|
||||||
#endif
|
#endif
|
||||||
int map_count; /* number of VMAs */
|
int map_count; /* number of VMAs */
|
||||||
|
|
||||||
spinlock_t page_table_lock; /* Protects page tables and some counters */
|
spinlock_t page_table_lock; /* Protects page tables and some
|
||||||
|
* counters
|
||||||
|
*/
|
||||||
struct rw_semaphore mmap_sem;
|
struct rw_semaphore mmap_sem;
|
||||||
|
|
||||||
struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
|
struct list_head mmlist; /* List of maybe swapped mm's. These
|
||||||
* together off init_mm.mmlist, and are protected
|
* are globally strung together off
|
||||||
|
* init_mm.mmlist, and are protected
|
||||||
* by mmlist_lock
|
* by mmlist_lock
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -414,12 +418,10 @@ struct mm_struct {
|
||||||
|
|
||||||
struct linux_binfmt *binfmt;
|
struct linux_binfmt *binfmt;
|
||||||
|
|
||||||
cpumask_var_t cpu_vm_mask_var;
|
|
||||||
|
|
||||||
/* Architecture-specific MM context */
|
/* Architecture-specific MM context */
|
||||||
mm_context_t context;
|
mm_context_t context;
|
||||||
|
|
||||||
unsigned long flags; /* Must use atomic bitops to access the bits */
|
unsigned long flags; /* Must use atomic bitops to access */
|
||||||
|
|
||||||
struct core_state *core_state; /* coredumping support */
|
struct core_state *core_state; /* coredumping support */
|
||||||
#ifdef CONFIG_MEMBARRIER
|
#ifdef CONFIG_MEMBARRIER
|
||||||
|
@ -452,14 +454,11 @@ struct mm_struct {
|
||||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
|
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
|
||||||
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
|
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_CPUMASK_OFFSTACK
|
|
||||||
struct cpumask cpumask_allocation;
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_NUMA_BALANCING
|
#ifdef CONFIG_NUMA_BALANCING
|
||||||
/*
|
/*
|
||||||
* numa_next_scan is the next time that the PTEs will be marked
|
* numa_next_scan is the next time that the PTEs will be marked
|
||||||
* pte_numa. NUMA hinting faults will gather statistics and migrate
|
* pte_numa. NUMA hinting faults will gather statistics and
|
||||||
* pages to new nodes if necessary.
|
* migrate pages to new nodes if necessary.
|
||||||
*/
|
*/
|
||||||
unsigned long numa_next_scan;
|
unsigned long numa_next_scan;
|
||||||
|
|
||||||
|
@ -470,9 +469,9 @@ struct mm_struct {
|
||||||
int numa_scan_seq;
|
int numa_scan_seq;
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* An operation with batched TLB flushing is going on. Anything that
|
* An operation with batched TLB flushing is going on. Anything
|
||||||
* can move process memory needs to flush the TLB when moving a
|
* that can move process memory needs to flush the TLB when
|
||||||
* PROT_NONE or PROT_NUMA mapped page.
|
* moving a PROT_NONE or PROT_NUMA mapped page.
|
||||||
*/
|
*/
|
||||||
atomic_t tlb_flush_pending;
|
atomic_t tlb_flush_pending;
|
||||||
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||||
|
@ -491,20 +490,28 @@ struct mm_struct {
|
||||||
#endif
|
#endif
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The mm_cpumask needs to be at the end of mm_struct, because it
|
||||||
|
* is dynamically sized based on nr_cpu_ids.
|
||||||
|
*/
|
||||||
|
unsigned long cpu_bitmap[];
|
||||||
|
};
|
||||||
|
|
||||||
extern struct mm_struct init_mm;
|
extern struct mm_struct init_mm;
|
||||||
|
|
||||||
|
/* Pointer magic because the dynamic array size confuses some compilers. */
|
||||||
static inline void mm_init_cpumask(struct mm_struct *mm)
|
static inline void mm_init_cpumask(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_CPUMASK_OFFSTACK
|
unsigned long cpu_bitmap = (unsigned long)mm;
|
||||||
mm->cpu_vm_mask_var = &mm->cpumask_allocation;
|
|
||||||
#endif
|
cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
|
||||||
cpumask_clear(mm->cpu_vm_mask_var);
|
cpumask_clear((struct cpumask *)cpu_bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
|
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
|
||||||
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
|
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
return mm->cpu_vm_mask_var;
|
return (struct cpumask *)&mm->cpu_bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mmu_gather;
|
struct mmu_gather;
|
||||||
|
|
|
@ -2253,6 +2253,8 @@ static void sighand_ctor(void *data)
|
||||||
|
|
||||||
void __init proc_caches_init(void)
|
void __init proc_caches_init(void)
|
||||||
{
|
{
|
||||||
|
unsigned int mm_size;
|
||||||
|
|
||||||
sighand_cachep = kmem_cache_create("sighand_cache",
|
sighand_cachep = kmem_cache_create("sighand_cache",
|
||||||
sizeof(struct sighand_struct), 0,
|
sizeof(struct sighand_struct), 0,
|
||||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
|
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
|
||||||
|
@ -2269,15 +2271,16 @@ void __init proc_caches_init(void)
|
||||||
sizeof(struct fs_struct), 0,
|
sizeof(struct fs_struct), 0,
|
||||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FIXME! The "sizeof(struct mm_struct)" currently includes the
|
* The mm_cpumask is located at the end of mm_struct, and is
|
||||||
* whole struct cpumask for the OFFSTACK case. We could change
|
* dynamically sized based on the maximum CPU number this system
|
||||||
* this to *only* allocate as much of it as required by the
|
* can have, taking hotplug into account (nr_cpu_ids).
|
||||||
* maximum number of CPU's we can ever have. The cpumask_allocation
|
|
||||||
* is at the end of the structure, exactly for that reason.
|
|
||||||
*/
|
*/
|
||||||
|
mm_size = sizeof(struct mm_struct) + cpumask_size();
|
||||||
|
|
||||||
mm_cachep = kmem_cache_create_usercopy("mm_struct",
|
mm_cachep = kmem_cache_create_usercopy("mm_struct",
|
||||||
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
|
mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
|
||||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
||||||
offsetof(struct mm_struct, saved_auxv),
|
offsetof(struct mm_struct, saved_auxv),
|
||||||
sizeof_field(struct mm_struct, saved_auxv),
|
sizeof_field(struct mm_struct, saved_auxv),
|
||||||
|
|
11
mm/init-mm.c
11
mm/init-mm.c
|
@ -15,6 +15,16 @@
|
||||||
#define INIT_MM_CONTEXT(name)
|
#define INIT_MM_CONTEXT(name)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For dynamically allocated mm_structs, there is a dynamically sized cpumask
|
||||||
|
* at the end of the structure, the size of which depends on the maximum CPU
|
||||||
|
* number the system can see. That way we allocate only as much memory for
|
||||||
|
* mm_cpumask() as needed for the hundreds, or thousands of processes that
|
||||||
|
* a system typically runs.
|
||||||
|
*
|
||||||
|
* Since there is only one init_mm in the entire system, keep it simple
|
||||||
|
 * and size this cpu_bitmap to NR_CPUS.
|
||||||
|
*/
|
||||||
struct mm_struct init_mm = {
|
struct mm_struct init_mm = {
|
||||||
.mm_rb = RB_ROOT,
|
.mm_rb = RB_ROOT,
|
||||||
.pgd = swapper_pg_dir,
|
.pgd = swapper_pg_dir,
|
||||||
|
@ -25,5 +35,6 @@ struct mm_struct init_mm = {
|
||||||
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
||||||
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
|
.cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
|
||||||
INIT_MM_CONTEXT(init_mm)
|
INIT_MM_CONTEXT(init_mm)
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue