From bd50cfa89153a67429935a15e577a5eb5f10dd1b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 7 Jun 2011 07:18:45 -0400 Subject: [PATCH 01/10] slob/lockdep: Fix gfp flags passed to lockdep Doing a ktest.pl randconfig, I stumbled across the following bug on boot up: ------------[ cut here ]------------ WARNING: at /home/rostedt/work/autotest/nobackup/linux-test.git/kernel/lockdep.c:2649 lockdep_trace_alloc+0xed/0x100() Hardware name: Modules linked in: Pid: 0, comm: swapper Not tainted 3.0.0-rc1-test-00054-g1d68b67 #1 Call Trace: [] warn_slowpath_common+0xad/0xf0 [] warn_slowpath_null+0x1a/0x20 [] lockdep_trace_alloc+0xed/0x100 [] __kmalloc_node+0x30/0x2f0 [] pcpu_mem_alloc+0x13a/0x180 [] percpu_init_late+0x48/0xc2 [] ? mem_init+0xd8/0xe3 [] start_kernel+0x1c2/0x449 [] x86_64_start_reservations+0x163/0x167 [] x86_64_start_kernel+0x133/0x142 ---[ end trace a7919e7f17c0a725 ]--- Then I ran a ktest.pl config_bisect and it came up with this config as the problem: CONFIG_SLOB Looking at what is different between SLOB and SLAB and SLUB, I found that the gfp flags are masked against gfp_allowed_mask in SLAB and SLUB, but not SLOB. On boot up, interrupts are disabled and lockdep will warn if some flags are set in gfp and interrupts are disabled. But these flags are masked off with the gfp_allowed_mask during boot. Because SLOB does not mask the flags against gfp_allowed_mask it triggers the warn on. Adding this mask fixes the bug. I also found that kmem_cache_alloc_node() was missing both the mask and the lockdep check, and that was added too. 
Acked-by: Matt Mackall Cc: Paul Mundt Cc: Nick Piggin Signed-off-by: Steven Rostedt Signed-off-by: Pekka Enberg --- mm/slob.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/slob.c b/mm/slob.c index 46e0aee33a23..0ae881831ae2 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -482,6 +482,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); void *ret; + gfp &= gfp_allowed_mask; + lockdep_trace_alloc(gfp); if (size < PAGE_SIZE - align) { @@ -608,6 +610,10 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; + flags &= gfp_allowed_mask; + + lockdep_trace_alloc(flags); + if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node); trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, From 3192b920bf7d0c528ab54e7d3689f44055316a37 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 14 Jun 2011 16:16:36 -0500 Subject: [PATCH 02/10] slab, slub, slob: Unify alignment definition Every slab has its own alignment definition in include/linux/sl?b_def.h. Extract those and define a common set in include/linux/slab.h. SLOB: As noted, sometimes we need double word alignment on 32 bit. This gives all structures allocated by SLOB an unsigned long long alignment like the others do. SLAB: If ARCH_SLAB_MINALIGN is not set SLAB would set ARCH_SLAB_MINALIGN to zero meaning no alignment at all. Give it the default unsigned long long alignment. 
Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 10 ++++++++++ include/linux/slab_def.h | 26 -------------------------- include/linux/slob_def.h | 10 ---------- include/linux/slub_def.h | 10 ---------- 4 files changed, 10 insertions(+), 46 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index ad4dd1c8d30a..646a639a4aae 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -133,6 +133,16 @@ unsigned int kmem_cache_size(struct kmem_cache *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +#ifdef ARCH_DMA_MINALIGN +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + /* * Common kmalloc functions provided by all allocators */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 83203ae9390b..d7f63112f63c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -17,32 +17,6 @@ #include -/* - * Enforce a minimum alignment for the kmalloc caches. - * Usually, the kmalloc caches are cache_line_size() aligned, except when - * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. - * Some archs want to perform DMA into kmalloc caches and need a guaranteed - * alignment larger than the alignment of a 64-bit integer. - * ARCH_KMALLOC_MINALIGN allows that. - * Note that increasing this value may disable some debug features. - */ -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - -#ifndef ARCH_SLAB_MINALIGN -/* - * Enforce a minimum alignment for all caches. - * Intended for archs that get misalignment faults even for BYTES_PER_WORD - * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. 
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables - * some debug features. - */ -#define ARCH_SLAB_MINALIGN 0 -#endif - /* * struct kmem_cache * diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 4382db09df4f..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -1,16 +1,6 @@ #ifndef __LINUX_SLOB_DEF_H #define __LINUX_SLOB_DEF_H -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long) -#endif - -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long) -#endif - void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index c8668d161dd8..fd4fdc72bc8c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -113,16 +113,6 @@ struct kmem_cache { #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - /* * Maximum kmalloc object size handled by SLUB. Larger object allocations * are passed through to the page allocator. The page allocator "fastpath" From 90810645f78f894acfb04b3768e8a7d45f2b303a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 23 Jun 2011 09:36:12 -0500 Subject: [PATCH 03/10] slab allocators: Provide generic description of alignment defines Provide description for alignment defines. 
Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/slab.h b/include/linux/slab.h index 646a639a4aae..573c809c33d9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -133,12 +133,22 @@ unsigned int kmem_cache_size(struct kmem_cache *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +/* + * Some archs want to perform DMA into kmalloc caches and need a guaranteed + * alignment larger than the alignment of a 64-bit integer. + * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. + */ #ifdef ARCH_DMA_MINALIGN #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #else #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ #ifndef ARCH_SLAB_MINALIGN #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif From d6543e3935cec9f66b9647c24c2e44c68f8a91fd Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 7 Jul 2011 11:36:36 -0700 Subject: [PATCH 04/10] slub: Enable backtrace for create/delete points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch attempts to grab a backtrace for the creation and deletion points of the slub object. When a fault is detected, we can then get a better idea of where the item was deleted. 
Example output from debugging some funky nfs/rpc behaviour: ============================================================================= BUG kmalloc-64: Object is on free-list ----------------------------------------------------------------------------- INFO: Allocated in rpcb_getport_async+0x39c/0x5a5 [sunrpc] age=381 cpu=3 pid=3750 __slab_alloc+0x348/0x3ba kmem_cache_alloc_trace+0x67/0xe7 rpcb_getport_async+0x39c/0x5a5 [sunrpc] call_bind+0x70/0x75 [sunrpc] __rpc_execute+0x78/0x24b [sunrpc] rpc_execute+0x3d/0x42 [sunrpc] rpc_run_task+0x79/0x81 [sunrpc] rpc_call_sync+0x3f/0x60 [sunrpc] rpc_ping+0x42/0x58 [sunrpc] rpc_create+0x4aa/0x527 [sunrpc] nfs_create_rpc_client+0xb1/0xf6 [nfs] nfs_init_client+0x3b/0x7d [nfs] nfs_get_client+0x453/0x5ab [nfs] nfs_create_server+0x10b/0x437 [nfs] nfs_fs_mount+0x4ca/0x708 [nfs] mount_fs+0x6b/0x152 INFO: Freed in rpcb_map_release+0x3f/0x44 [sunrpc] age=30 cpu=2 pid=29049 __slab_free+0x57/0x150 kfree+0x107/0x13a rpcb_map_release+0x3f/0x44 [sunrpc] rpc_release_calldata+0x12/0x14 [sunrpc] rpc_free_task+0x59/0x61 [sunrpc] rpc_final_put_task+0x82/0x8a [sunrpc] __rpc_execute+0x23c/0x24b [sunrpc] rpc_async_schedule+0x10/0x12 [sunrpc] process_one_work+0x230/0x41d worker_thread+0x133/0x217 kthread+0x7d/0x85 kernel_thread_helper+0x4/0x10 INFO: Slab 0xffffea00029aa470 objects=20 used=9 fp=0xffff8800be7830d8 flags=0x20000000004081 INFO: Object 0xffff8800be7830d8 @offset=4312 fp=0xffff8800be7827a8 Bytes b4 0xffff8800be7830c8: 87 a8 96 00 01 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a .¨......ZZZZZZZZ Object 0xffff8800be7830d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object 0xffff8800be7830e8: 6b 6b 6b 6b 01 08 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkk..kkkkkkkkkk Object 0xffff8800be7830f8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object 0xffff8800be783108: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk¥ Redzone 0xffff8800be783118: bb bb bb bb bb bb bb bb »»»»»»»» Padding 0xffff8800be783258: 
5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Pid: 29049, comm: kworker/2:2 Not tainted 3.0.0-rc4+ #8 Call Trace: [] print_trailer+0x131/0x13a [] object_err+0x35/0x3e [] verify_mem_not_deleted+0x7a/0xb7 [] rpcb_getport_done+0x23/0x126 [sunrpc] [] rpc_exit_task+0x3f/0x6d [sunrpc] [] __rpc_execute+0x78/0x24b [sunrpc] [] ? rpc_execute+0x42/0x42 [sunrpc] [] rpc_async_schedule+0x10/0x12 [sunrpc] [] process_one_work+0x230/0x41d [] ? process_one_work+0x17b/0x41d [] worker_thread+0x133/0x217 [] ? manage_workers+0x191/0x191 [] kthread+0x7d/0x85 [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? __init_kthread_worker+0x56/0x56 [] ? gs_change+0x13/0x13 Acked-by: Christoph Lameter Signed-off-by: Ben Greear Signed-off-by: Pekka Enberg --- mm/slub.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 7be0223531b0..c9050995bc87 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -191,8 +191,12 @@ static LIST_HEAD(slab_caches); /* * Tracking user of a slab. 
*/ +#define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ +#ifdef CONFIG_STACKTRACE + unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ +#endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@ -420,6 +424,24 @@ static void set_track(struct kmem_cache *s, void *object, struct track *p = get_track(s, object, alloc); if (addr) { +#ifdef CONFIG_STACKTRACE + struct stack_trace trace; + int i; + + trace.nr_entries = 0; + trace.max_entries = TRACK_ADDRS_COUNT; + trace.entries = p->addrs; + trace.skip = 3; + save_stack_trace(&trace); + + /* See rant in lockdep.c */ + if (trace.nr_entries != 0 && + trace.entries[trace.nr_entries - 1] == ULONG_MAX) + trace.nr_entries--; + + for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) + p->addrs[i] = 0; +#endif p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; @@ -444,6 +466,16 @@ static void print_track(const char *s, struct track *t) printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); +#ifdef CONFIG_STACKTRACE + { + int i; + for (i = 0; i < TRACK_ADDRS_COUNT; i++) + if (t->addrs[i]) + printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); + else + break; + } +#endif } static void print_tracking(struct kmem_cache *s, void *object) From d18a90dd85f8243ed20cdadb6d8a37d595df456d Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 7 Jul 2011 11:36:37 -0700 Subject: [PATCH 05/10] slub: Add method to verify memory is not freed This is for tracking down suspect memory usage. 
Acked-by: Christoph Lameter Signed-off-by: Ben Greear Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 13 +++++++++++++ mm/slub.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index fd4fdc72bc8c..4b35c06dfbc5 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -218,6 +218,19 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order) return ret; } +/** + * Calling this on allocated memory will check that the memory + * is expected to be in use, and print warnings if not. + */ +#ifdef CONFIG_SLUB_DEBUG +extern bool verify_mem_not_deleted(const void *x); +#else +static inline bool verify_mem_not_deleted(const void *x) +{ + return true; +} +#endif + #ifdef CONFIG_TRACING extern void * kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size); diff --git a/mm/slub.c b/mm/slub.c index c9050995bc87..0e4f4f8245bc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2964,6 +2964,42 @@ size_t ksize(const void *object) } EXPORT_SYMBOL(ksize); +#ifdef CONFIG_SLUB_DEBUG +bool verify_mem_not_deleted(const void *x) +{ + struct page *page; + void *object = (void *)x; + unsigned long flags; + bool rv; + + if (unlikely(ZERO_OR_NULL_PTR(x))) + return false; + + local_irq_save(flags); + + page = virt_to_head_page(x); + if (unlikely(!PageSlab(page))) { + /* maybe it was from stack? 
*/ + rv = true; + goto out_unlock; + } + + slab_lock(page); + if (on_freelist(page->slab, page, object)) { + object_err(page->slab, page, object, "Object is on free-list"); + rv = false; + } else { + rv = true; + } + slab_unlock(page); + +out_unlock: + local_irq_restore(flags); + return rv; +} +EXPORT_SYMBOL(verify_mem_not_deleted); +#endif + void kfree(const void *x) { struct page *page; From c4089f98e943ff445665dea49c190657b34ccffe Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 26 Jun 2011 21:39:18 +0200 Subject: [PATCH 06/10] slub: reduce overhead of slub_debug slub checks for poison one byte at a time, which is highly inefficient and shows up frequently as the highest cpu-eater in perf top. Joining reads gives a nice speedup: (Compiling some project with different options) make -j12 make clean slub_debug disabled: 1m 27s 1.2 s slub_debug enabled: 1m 46s 7.6 s slub_debug enabled + this patch: 1m 33s 3.2 s check_bytes still shows up high, but not always at the top. Signed-off-by: Marcin Slusarz Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: linux-mm@kvack.org Signed-off-by: Pekka Enberg --- mm/slub.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0e4f4f8245bc..e3403b30159e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -589,10 +589,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) memset(p + s->objsize, val, s->inuse - s->objsize); } -static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) +static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) { while (bytes) { - if (*start != (u8)value) + if (*start != value) return start; start++; bytes--; @@ -600,6 +600,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) return NULL; } +static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) +{ + u64 value64; + unsigned int words, prefix; + + if (bytes <= 16) + return check_bytes8(start, value, 
bytes); + + value64 = value | value << 8 | value << 16 | value << 24; + value64 = value64 | value64 << 32; + prefix = 8 - ((unsigned long)start) % 8; + + if (prefix) { + u8 *r = check_bytes8(start, value, prefix); + if (r) + return r; + start += prefix; + bytes -= prefix; + } + + words = bytes / 8; + + while (words) { + if (*(u64 *)start != value64) + return check_bytes8(start, value, 8); + start += 8; + words--; + } + + return check_bytes8(start, value, bytes % 8); +} + static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { From bfa71457a091ac0e4e20cab36e8ebad63935e504 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 7 Jul 2011 22:47:01 +0300 Subject: [PATCH 07/10] SLUB: Fix missing include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following build breakage introduced by commit d6543e3 ("slub: Enable backtrace for create/delete points"): CC mm/slub.o mm/slub.c: In function ‘set_track’: mm/slub.c:428: error: storage size of ‘trace’ isn’t known mm/slub.c:435: error: implicit declaration of function ‘save_stack_trace’ mm/slub.c:428: warning: unused variable ‘trace’ make[1]: *** [mm/slub.o] Error 1 make: *** [mm/slub.o] Error 2 Signed-off-by: Pekka Enberg --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slub.c b/mm/slub.c index e3403b30159e..f899ff469f60 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -27,6 +27,7 @@ #include #include #include +#include #include From c225150b86fef9f7663219b6e9f7606ea1607312 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 11 Jul 2011 13:35:08 -0700 Subject: [PATCH 08/10] slab: fix DEBUG_SLAB build Fix CONFIG_SLAB=y CONFIG_DEBUG_SLAB=y build error and warnings. 
Now that ARCH_SLAB_MINALIGN defaults to __alignof__(unsigned long long), it is always defined (when slab.h included), but cannot be used in #if: mm/slab.c: In function `cache_alloc_debugcheck_after': mm/slab.c:3156:5: warning: "__alignof__" is not defined mm/slab.c:3156:5: error: missing binary operator before token "(" make[1]: *** [mm/slab.o] Error 1 So just remove the #if and #endif lines, but then 64-bit build warns: mm/slab.c: In function `cache_alloc_debugcheck_after': mm/slab.c:3156:6: warning: cast from pointer to integer of different size mm/slab.c:3158:10: warning: format `%d' expects type `int', but argument 3 has type `long unsigned int' Fix those with casts, whatever the actual type of ARCH_SLAB_MINALIGN. Acked-by: Christoph Lameter Signed-off-by: Hugh Dickins Signed-off-by: Pekka Enberg --- mm/slab.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index bcfa4987c8ae..ef8ceb726e71 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3153,12 +3153,10 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); -#if ARCH_SLAB_MINALIGN - if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { + if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, ARCH_SLAB_MINALIGN); + objp, (int)ARCH_SLAB_MINALIGN); } -#endif return objp; } #else From b56efcf0a45aa7fc32de90d5f9838541082fbc19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jul 2011 19:04:23 +0200 Subject: [PATCH 09/10] slab: shrink sizeof(struct kmem_cache) Reduce high order allocations for some setups. 
(NR_CPUS=4096 -> we need 64KB per kmem_cache struct) We now allocate exact needed size (using nr_cpu_ids and nr_node_ids) This also makes code a bit smaller on x86_64, since some field offsets are less than the 127 limit : Before patch : # size mm/slab.o text data bss dec hex filename 22605 361665 32 384302 5dd2e mm/slab.o After patch : # size mm/slab.o text data bss dec hex filename 22349 353473 8224 384046 5dc2e mm/slab.o CC: Andrew Morton Reported-by: Konstantin Khlebnikov Signed-off-by: Eric Dumazet Acked-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 26 +++++++++++++------------- mm/slab.c | 10 ++++++---- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index d7f63112f63c..d00e0bacda93 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -24,21 +24,19 @@ */ struct kmem_cache { -/* 1) per-cpu data, touched during every alloc/free */ - struct array_cache *array[NR_CPUS]; -/* 2) Cache tunables. Protected by cache_chain_mutex */ +/* 1) Cache tunables. 
Protected by cache_chain_mutex */ unsigned int batchcount; unsigned int limit; unsigned int shared; unsigned int buffer_size; u32 reciprocal_buffer_size; -/* 3) touched by every alloc & free from the backend */ +/* 2) touched by every alloc & free from the backend */ unsigned int flags; /* constant flags */ unsigned int num; /* # of objs per slab */ -/* 4) cache_grow/shrink */ +/* 3) cache_grow/shrink */ /* order of pgs per slab (2^n) */ unsigned int gfporder; @@ -54,11 +52,11 @@ struct kmem_cache { /* constructor func */ void (*ctor)(void *obj); -/* 5) cache creation/removal */ +/* 4) cache creation/removal */ const char *name; struct list_head next; -/* 6) statistics */ +/* 5) statistics */ #ifdef CONFIG_DEBUG_SLAB unsigned long num_active; unsigned long num_allocations; @@ -85,16 +83,18 @@ struct kmem_cache { int obj_size; #endif /* CONFIG_DEBUG_SLAB */ +/* 6) per-cpu/per-node data, touched during every alloc/free */ /* - * We put nodelists[] at the end of kmem_cache, because we want to size - * this array to nr_node_ids slots instead of MAX_NUMNODES + * We put array[] at the end of kmem_cache, because we want to size + * this array to nr_cpu_ids slots instead of NR_CPUS * (see kmem_cache_init()) - * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache - * is statically defined, so we reserve the max number of nodes. + * We still use [NR_CPUS] and not [1] or [0] because cache_cache + * is statically defined, so we reserve the max number of cpus. 
*/ - struct kmem_list3 *nodelists[MAX_NUMNODES]; + struct kmem_list3 **nodelists; + struct array_cache *array[NR_CPUS]; /* - * Do not add fields after nodelists[] + * Do not add fields after array[] */ }; diff --git a/mm/slab.c b/mm/slab.c index ef8ceb726e71..c3cb3598555a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -574,7 +574,9 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ +static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; static struct kmem_cache cache_cache = { + .nodelists = cache_cache_nodelists, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, @@ -1492,11 +1494,10 @@ void __init kmem_cache_init(void) cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* - * struct kmem_cache size depends on nr_node_ids, which - * can be less than MAX_NUMNODES. + * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids */ - cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + - nr_node_ids * sizeof(struct kmem_list3 *); + cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + + nr_node_ids * sizeof(struct kmem_list3 *); #if DEBUG cache_cache.obj_size = cache_cache.buffer_size; #endif @@ -2308,6 +2309,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (!cachep) goto oops; + cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; #if DEBUG cachep->obj_size = size; From 7ea466f2256b02a7047dfd47d76a2f6c1e427e3e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 21 Jul 2011 09:42:45 +0900 Subject: [PATCH 10/10] slab: fix DEBUG_SLAB warning In commit c225150b "slab: fix DEBUG_SLAB build", "if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))" is always true if ARCH_SLAB_MINALIGN == 0. Do not print warning if ARCH_SLAB_MINALIGN == 0. 
Signed-off-by: Tetsuo Handa Signed-off-by: Pekka Enberg --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index c3cb3598555a..dc2f068c0b7d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3155,7 +3155,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) { + if (ARCH_SLAB_MINALIGN && + ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", objp, (int)ARCH_SLAB_MINALIGN); }