From 42c8c99cd891184bf4bcf6f09d62c54e42599453 Mon Sep 17 00:00:00 2001
From: Zhao Jin
Date: Sat, 27 Aug 2011 00:26:17 +0800
Subject: [PATCH 1/7] slab, cleanup: remove unneeded return

The function ends right after the if-statement, so the ``return'' is
unneeded; remove it. Also move the last statement, which is common to
both branches, outside the if/else.

Signed-off-by: Zhao Jin
Acked-by: David Rientjes
Acked-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index f0bd7857ab3b..806a754fad8e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3693,13 +3693,12 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 
 	if (likely(ac->avail < ac->limit)) {
 		STATS_INC_FREEHIT(cachep);
-		ac->entry[ac->avail++] = objp;
-		return;
 	} else {
 		STATS_INC_FREEMISS(cachep);
 		cache_flusharray(cachep, ac);
-		ac->entry[ac->avail++] = objp;
 	}
+
+	ac->entry[ac->avail++] = objp;
 }
 
 /**

From 0ad9500e16fe24aa55809a2b00e0d2d0e658fc71 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 16 Dec 2011 16:25:34 +0100
Subject: [PATCH 2/7] slub: prefetch next freelist pointer in slab_alloc()

Recycling a page is a problem: its freelist link chain is hot in the
cache(s) of the cpu(s) that freed the objects, and possibly very cold
on the cpu that currently owns the slab.

Adding a prefetch of the cache line containing the pointer to the next
object in slab_alloc() helps a lot in many workloads, in particular on
asymmetric ones (allocations done on one cpu, frees on other cpus).

The added cost is only three machine instructions.

Examples on my dual-socket quad-core HT machine (Intel CPU E5540
@ 2.53GHz, 16 logical cpus, 2 memory nodes), 64bit kernel.

Before patch:

# perf stat -r 32 hackbench 50 process 4000 >/dev/null

 Performance counter stats for 'hackbench 50 process 4000' (32 runs):

     327577,471718  task-clock               #   15,821 CPUs utilized            ( +-  0,64% )
        28 866 491  context-switches         #    0,088 M/sec                    ( +-  1,80% )
         1 506 929  CPU-migrations           #    0,005 M/sec                    ( +-  3,24% )
           127 151  page-faults              #    0,000 M/sec                    ( +-  0,16% )
   829 399 813 448  cycles                   #    2,532 GHz                      ( +-  0,64% )
   580 664 691 740  stalled-cycles-frontend  #   70,01% frontend cycles idle     ( +-  0,71% )
   197 431 700 448  stalled-cycles-backend   #   23,80% backend cycles idle      ( +-  1,03% )
   503 548 648 975  instructions             #    0,61  insns per cycle
                                             #    1,15  stalled cycles per insn  ( +-  0,46% )
    95 780 068 471  branches                 #  292,389 M/sec                    ( +-  0,48% )
     1 426 407 916  branch-misses            #    1,49% of all branches          ( +-  1,35% )

      20,705679994 seconds time elapsed                                          ( +-  0,64% )

After patch:

# perf stat -r 32 hackbench 50 process 4000 >/dev/null

 Performance counter stats for 'hackbench 50 process 4000' (32 runs):

     286236,542804  task-clock               #   15,786 CPUs utilized            ( +-  1,32% )
        19 703 372  context-switches         #    0,069 M/sec                    ( +-  4,99% )
         1 658 249  CPU-migrations           #    0,006 M/sec                    ( +-  6,62% )
           126 776  page-faults              #    0,000 M/sec                    ( +-  0,12% )
   724 636 593 213  cycles                   #    2,532 GHz                      ( +-  1,32% )
   499 320 714 837  stalled-cycles-frontend  #   68,91% frontend cycles idle     ( +-  1,47% )
   156 555 126 809  stalled-cycles-backend   #   21,60% backend cycles idle      ( +-  2,22% )
   463 897 792 661  instructions             #    0,64  insns per cycle
                                             #    1,08  stalled cycles per insn  ( +-  0,94% )
    87 717 352 563  branches                 #  306,451 M/sec                    ( +-  0,99% )
       941 738 280  branch-misses            #    1,07% of all branches          ( +-  3,35% )

      18,132070670 seconds time elapsed                                          ( +-  1,30% )

Signed-off-by: Eric Dumazet
Acked-by: Christoph Lameter
CC: Matt Mackall
CC: David Rientjes
CC: "Alex,Shi"
CC: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 4907563ef7ff..5b915e86a9b0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -269,6 +269,11 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
 	return *(void **)(object + s->offset);
 }
 
+static void prefetch_freepointer(const struct kmem_cache *s, void *object)
+{
+	prefetch(object + s->offset);
+}
+
 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
 {
 	void *p;
@@ -2309,6 +2314,8 @@ redo:
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
+		void *next_object = get_freepointer_safe(s, object);
+
 		/*
 		 * The cmpxchg will only match if there was no additional
 		 * operation and if we are on the right processor.
@@ -2324,11 +2331,12 @@ redo:
 		if (unlikely(!this_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				object, tid,
-				get_freepointer_safe(s, object), next_tid(tid)))) {
+				next_object, next_tid(tid)))) {
 
 			note_cmpxchg_failure("slab_alloc", s, tid);
 			goto redo;
 		}
+		prefetch_freepointer(s, next_object);
 		stat(s, ALLOC_FASTPATH);
 	}
 
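To make the idea behind the prefetch concrete outside the kernel, here is a
minimal, self-contained userspace sketch of the same pattern: while handing
out the current free object, prefetch the cache line that holds the next
object's freelist link, so the miss overlaps with the caller's work on the
current object. The struct layout, names, and pool size are invented for
illustration; only the pattern mirrors the patch, and __builtin_prefetch()
(the GCC/Clang builtin the kernel's prefetch() typically maps to) stands in
for prefetch_freepointer().

/*
 * Userspace sketch of "prefetch the next freelist pointer".
 * Not kernel code; all names and sizes are illustrative.
 */
#include <stddef.h>
#include <stdlib.h>

struct object {
	struct object *free_next;	/* freelist link, like SLUB's pointer at s->offset */
	char payload[56];
};

static struct object *freelist;

static struct object *alloc_object(void)
{
	struct object *obj = freelist;

	if (!obj)
		return NULL;

	freelist = obj->free_next;
	/* Warm the line the *next* allocation will dereference. Prefetching
	 * a NULL pointer is harmless: the builtin never faults. */
	__builtin_prefetch(freelist);
	return obj;
}

static void free_object(struct object *obj)
{
	obj->free_next = freelist;
	freelist = obj;
}

int main(void)
{
	struct object *pool = calloc(1024, sizeof(*pool));
	struct object *o;

	if (!pool)
		return 1;

	for (int i = 0; i < 1024; i++)
		free_object(&pool[i]);

	while ((o = alloc_object()))
		o->payload[0] = 1;	/* touch the object, as a real caller would */

	free(pool);
	return 0;
}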
From 66c4c35c6bc5a1a452b024cf0364635b28fd94e4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Tue, 17 Jan 2012 09:27:31 -0600
Subject: [PATCH 3/7] slub: Do not hold slub_lock when calling sysfs_slab_add()

sysfs_slab_add() calls various sysfs functions that actually may end up
in userspace doing all sorts of things.

Release the slub_lock after adding the kmem_cache structure to the list.
At that point the address of the kmem_cache is not known, so we are
guaranteed exclusive access to the following modifications to the
kmem_cache structure.

If sysfs_slab_add() fails, reacquire the slub_lock to remove the
kmem_cache structure from the list.

Cc: <stable@vger.kernel.org> # 3.3+
Reported-by: Sasha Levin
Acked-by: Eric Dumazet
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 5b915e86a9b0..bc7a8af24f16 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3937,13 +3937,14 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
 			list_add(&s->list, &slab_caches);
+			up_write(&slub_lock);
 			if (sysfs_slab_add(s)) {
+				down_write(&slub_lock);
 				list_del(&s->list);
 				kfree(n);
 				kfree(s);
 				goto err;
 			}
-			up_write(&slub_lock);
 			return s;
 		}
 		kfree(n);
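The locking pattern in this patch - publish the structure while holding the
global lock, drop the lock before calling out to code that may sleep or call
back, and retake it only to roll back on failure - can be sketched in plain
userspace C. Everything below (the registry, the names, and the stand-in for
sysfs_slab_add()) is invented for illustration.

/*
 * Sketch of "drop the global lock before calling out, reacquire only
 * to undo on failure". Illustrative only; not kernel code.
 */
#include <pthread.h>
#include <stdio.h>

struct cache {
	const char *name;
	struct cache *next;
};

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static struct cache *registry;

/* Stand-in for sysfs_slab_add(): may block or re-enter the registry. */
static int publish_to_userspace(struct cache *c)
{
	return (c->name && c->name[0]) ? 0 : -1;
}

static int cache_register(struct cache *c)
{
	pthread_mutex_lock(&registry_lock);
	c->next = registry;
	registry = c;
	pthread_mutex_unlock(&registry_lock);	/* drop before calling out */

	if (publish_to_userspace(c)) {
		/* Failure path: take the lock again just to unlist. */
		pthread_mutex_lock(&registry_lock);
		if (registry == c) {
			registry = c->next;
		} else {
			struct cache *p = registry;

			while (p && p->next != c)
				p = p->next;
			if (p)
				p->next = c->next;
		}
		pthread_mutex_unlock(&registry_lock);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct cache good = { .name = "good-cache" };
	struct cache bad  = { .name = "" };

	printf("good: %d\n", cache_register(&good));	/* 0 */
	printf("bad:  %d\n", cache_register(&bad));	/* -1, rolled back */
	return 0;
}

The same shape is visible in the hunk above: up_write() moves before
sysfs_slab_add(), and down_write() is taken again only on the error path.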
From 4de900b4d6b2216b7443d32e263f5de9078697a3 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 30 Jan 2012 15:53:51 -0600
Subject: [PATCH 4/7] slub: include <linux/prefetch.h> for prefetch

Otherwise m68k breaks:

On Mon, 30 Jan 2012, Geert Uytterhoeven wrote:
> m68k/allmodconfig at http://kisskb.ellerman.id.au/kisskb/buildresult/5527349/
>
> mm/slub.c:274: error: implicit declaration of function 'prefetch'
>
> Sorry, didn't notice it earlier due to other build breakage in -next.

Reported-by: Geert Uytterhoeven
Acked-by: Geert Uytterhoeven
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/slub.c b/mm/slub.c
index bc7a8af24f16..b6666eb3d9c4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -29,6 +29,7 @@
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
 #include <linux/stacktrace.h>
+#include <linux/prefetch.h>
 
 #include <trace/events/kmem.h>
 

From 8028dcea8abbbd51b5156e40ea214c20b559cd01 Mon Sep 17 00:00:00 2001
From: Alex Shi
Date: Fri, 3 Feb 2012 23:34:56 +0800
Subject: [PATCH 5/7] slub: per cpu partial statistics change

This patch splits cpu_partial_free into two parts: cpu_partial_node,
the number of times the per cpu partial (PCP) list is refilled from the
node partial list, and cpu_partial_free (same name as before), the
number of times it is refilled in the slab_free slow path. A new
statistic, cpu_partial_drain, counts how often the PCP list is drained
to the node partial list. This information is useful when tuning the
per cpu partial lists.

The slabinfo.c code is unchanged, since cpu_partial_node is not on the
slow path.

Signed-off-by: Alex Shi
Acked-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slub_def.h |  6 ++++--
 mm/slub.c                | 12 +++++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a32bcfdc7834..6388a6681af1 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -21,7 +21,7 @@ enum stat_item {
 	FREE_FROZEN,		/* Freeing to frozen slab */
 	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
 	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
-	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
+	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from node partial list */
 	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
 	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
 	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
@@ -37,7 +37,9 @@ enum stat_item {
 	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
 	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
 	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
-	CPU_PARTIAL_FREE,	/* USed cpu partial on free */
+	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
+	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
+	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
 	NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
diff --git a/mm/slub.c b/mm/slub.c
index b6666eb3d9c4..24132edcfe33 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1566,6 +1566,7 @@ static void *get_partial_node(struct kmem_cache *s,
 		} else {
 			page->freelist = t;
 			available = put_cpu_partial(s, page, 0);
+			stat(s, CPU_PARTIAL_NODE);
 		}
 		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
 			break;
@@ -1979,6 +1980,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 				local_irq_restore(flags);
 				pobjects = 0;
 				pages = 0;
+				stat(s, CPU_PARTIAL_DRAIN);
 			}
 		}
 
@@ -1990,7 +1992,6 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->next = oldpage;
 
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
-	stat(s, CPU_PARTIAL_FREE);
 	return pobjects;
 }
 
@@ -2474,9 +2475,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		 * If we just froze the page then put it onto the
 		 * per cpu partial list.
 		 */
-		if (new.frozen && !was_frozen)
+		if (new.frozen && !was_frozen) {
 			put_cpu_partial(s, page, 1);
-
+			stat(s, CPU_PARTIAL_FREE);
+		}
 		/*
 		 * The list lock was not taken therefore no list
 		 * activity can be necessary.
@@ -5069,6 +5071,8 @@ STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
+STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
+STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
 #endif
 
 static struct attribute *slab_attrs[] = {
@@ -5134,6 +5138,8 @@ static struct attribute *slab_attrs[] = {
 	&cmpxchg_double_cpu_fail_attr.attr,
 	&cpu_partial_alloc_attr.attr,
 	&cpu_partial_free_attr.attr,
+	&cpu_partial_node_attr.attr,
+	&cpu_partial_drain_attr.attr,
 #endif
 #ifdef CONFIG_FAILSLAB
 	&failslab_attr.attr,
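For reference, on a kernel built with SLUB statistics enabled
(CONFIG_SLUB_STATS), each STAT_ATTR() entry above shows up as a file under
/sys/kernel/slab/<cache>/. The sketch below simply reads and prints the
counters touched by this patch; the cache name "kmalloc-64" is only an
example, and the files exist only when the statistics are compiled in.

/*
 * Read SLUB per cpu partial statistics from sysfs. Illustrative only;
 * requires CONFIG_SLUB_STATS, and the cache name is an example.
 */
#include <stdio.h>
#include <string.h>

static void show_stat(const char *cache, const char *stat)
{
	char path[256], line[512];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/slab/%s/%s", cache, stat);
	f = fopen(path, "r");
	if (!f) {
		printf("%-18s <not available>\n", stat);
		return;
	}
	if (fgets(line, sizeof(line), f)) {
		line[strcspn(line, "\n")] = '\0';
		/* First field is the total, followed by per-cpu counts. */
		printf("%-18s %s\n", stat, line);
	}
	fclose(f);
}

int main(void)
{
	const char *stats[] = {
		"cpu_partial_alloc", "cpu_partial_free",
		"cpu_partial_node", "cpu_partial_drain",
	};

	for (unsigned int i = 0; i < sizeof(stats) / sizeof(stats[0]); i++)
		show_stat("kmalloc-64", stats[i]);
	return 0;
}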
From a8203725dfded5c1f79dca3368a4a273e24b59bb Mon Sep 17 00:00:00 2001
From: Xi Wang
Date: Mon, 5 Mar 2012 15:14:41 -0800
Subject: [PATCH 6/7] slab: introduce kmalloc_array()

Introduce a kmalloc_array() wrapper that performs integer overflow
checking without zeroing the memory.

Suggested-by: Andrew Morton
Suggested-by: Jens Axboe
Signed-off-by: Xi Wang
Cc: Dan Carpenter
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 573c809c33d9..a595dce6b0c7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -190,7 +190,7 @@ size_t ksize(const void *);
 #endif
 
 /**
- * kcalloc - allocate memory for an array. The memory is set to zero.
+ * kmalloc_array - allocate memory for an array.
  * @n: number of elements.
  * @size: element size.
  * @flags: the type of memory to allocate.
@@ -240,11 +240,22 @@ size_t ksize(const void *);
  * for general use, and so are not documented here. For a full list of
  * potential flags, always refer to linux/gfp.h.
  */
-static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
 	if (size != 0 && n > ULONG_MAX / size)
 		return NULL;
-	return __kmalloc(n * size, flags | __GFP_ZERO);
+	return __kmalloc(n * size, flags);
+}
+
+/**
+ * kcalloc - allocate memory for an array. The memory is set to zero.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
+ */
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kmalloc_array(n, size, flags | __GFP_ZERO);
 }
 
 #if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
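The check in kmalloc_array() works because n * size wraps around exactly when
n exceeds ULONG_MAX / size. Below is a userspace analogue of the same guard;
the malloc_array() wrapper name is invented for illustration, while the
kernel version uses ULONG_MAX and __kmalloc() as shown in the patch above.

/*
 * Userspace analogue of the kmalloc_array() overflow guard.
 * malloc_array() is an invented name for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void *malloc_array(size_t n, size_t size)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return malloc(n * size);
}

int main(void)
{
	/* Fine: 1000 elements of 8 bytes. */
	long *ok = malloc_array(1000, sizeof(*ok));

	/* Caught: the product wraps, so we get NULL instead of a tiny buffer. */
	char *bad = malloc_array(SIZE_MAX / 2 + 1, 4);

	printf("ok=%p bad=%p\n", (void *)ok, (void *)bad);
	free(ok);
	return 0;
}

With this split, kcalloc() becomes kmalloc_array() plus __GFP_ZERO, so both
entry points share a single overflow check.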
From 8bdec192b40cf7f7eec170b317c76089eb5eeddb Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Fri, 9 Mar 2012 17:27:27 -0300
Subject: [PATCH 7/7] mm: SLAB Out-of-memory diagnostics

Following the example at mm/slub.c, add out-of-memory diagnostics to the
SLAB allocator to help debug certain OOM conditions.

An example printout looks like this:

  SLAB: Unable to allocate memory on node 0 (gfp=0x11200)
    cache: bio-0, object size: 192, order: 0
    node 0: slabs: 3/3, objs: 60/60, free: 0

Signed-off-by: Rafael Aquini
Acked-by: Rik van Riel
Acked-by: David Rientjes
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/mm/slab.c b/mm/slab.c
index 806a754fad8e..67e0e0589267 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1731,6 +1731,52 @@ static int __init cpucache_init(void)
 }
 __initcall(cpucache_init);
 
+static noinline void
+slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
+{
+	struct kmem_list3 *l3;
+	struct slab *slabp;
+	unsigned long flags;
+	int node;
+
+	printk(KERN_WARNING
+		"SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+		nodeid, gfpflags);
+	printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
+		cachep->name, cachep->buffer_size, cachep->gfporder);
+
+	for_each_online_node(node) {
+		unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
+		unsigned long active_slabs = 0, num_slabs = 0;
+
+		l3 = cachep->nodelists[node];
+		if (!l3)
+			continue;
+
+		spin_lock_irqsave(&l3->list_lock, flags);
+		list_for_each_entry(slabp, &l3->slabs_full, list) {
+			active_objs += cachep->num;
+			active_slabs++;
+		}
+		list_for_each_entry(slabp, &l3->slabs_partial, list) {
+			active_objs += slabp->inuse;
+			active_slabs++;
+		}
+		list_for_each_entry(slabp, &l3->slabs_free, list)
+			num_slabs++;
+
+		free_objects += l3->free_objects;
+		spin_unlock_irqrestore(&l3->list_lock, flags);
+
+		num_slabs += active_slabs;
+		num_objs = num_slabs * cachep->num;
+		printk(KERN_WARNING
+			"  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
+			node, active_slabs, num_slabs, active_objs, num_objs,
+			free_objects);
+	}
+}
+
 /*
  * Interface to system's page allocator. No need to hold the cache-lock.
  *
@@ -1757,8 +1803,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		flags |= __GFP_RECLAIMABLE;
 
 	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
-	if (!page)
+	if (!page) {
+		if (!(flags & __GFP_NOWARN) && printk_ratelimit())
+			slab_out_of_memory(cachep, flags, nodeid);
 		return NULL;
+	}
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)