2007-05-07 05:49:36 +08:00
|
|
|
#ifndef _LINUX_SLUB_DEF_H
|
|
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
|
|
|
|
/*
 * SLUB : A Slab allocator without object queues.
 *
 * (C) 2007 SGI, Christoph Lameter
 */
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/gfp.h>
|
2011-11-24 09:12:59 +08:00
|
|
|
#include <linux/bug.h>
|
2007-05-07 05:49:36 +08:00
|
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <linux/kobject.h>
|
|
|
|
|
2010-10-21 17:29:19 +08:00
|
|
|
#include <linux/kmemleak.h>
|
2010-05-26 17:22:17 +08:00
|
|
|
|
2008-02-08 09:47:41 +08:00
|
|
|
/*
 * Identifiers of the SLUB event counters. Each enumerator indexes one slot
 * of a statistics array that is sized by NR_SLUB_STAT_ITEMS, so new items
 * must be added before NR_SLUB_STAT_ITEMS.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from node partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	DEACTIVATE_BYPASS,	/* Implicit deactivation */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
	NR_SLUB_STAT_ITEMS	/* Number of counters above; keep last */
};
|
|
|
|
|
2007-10-16 16:26:05 +08:00
|
|
|
/*
 * Per cpu allocation state of a slab cache. struct kmem_cache refers to
 * it through its __percpu cpu_slab pointer.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
#ifdef CONFIG_SLUB_STATS
	/* Event counters, one slot per enum stat_item value */
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
|
2007-10-16 16:26:05 +08:00
|
|
|
|
2008-04-15 00:11:31 +08:00
|
|
|
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned long x;	/* Order and object count combined in one word */
};
|
|
|
|
|
2007-05-07 05:49:36 +08:00
|
|
|
/*
|
|
|
|
* Slab cache management.
|
|
|
|
*/
|
|
|
|
struct kmem_cache {
|
2010-08-07 20:29:22 +08:00
|
|
|
struct kmem_cache_cpu __percpu *cpu_slab;
|
2007-05-07 05:49:36 +08:00
|
|
|
/* Used for retriving partial slabs etc */
|
|
|
|
unsigned long flags;
|
2011-02-26 01:38:51 +08:00
|
|
|
unsigned long min_partial;
|
2007-05-07 05:49:36 +08:00
|
|
|
int size; /* The size of an object including meta data */
|
2012-06-13 23:24:57 +08:00
|
|
|
int object_size; /* The size of an object without meta data */
|
2007-05-07 05:49:36 +08:00
|
|
|
int offset; /* Free pointer offset. */
|
2011-09-01 11:32:18 +08:00
|
|
|
int cpu_partial; /* Number of per cpu partial objects to keep around */
|
2008-04-15 00:11:31 +08:00
|
|
|
struct kmem_cache_order_objects oo;
|
2007-05-07 05:49:36 +08:00
|
|
|
|
|
|
|
/* Allocation and freeing of slabs */
|
2008-04-15 00:11:40 +08:00
|
|
|
struct kmem_cache_order_objects max;
|
2008-04-15 00:11:40 +08:00
|
|
|
struct kmem_cache_order_objects min;
|
2008-02-15 06:21:32 +08:00
|
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
2007-05-07 05:49:36 +08:00
|
|
|
int refcount; /* Refcount for slab cache destroy */
|
2008-07-26 10:45:34 +08:00
|
|
|
void (*ctor)(void *);
|
2007-05-07 05:49:36 +08:00
|
|
|
int inuse; /* Offset to metadata */
|
|
|
|
int align; /* Alignment */
|
2011-03-10 15:21:48 +08:00
|
|
|
int reserved; /* Reserved bytes at the end of slabs */
|
2007-05-07 05:49:36 +08:00
|
|
|
const char *name; /* Name (only for display!) */
|
|
|
|
struct list_head list; /* List of slab caches */
|
2010-10-06 02:57:26 +08:00
|
|
|
#ifdef CONFIG_SYSFS
|
2007-05-07 05:49:36 +08:00
|
|
|
struct kobject kobj; /* For sysfs */
|
2007-07-17 19:03:24 +08:00
|
|
|
#endif
|
2012-12-19 06:22:27 +08:00
|
|
|
#ifdef CONFIG_MEMCG_KMEM
|
|
|
|
struct memcg_cache_params *memcg_params;
|
slub: slub-specific propagation changes
SLUB allows us to tune a particular cache behavior with sysfs-based
tunables. When creating a new memcg cache copy, we'd like to preserve any
tunables the parent cache already had.
This can be done by tapping into the store attribute function provided by
the allocator. We of course don't need to mess with read-only fields.
Since the attributes can have multiple types and are stored internally by
sysfs, the best strategy is to issue a ->show() in the root cache, and
then ->store() in the memcg cache.
The drawback of that, is that sysfs can allocate up to a page in buffering
for show(), that we are likely not to need, but also can't guarantee. To
avoid always allocating a page for that, we can update the caches at store
time with the maximum attribute size ever stored to the root cache. We
will then get a buffer big enough to hold it. The corolary to this, is
that if no stores happened, nothing will be propagated.
It can also happen that a root cache has its tunables updated during
normal system operation. In this case, we will propagate the change to
all caches that are already active.
[akpm@linux-foundation.org: tweak code to avoid __maybe_unused]
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-19 06:23:05 +08:00
|
|
|
int max_attr_size; /* for propagation, maximum size of a stored attr */
|
2012-12-19 06:22:27 +08:00
|
|
|
#endif
|
2007-05-07 05:49:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
2008-01-08 15:20:26 +08:00
|
|
|
/*
|
|
|
|
* Defragmentation by allocating from a remote node.
|
|
|
|
*/
|
|
|
|
int remote_node_defrag_ratio;
|
2007-05-07 05:49:36 +08:00
|
|
|
#endif
|
2010-09-28 21:10:26 +08:00
|
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
2007-05-07 05:49:36 +08:00
|
|
|
};
|
|
|
|
|
slob: initial NUMA support
This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.
We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified. Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id. Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.
The current implementation does have some scalability problems, in that all
partial free pages are tracked in the global freelist (with contention due
to the single spinlock). However, these are things that are being reworked
for SMP scalability first, while things like per-node freelists can easily
be built on top of this sort of functionality once it's been added.
More background can be found in:
http://marc.info/?l=linux-mm&m=118117916022379&w=2
http://marc.info/?l=linux-mm&m=118170446306199&w=2
http://marc.info/?l=linux-mm&m=118187859420048&w=2
and subsequent threads.
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:38:22 +08:00
|
|
|
/*
 * Out-of-line allocation entry points used by the inline helpers below;
 * their definitions are not part of this header.
 */
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags);
void *__kmalloc(size_t size, gfp_t flags);
|
|
|
|
|
2010-10-21 17:29:19 +08:00
|
|
|
static __always_inline void *
|
|
|
|
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
|
|
|
|
{
|
2012-12-19 06:22:48 +08:00
|
|
|
void *ret;
|
|
|
|
|
|
|
|
flags |= (__GFP_COMP | __GFP_KMEMCG);
|
|
|
|
ret = (void *) __get_free_pages(flags, order);
|
2010-10-21 17:29:19 +08:00
|
|
|
kmemleak_alloc(ret, size, 1, flags);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-07-08 02:36:37 +08:00
|
|
|
/**
 * verify_mem_not_deleted - check that allocated memory is still in use
 * @x: pointer to the memory to check
 *
 * Calling this on allocated memory will check that the memory
 * is expected to be in use, and print warnings if not.
 *
 * Without CONFIG_SLUB_DEBUG no check is performed and true is returned.
 */
#ifdef CONFIG_SLUB_DEBUG
extern bool verify_mem_not_deleted(const void *x);
#else
static inline bool verify_mem_not_deleted(const void *x)
{
	return true;
}
#endif
|
|
|
|
|
2009-12-11 15:45:30 +08:00
|
|
|
#ifdef CONFIG_TRACING
|
2010-10-21 17:29:19 +08:00
|
|
|
extern void *
|
|
|
|
kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
|
|
|
|
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
|
2008-08-20 01:43:26 +08:00
|
|
|
#else
|
|
|
|
static __always_inline void *
|
2010-10-21 17:29:19 +08:00
|
|
|
kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
|
2008-08-20 01:43:26 +08:00
|
|
|
{
|
|
|
|
return kmem_cache_alloc(s, gfpflags);
|
|
|
|
}
|
2010-10-21 17:29:19 +08:00
|
|
|
|
|
|
|
static __always_inline void *
|
|
|
|
kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
|
|
|
{
|
|
|
|
return kmalloc_order(size, flags, order);
|
|
|
|
}
|
2008-08-20 01:43:26 +08:00
|
|
|
#endif
|
|
|
|
|
2008-02-12 04:47:46 +08:00
|
|
|
/*
 * kmalloc_large - allocate @size bytes through kmalloc_order_trace().
 *
 * The order handed on is the one get_order() computes for @size.
 */
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	unsigned int order = get_order(size);

	return kmalloc_order_trace(size, flags, order);
}
|
|
|
|
|
2007-08-31 15:48:45 +08:00
|
|
|
/*
 * kmalloc - allocate @size bytes using the gfp flags in @flags.
 *
 * When @size is a compile-time constant the allocation path is chosen
 * inline:
 *  - sizes above KMALLOC_MAX_CACHE_SIZE are handed to kmalloc_large();
 *  - otherwise, unless GFP_DMA is set in @flags, the cache is selected
 *    with kmalloc_index(); an index of 0 yields ZERO_SIZE_PTR.
 * All remaining cases (non-constant @size, or GFP_DMA requested) go
 * through __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	if (__builtin_constant_p(size)) {
		if (size > KMALLOC_MAX_CACHE_SIZE)
			return kmalloc_large(size, flags);

		if (!(flags & GFP_DMA)) {
			int index = kmalloc_index(size);

			if (!index)
				return ZERO_SIZE_PTR;

			return kmem_cache_alloc_trace(kmalloc_caches[index],
					flags, size);
		}
	}
	return __kmalloc(size, flags);
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
slob: initial NUMA support
This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.
We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified. Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id. Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.
The current implementation does have some scalability problems, in that all
partial free pages are tracked in the global freelist (with contention due
to the single spinlock). However, these are things that are being reworked
for SMP scalability first, while things like per-node freelists can easily
be built on top of this sort of functionality once it's been added.
More background can be found in:
http://marc.info/?l=linux-mm&m=118117916022379&w=2
http://marc.info/?l=linux-mm&m=118170446306199&w=2
http://marc.info/?l=linux-mm&m=118187859420048&w=2
and subsequent threads.
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:38:22 +08:00
|
|
|
/*
 * Node-aware counterparts of the out-of-line allocation entry points;
 * their definitions are not part of this header.
 */
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node);
|
2007-05-07 05:49:36 +08:00
|
|
|
|
2009-12-11 15:45:30 +08:00
|
|
|
#ifdef CONFIG_TRACING
|
2010-10-21 17:29:19 +08:00
|
|
|
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
2008-08-20 01:43:26 +08:00
|
|
|
gfp_t gfpflags,
|
2010-10-21 17:29:19 +08:00
|
|
|
int node, size_t size);
|
2008-08-20 01:43:26 +08:00
|
|
|
#else
|
|
|
|
static __always_inline void *
|
2010-10-21 17:29:19 +08:00
|
|
|
kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
2008-08-20 01:43:26 +08:00
|
|
|
gfp_t gfpflags,
|
2010-10-21 17:29:19 +08:00
|
|
|
int node, size_t size)
|
2008-08-20 01:43:26 +08:00
|
|
|
{
|
|
|
|
return kmem_cache_alloc_node(s, gfpflags, node);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2007-08-31 15:48:45 +08:00
|
|
|
/*
 * kmalloc_node - allocate @size bytes with @flags, passing @node along.
 *
 * The inline path is taken only when @size is a compile-time constant no
 * larger than KMALLOC_MAX_CACHE_SIZE and GFP_DMA is not set in @flags:
 * the cache is then selected with kmalloc_index(), and an index of 0
 * yields ZERO_SIZE_PTR. Every other request is handled by
 * __kmalloc_node().
 */
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	if (__builtin_constant_p(size) &&
		size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
		int index = kmalloc_index(size);

		if (!index)
			return ZERO_SIZE_PTR;

		return kmem_cache_alloc_node_trace(kmalloc_caches[index],
			       flags, node, size);
	}
	return __kmalloc_node(size, flags, node);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|