2007-05-07 05:49:36 +08:00
|
|
|
#ifndef _LINUX_SLUB_DEF_H
|
|
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
|
|
|
|
/*
 * SLUB : A Slab allocator without object queues.
 *
 * (C) 2007 SGI, Christoph Lameter
 */
|
|
|
|
#include <linux/kobject.h>
|
|
|
|
|
2008-02-08 09:47:41 +08:00
|
|
|
/*
 * Event identifiers for SLUB statistics.
 *
 * Each enumerator names one counted event; NR_SLUB_STAT_ITEMS is the number
 * of events and is used to size the per-cpu stat[] array in
 * struct kmem_cache_cpu when CONFIG_SLUB_STATS is enabled.
 *
 * The enumerators are implicitly numbered from 0, so new events must be
 * added before NR_SLUB_STAT_ITEMS to keep it equal to the item count.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from node partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	DEACTIVATE_BYPASS,	/* Implicit deactivation */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
	NR_SLUB_STAT_ITEMS	/* Number of stat items; must stay last */
};
|
|
|
|
|
2007-10-16 16:26:05 +08:00
|
|
|
/*
 * Per-cpu state of a slab cache.
 *
 * struct kmem_cache refers to this through its __percpu cpu_slab pointer.
 * The stat[] array only exists when CONFIG_SLUB_STATS is enabled and holds
 * one counter per enum stat_item event.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
#ifdef CONFIG_SLUB_STATS
	unsigned int stat[NR_SLUB_STAT_ITEMS];	/* Indexed by enum stat_item */
#endif
};
|
2007-10-16 16:26:05 +08:00
|
|
|
|
2008-04-15 00:11:31 +08:00
|
|
|
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 *
 * Both values are packed into the single word 'x'; the packing layout is
 * not defined in this header.
 */
struct kmem_cache_order_objects {
	unsigned long x;
};
|
|
|
|
|
2007-05-07 05:49:36 +08:00
|
|
|
/*
|
|
|
|
* Slab cache management.
|
|
|
|
*/
|
|
|
|
struct kmem_cache {
|
2010-08-07 20:29:22 +08:00
|
|
|
struct kmem_cache_cpu __percpu *cpu_slab;
|
2007-05-07 05:49:36 +08:00
|
|
|
/* Used for retriving partial slabs etc */
|
|
|
|
unsigned long flags;
|
2011-02-26 01:38:51 +08:00
|
|
|
unsigned long min_partial;
|
2007-05-07 05:49:36 +08:00
|
|
|
int size; /* The size of an object including meta data */
|
2012-06-13 23:24:57 +08:00
|
|
|
int object_size; /* The size of an object without meta data */
|
2007-05-07 05:49:36 +08:00
|
|
|
int offset; /* Free pointer offset. */
|
2011-09-01 11:32:18 +08:00
|
|
|
int cpu_partial; /* Number of per cpu partial objects to keep around */
|
2008-04-15 00:11:31 +08:00
|
|
|
struct kmem_cache_order_objects oo;
|
2007-05-07 05:49:36 +08:00
|
|
|
|
|
|
|
/* Allocation and freeing of slabs */
|
2008-04-15 00:11:40 +08:00
|
|
|
struct kmem_cache_order_objects max;
|
2008-04-15 00:11:40 +08:00
|
|
|
struct kmem_cache_order_objects min;
|
2008-02-15 06:21:32 +08:00
|
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
2007-05-07 05:49:36 +08:00
|
|
|
int refcount; /* Refcount for slab cache destroy */
|
2008-07-26 10:45:34 +08:00
|
|
|
void (*ctor)(void *);
|
2007-05-07 05:49:36 +08:00
|
|
|
int inuse; /* Offset to metadata */
|
|
|
|
int align; /* Alignment */
|
2011-03-10 15:21:48 +08:00
|
|
|
int reserved; /* Reserved bytes at the end of slabs */
|
2017-07-07 06:36:28 +08:00
|
|
|
int red_left_pad; /* Left redzone padding size */
|
2007-05-07 05:49:36 +08:00
|
|
|
const char *name; /* Name (only for display!) */
|
|
|
|
struct list_head list; /* List of slab caches */
|
2010-10-06 02:57:26 +08:00
|
|
|
#ifdef CONFIG_SYSFS
|
2007-05-07 05:49:36 +08:00
|
|
|
struct kobject kobj; /* For sysfs */
|
slub: make sysfs file removal asynchronous
Commit bf5eb3de3847 ("slub: separate out sysfs_slab_release() from
sysfs_slab_remove()") made slub sysfs file removals synchronous to
kmem_cache shutdown.
Unfortunately, this created a possible ABBA deadlock between slab_mutex
and sysfs draining mechanism triggering the following lockdep warning.
======================================================
[ INFO: possible circular locking dependency detected ]
4.10.0-test+ #48 Not tainted
-------------------------------------------------------
rmmod/1211 is trying to acquire lock:
(s_active#120){++++.+}, at: [<ffffffff81308073>] kernfs_remove+0x23/0x40
but task is already holding lock:
(slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (slab_mutex){+.+.+.}:
lock_acquire+0xf6/0x1f0
__mutex_lock+0x75/0x950
mutex_lock_nested+0x1b/0x20
slab_attr_store+0x75/0xd0
sysfs_kf_write+0x45/0x60
kernfs_fop_write+0x13c/0x1c0
__vfs_write+0x28/0x120
vfs_write+0xc8/0x1e0
SyS_write+0x49/0xa0
entry_SYSCALL_64_fastpath+0x1f/0xc2
-> #0 (s_active#120){++++.+}:
__lock_acquire+0x10ed/0x1260
lock_acquire+0xf6/0x1f0
__kernfs_remove+0x254/0x320
kernfs_remove+0x23/0x40
sysfs_remove_dir+0x51/0x80
kobject_del+0x18/0x50
__kmem_cache_shutdown+0x3e6/0x460
kmem_cache_destroy+0x1fb/0x2d0
kvm_exit+0x2d/0x80 [kvm]
vmx_exit+0x19/0xa1b [kvm_intel]
SyS_delete_module+0x198/0x1f0
entry_SYSCALL_64_fastpath+0x1f/0xc2
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(slab_mutex);
lock(s_active#120);
lock(slab_mutex);
lock(s_active#120);
*** DEADLOCK ***
2 locks held by rmmod/1211:
#0: (cpu_hotplug.dep_map){++++++}, at: [<ffffffff810a7877>] get_online_cpus+0x37/0x80
#1: (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0
stack backtrace:
CPU: 3 PID: 1211 Comm: rmmod Not tainted 4.10.0-test+ #48
Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
Call Trace:
print_circular_bug+0x1be/0x210
__lock_acquire+0x10ed/0x1260
lock_acquire+0xf6/0x1f0
__kernfs_remove+0x254/0x320
kernfs_remove+0x23/0x40
sysfs_remove_dir+0x51/0x80
kobject_del+0x18/0x50
__kmem_cache_shutdown+0x3e6/0x460
kmem_cache_destroy+0x1fb/0x2d0
kvm_exit+0x2d/0x80 [kvm]
vmx_exit+0x19/0xa1b [kvm_intel]
SyS_delete_module+0x198/0x1f0
? SyS_delete_module+0x5/0x1f0
entry_SYSCALL_64_fastpath+0x1f/0xc2
It'd be the cleanest to deal with the issue by removing sysfs files
without holding slab_mutex before the rest of shutdown; however, given
the current code structure, it is pretty difficult to do so.
This patch punts sysfs file removal to a work item. Before commit
bf5eb3de3847, the removal was punted to a RCU delayed work item which is
executed after release. Now, we're punting to a different work item on
shutdown which still maintains the goal removing the sysfs files earlier
when destroying kmem_caches.
Link: http://lkml.kernel.org/r/20170620204512.GI21326@htj.duckdns.org
Fixes: bf5eb3de3847 ("slub: separate out sysfs_slab_release() from sysfs_slab_remove()")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-06-24 06:08:52 +08:00
|
|
|
struct work_struct kobj_remove_work;
|
2007-07-17 19:03:24 +08:00
|
|
|
#endif
|
2016-01-21 07:02:32 +08:00
|
|
|
#ifdef CONFIG_MEMCG
|
2015-02-13 06:59:20 +08:00
|
|
|
struct memcg_cache_params memcg_params;
|
slub: slub-specific propagation changes
SLUB allows us to tune a particular cache behavior with sysfs-based
tunables. When creating a new memcg cache copy, we'd like to preserve any
tunables the parent cache already had.
This can be done by tapping into the store attribute function provided by
the allocator. We of course don't need to mess with read-only fields.
Since the attributes can have multiple types and are stored internally by
sysfs, the best strategy is to issue a ->show() in the root cache, and
then ->store() in the memcg cache.
The drawback of that, is that sysfs can allocate up to a page in buffering
for show(), that we are likely not to need, but also can't guarantee. To
avoid always allocating a page for that, we can update the caches at store
time with the maximum attribute size ever stored to the root cache. We
will then get a buffer big enough to hold it. The corolary to this, is
that if no stores happened, nothing will be propagated.
It can also happen that a root cache has its tunables updated during
normal system operation. In this case, we will propagate the change to
all caches that are already active.
[akpm@linux-foundation.org: tweak code to avoid __maybe_unused]
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-19 06:23:05 +08:00
|
|
|
int max_attr_size; /* for propagation, maximum size of a stored attr */
|
2014-04-08 06:39:31 +08:00
|
|
|
#ifdef CONFIG_SYSFS
|
|
|
|
struct kset *memcg_kset;
|
|
|
|
#endif
|
2012-12-19 06:22:27 +08:00
|
|
|
#endif
|
2007-05-07 05:49:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
2008-01-08 15:20:26 +08:00
|
|
|
/*
|
|
|
|
* Defragmentation by allocating from a remote node.
|
|
|
|
*/
|
|
|
|
int remote_node_defrag_ratio;
|
2007-05-07 05:49:36 +08:00
|
|
|
#endif
|
2016-07-27 06:21:59 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
|
|
|
unsigned int *random_seq;
|
|
|
|
#endif
|
|
|
|
|
2016-07-29 06:49:07 +08:00
|
|
|
#ifdef CONFIG_KASAN
|
|
|
|
struct kasan_cache kasan_info;
|
|
|
|
#endif
|
|
|
|
|
2010-09-28 21:10:26 +08:00
|
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
2007-05-07 05:49:36 +08:00
|
|
|
};
|
|
|
|
|
2014-05-07 03:50:08 +08:00
|
|
|
/*
 * sysfs support for slab caches.
 *
 * With CONFIG_SYSFS, SLAB_SUPPORTS_SYSFS is defined and
 * sysfs_slab_release() is provided elsewhere. Without it, the function is
 * an empty inline stub so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_release(struct kmem_cache *);
#else
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
|
|
|
|
|
2015-02-14 06:39:35 +08:00
|
|
|
void object_err(struct kmem_cache *s, struct page *page,
|
|
|
|
u8 *object, char *reason);
|
|
|
|
|
2016-07-29 06:49:04 +08:00
|
|
|
void *fixup_red_left(struct kmem_cache *s, void *p);
|
|
|
|
|
2016-03-26 05:21:59 +08:00
|
|
|
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
|
|
|
|
void *x) {
|
|
|
|
void *object = x - (x - page_address(page)) % cache->size;
|
|
|
|
void *last_object = page_address(page) +
|
|
|
|
(page->objects - 1) * cache->size;
|
2016-07-29 06:49:04 +08:00
|
|
|
void *result = (unlikely(object > last_object)) ? last_object : object;
|
|
|
|
|
|
|
|
result = fixup_red_left(cache, result);
|
|
|
|
return result;
|
2016-03-26 05:21:59 +08:00
|
|
|
}
|
|
|
|
|
2007-05-07 05:49:36 +08:00
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|