/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Dynamic loading of modules into the kernel.
 *
 * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
 * Rewritten again by Rusty Russell, 2002
 */

#ifndef _LINUX_MODULE_H
#define _LINUX_MODULE_H

#include <linux/list.h>
#include <linux/stat.h>
#include <linux/compiler.h>
#include <linux/cache.h>
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/elf.h>
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
#include <linux/srcu.h>

#include <linux/percpu.h>
#include <asm/module.h>

/* Not Yet Implemented */
#define MODULE_SUPPORTED_DEVICE(name)

#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN

struct modversion_info {
	unsigned long crc;
	char name[MODULE_NAME_LEN];
};

struct module;
struct exception_table_entry;

struct module_kobject {
	struct kobject kobj;
	struct module *mod;
	struct kobject *drivers_dir;
	struct module_param_attrs *mp;
	struct completion *kobj_completion;
} __randomize_layout;

struct module_attribute {
	struct attribute attr;
	ssize_t (*show)(struct module_attribute *, struct module_kobject *,
			char *);
	ssize_t (*store)(struct module_attribute *, struct module_kobject *,
			 const char *, size_t count);
	void (*setup)(struct module *, const char *);
	int (*test)(struct module *);
	void (*free)(struct module *);
};

struct module_version_attribute {
	struct module_attribute mattr;
	const char *module_name;
	const char *version;
} __attribute__ ((__aligned__(sizeof(void *))));

extern ssize_t __modver_version_show(struct module_attribute *,
				     struct module_kobject *, char *);

extern struct module_attribute module_uevent;

/* These are either module local, or the kernel's dummy ones. */
extern int init_module(void);
extern void cleanup_module(void);

#ifndef MODULE
/**
 * module_init() - driver initialization entry point
 * @x: function to be run at kernel boot time or module insertion
 *
 * module_init() will either be called during do_initcalls() (if
 * builtin) or at module insertion time (if a module). There can only
 * be one per module.
 */
#define module_init(x)	__initcall(x);

/**
 * module_exit() - driver exit entry point
 * @x: function to be run when driver is removed
 *
 * module_exit() will wrap the driver clean-up code
 * with cleanup_module() when used with rmmod when
 * the driver is a module. If the driver is statically
 * compiled into the kernel, module_exit() has no effect.
 * There can only be one per module.
 */
#define module_exit(x)	__exitcall(x);

#else /* MODULE */

/*
 * In most cases loadable modules do not need custom
 * initcall levels. There are still some valid cases where
 * a driver may be needed early if built in, and it does not
 * matter when it is built as a loadable module, e.g. bus
 * snooping debug drivers.
 */
#define early_initcall(fn)		module_init(fn)
#define core_initcall(fn)		module_init(fn)
#define core_initcall_sync(fn)		module_init(fn)
#define postcore_initcall(fn)		module_init(fn)
#define postcore_initcall_sync(fn)	module_init(fn)
#define arch_initcall(fn)		module_init(fn)
#define subsys_initcall(fn)		module_init(fn)
#define subsys_initcall_sync(fn)	module_init(fn)
#define fs_initcall(fn)			module_init(fn)
#define fs_initcall_sync(fn)		module_init(fn)
#define rootfs_initcall(fn)		module_init(fn)
#define device_initcall(fn)		module_init(fn)
#define device_initcall_sync(fn)	module_init(fn)
#define late_initcall(fn)		module_init(fn)
#define late_initcall_sync(fn)		module_init(fn)

#define console_initcall(fn)		module_init(fn)

/* Each module must use one module_init(). */
#define module_init(initfn)					\
	static inline initcall_t __maybe_unused __inittest(void)	\
	{ return initfn; }					\
	int init_module(void) __copy(initfn) __attribute__((alias(#initfn)));

/* This is only required if you want to be unloadable. */
#define module_exit(exitfn)					\
	static inline exitcall_t __maybe_unused __exittest(void)	\
	{ return exitfn; }					\
	void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn)));

#endif

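/*
 * A minimal usage sketch (the function names and messages below are
 * hypothetical, not part of this header):
 *
 *	static int __init example_init(void)
 *	{
 *		pr_info("example: loaded\n");
 *		return 0;
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		pr_info("example: unloaded\n");
 *	}
 *
 *	module_init(example_init);
 *	module_exit(example_exit);
 *
 * Built in, example_init() runs from do_initcalls(); built as a module,
 * it runs at load time and example_exit() runs at unload time.
 */
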
/* This means "can be init if no module support, otherwise module load
   may call it." */
#ifdef CONFIG_MODULES
#define __init_or_module
#define __initdata_or_module
#define __initconst_or_module
#define __INIT_OR_MODULE	.text
#define __INITDATA_OR_MODULE	.data
#define __INITRODATA_OR_MODULE	.section ".rodata","a",%progbits
#else
#define __init_or_module	__init
#define __initdata_or_module	__initdata
#define __initconst_or_module	__initconst
#define __INIT_OR_MODULE	__INIT
#define __INITDATA_OR_MODULE	__INITDATA
#define __INITRODATA_OR_MODULE	__INITRODATA
#endif /*CONFIG_MODULES*/

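/*
 * For instance, a hypothetical helper marked __init_or_module stays
 * resident on CONFIG_MODULES kernels (a later module load may call it)
 * but is discarded after boot when modules are disabled:
 *
 *	static int __init_or_module example_setup(void)
 *	{
 *		return 0;
 *	}
 */
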
/* Generic info of form tag = "info" */
|
|
|
|
#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
|
|
|
|
|
|
|
|
/* For userspace: you can also call me... */
|
|
|
|
#define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
|
|
|
|
|
/* Soft module dependencies. See man modprobe.d for details.
 * Example: MODULE_SOFTDEP("pre: module-foo module-bar post: module-baz")
 */
#define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep)

/*
 * MODULE_FILE is used for generating modules.builtin,
 * so make it a no-op when this is being built as a module.
 */
#ifdef MODULE
#define MODULE_FILE
#else
#define MODULE_FILE	MODULE_INFO(file, KBUILD_MODFILE);
#endif

/*
 * The following license idents are currently accepted as indicating free
 * software modules
 *
 *	"GPL"				[GNU Public License v2]
 *	"GPL v2"			[GNU Public License v2]
 *	"GPL and additional rights"	[GNU Public License v2 rights and more]
 *	"Dual BSD/GPL"			[GNU Public License v2
 *					 or BSD license choice]
 *	"Dual MIT/GPL"			[GNU Public License v2
 *					 or MIT license choice]
 *	"Dual MPL/GPL"			[GNU Public License v2
 *					 or Mozilla license choice]
 *
 * The following other idents are available
 *
 *	"Proprietary"			[Non free products]
 *
 * Both "GPL v2" and "GPL" (the latter also in dual licensed strings) are
 * merely stating that the module is licensed under the GPL v2, but are not
 * telling whether "GPL v2 only" or "GPL v2 or later". The reason why there
 * are two variants is a historic and failed attempt to convey more
 * information in the MODULE_LICENSE string. For module loading the
 * "only/or later" distinction is completely irrelevant and neither
 * replaces the proper license identifiers in the corresponding source file
 * nor amends them in any way. The sole purpose is to make the
 * 'Proprietary' flagging work and to refuse to bind symbols which are
 * exported with EXPORT_SYMBOL_GPL when a non free module is loaded.
 *
 * In the same way "BSD" is not clear license information. It merely
 * states that the module is licensed under one of the compatible BSD
 * license variants. The detailed and correct license information is again
 * to be found in the corresponding source files.
 *
 * There are dual licensed components, but when running with Linux it is the
 * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
 * is a GPL combined work.
 *
 * This exists for several reasons
 * 1.	So modinfo can show license info for users wanting to vet their setup
 *	is free
 * 2.	So the community can ignore bug reports including proprietary modules
 * 3.	So vendors can do likewise based on their own policies
 */
#define MODULE_LICENSE(_license) MODULE_FILE MODULE_INFO(license, _license)

/*
 * Author(s), use "Name <email>" or just "Name", for multiple
 * authors use multiple MODULE_AUTHOR() statements/lines.
 */
#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)

/* What your module does. */
#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)

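/*
 * Typical metadata, sketched with hypothetical values:
 *
 *	MODULE_LICENSE("GPL v2");
 *	MODULE_AUTHOR("Jane Hacker <jane@example.org>");
 *	MODULE_DESCRIPTION("Example widget driver");
 *	MODULE_ALIAS("widget-driver");
 */
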
#ifdef MODULE
/* Creates an alias so file2alias.c can find device table. */
#define MODULE_DEVICE_TABLE(type, name)					\
extern typeof(name) __mod_##type##__##name##_device_table		\
  __attribute__ ((unused, alias(__stringify(name))))
#else  /* !MODULE */
#define MODULE_DEVICE_TABLE(type, name)
#endif

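/*
 * A sketch of typical use (the IDs are hypothetical); file2alias.c turns
 * the table into modalias strings so userspace can autoload the module:
 *
 *	static const struct pci_device_id example_ids[] = {
 *		{ PCI_DEVICE(0x1234, 0x5678) },
 *		{ }
 *	};
 *	MODULE_DEVICE_TABLE(pci, example_ids);
 */
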
/* Version of form [<epoch>:]<version>[-<extra-version>].
 * Or for CVS/RCS ID version, everything but the number is stripped.
 * <epoch>: A (small) unsigned integer which allows you to start versions
 *	anew. If not mentioned, it's zero. eg. "2:1.0" is after
 *	"1:2.0".
 * <version>: The <version> may contain only alphanumerics and the
 *	character `.'. Ordered by numeric sort for numeric parts,
 *	ascii sort for ascii parts (as per RPM or DEB algorithm).
 * <extra-version>: Like <version>, but inserted for local
 *	customizations, eg "rh3" or "rusty1".
 *
 * Using this automatically adds a checksum of the .c files and the
 * local headers in "srcversion".
 */

#if defined(MODULE) || !defined(CONFIG_SYSFS)
#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
#else
#define MODULE_VERSION(_version)					\
	MODULE_INFO(version, _version);					\
	static struct module_version_attribute ___modver_attr = {	\
		.mattr	= {						\
			.attr	= {					\
				.name	= "version",			\
				.mode	= S_IRUGO,			\
			},						\
			.show	= __modver_version_show,		\
		},							\
		.module_name	= KBUILD_MODNAME,			\
		.version	= _version,				\
	};								\
	static const struct module_version_attribute			\
	__used __attribute__ ((__section__ ("__modver")))		\
	* __moduleparam_const __modver_attr = &___modver_attr
#endif

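/* e.g. MODULE_VERSION("2:1.0-rusty1"); (a hypothetical epoch:version-extra) */
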
/* Optional firmware file (or files) needed by the module.
 * The format is simply the firmware file name. Multiple firmware
 * files require multiple MODULE_FIRMWARE() specifiers. */
#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)

#define MODULE_IMPORT_NS(ns)	MODULE_INFO(import_ns, #ns)

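/*
 * Usage sketches (the firmware path and namespace are hypothetical):
 *
 *	MODULE_FIRMWARE("example/example-fw.bin");
 *	MODULE_IMPORT_NS(EXAMPLE_NS);
 */
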
struct notifier_block;

#ifdef CONFIG_MODULES

extern int modules_disabled; /* for sysctl */

/* Get/put a kernel symbol (calls must be symmetric) */
void *__symbol_get(const char *symbol);
void *__symbol_get_gpl(const char *symbol);
#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x))))

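/*
 * Sketch of symmetric get/put use; example_func is a hypothetical
 * exported function (declared e.g. as "extern int example_func(int);"),
 * and symbol_put() is declared further down in the full header:
 *
 *	int (*fn)(int) = symbol_get(example_func);
 *
 *	if (fn) {
 *		fn(0);
 *		symbol_put(example_func);
 *	}
 */
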
/* modules using other modules: kdb wants to see this. */
struct module_use {
	struct list_head source_list;
	struct list_head target_list;
	struct module *source, *target;
};

enum module_state {
	MODULE_STATE_LIVE,	/* Normal state. */
	MODULE_STATE_COMING,	/* Fully formed, running module_init. */
	MODULE_STATE_GOING,	/* Going away. */
	MODULE_STATE_UNFORMED,	/* Still setting it up. */
};

struct mod_tree_node {
	struct module *mod;
	struct latch_tree_node node;
};

struct module_layout {
	/* The actual code + data. */
	void *base;
	/* Total size. */
	unsigned int size;
	/* The size of the executable code. */
	unsigned int text_size;
	/* Size of RO section of the module (text+rodata) */
	unsigned int ro_size;
	/* Size of RO after init section */
	unsigned int ro_after_init_size;

#ifdef CONFIG_MODULES_TREE_LOOKUP
	struct mod_tree_node mtn;
#endif
};

#ifdef CONFIG_MODULES_TREE_LOOKUP
/* Only touch one cacheline for common rbtree-for-core-layout case. */
#define __module_layout_align ____cacheline_aligned
#else
#define __module_layout_align
#endif

struct mod_kallsyms {
	Elf_Sym *symtab;
	unsigned int num_symtab;
	char *strtab;
	char *typetab;
};

#ifdef CONFIG_LIVEPATCH
struct klp_modinfo {
	Elf_Ehdr hdr;
	Elf_Shdr *sechdrs;
	char *secstrings;
	unsigned int symndx;
};
#endif

struct module {
	enum module_state state;

	/* Member of list of modules */
	struct list_head list;

	/* Unique handle for this module */
	char name[MODULE_NAME_LEN];

	/* Sysfs stuff. */
	struct module_kobject mkobj;
	struct module_attribute *modinfo_attrs;
	const char *version;
	const char *srcversion;
	struct kobject *holders_dir;

	/* Exported symbols */
	const struct kernel_symbol *syms;
	const s32 *crcs;
	unsigned int num_syms;

	/* Kernel parameters. */
#ifdef CONFIG_SYSFS
	struct mutex param_lock;
#endif
	struct kernel_param *kp;
	unsigned int num_kp;

	/* GPL-only exported symbols. */
	unsigned int num_gpl_syms;
	const struct kernel_symbol *gpl_syms;
	const s32 *gpl_crcs;

#ifdef CONFIG_UNUSED_SYMBOLS
	/* unused exported symbols. */
	const struct kernel_symbol *unused_syms;
	const s32 *unused_crcs;
	unsigned int num_unused_syms;

	/* GPL-only, unused exported symbols. */
	unsigned int num_unused_gpl_syms;
	const struct kernel_symbol *unused_gpl_syms;
	const s32 *unused_gpl_crcs;
#endif

#ifdef CONFIG_MODULE_SIG
	/* Signature was verified. */
	bool sig_ok;
#endif

	bool async_probe_requested;

	/* symbols that will be GPL-only in the near future. */
	const struct kernel_symbol *gpl_future_syms;
	const s32 *gpl_future_crcs;
	unsigned int num_gpl_future_syms;

	/* Exception table */
	unsigned int num_exentries;
	struct exception_table_entry *extable;

	/* Startup function. */
	int (*init)(void);

	/* Core layout: rbtree is accessed frequently, so keep together. */
	struct module_layout core_layout __module_layout_align;
	struct module_layout init_layout;

	/* Arch-specific module values */
	struct mod_arch_specific arch;
2016-09-21 19:47:22 +08:00
|
|
|
unsigned long taints; /* same bits as kernel:taint_flags */
|
2006-10-02 17:17:02 +08:00
|
|
|
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that in inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug(Q) returns
CONFIG_DEBUG_BUGVERBOSE; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
but, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-08 18:36:19 +08:00
|
|
|
#ifdef CONFIG_GENERIC_BUG
|
|
|
|
/* Support for BUG */
|
2008-07-23 08:24:26 +08:00
|
|
|
unsigned num_bugs;
|
2006-12-08 18:36:19 +08:00
|
|
|
struct list_head bug_list;
|
|
|
|
struct bug_entry *bug_table;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
modules: fix longstanding /proc/kallsyms vs module insertion race.
For CONFIG_KALLSYMS, we keep two symbol tables and two string tables.
There's one full copy, marked SHF_ALLOC and laid out at the end of the
module's init section. There's also a cut-down version that only
contains core symbols and strings, and lives in the module's core
section.
After module init (and before we free the module memory), we switch
the mod->symtab, mod->num_symtab and mod->strtab to point to the core
versions. We do this under the module_mutex.
However, kallsyms doesn't take the module_mutex: it uses
preempt_disable() and RCU tricks to walk through the modules, because
it's used in the oops path. It's also used in /proc/kallsyms.
There's nothing atomic about the change of these variables, so we can
get the old (larger!) num_symtab and the new symtab pointer; in fact
this is what I saw when trying to reproduce.
By grouping these variables together, we can use a
carefully-dereferenced pointer to ensure we always get one or the
other (the free of the module init section is already done in an RCU
callback, so that's safe). We allocate the init one at the end of the
module init section, and keep the core one inside the struct module
itself (it could also have been allocated at the end of the module
core, but that's probably overkill).
Reported-by: Weilong Chen <chenweilong@huawei.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541
Cc: stable@kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2016-02-03 14:25:26 +08:00
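A hedged sketch of the reader side, assuming the grouped state carries the
usual symtab/num_symtab/strtab trio (the helper name is made up):

/* Snapshot the kallsyms pointer once under RCU so the symbol count and
 * the tables it indexes are mutually consistent. */
static const char *sketch_module_symbol_name(struct module *mod, unsigned int i)
{
	struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);

	if (i >= kallsyms->num_symtab)
		return NULL;
	return kallsyms->strtab + kallsyms->symtab[i].st_name;
}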
|
|
|
/* Protected by RCU and/or module_mutex: use rcu_dereference() */
|
2020-01-23 01:04:47 +08:00
|
|
|
struct mod_kallsyms __rcu *kallsyms;
|
2016-02-03 14:25:26 +08:00
|
|
|
struct mod_kallsyms core_kallsyms;
|
2016-10-20 07:12:18 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Section attributes */
|
|
|
|
struct module_sect_attrs *sect_attrs;
|
2007-10-17 14:26:40 +08:00
|
|
|
|
|
|
|
/* Notes attributes */
|
|
|
|
struct module_notes_attrs *notes_attrs;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2011-05-20 06:55:25 +08:00
|
|
|
/* The command line arguments (may be mangled). People like
|
|
|
|
keeping pointers to this stuff */
|
|
|
|
char *args;
|
|
|
|
|
2010-03-10 17:56:10 +08:00
|
|
|
#ifdef CONFIG_SMP
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Per-cpu data. */
|
2010-03-10 17:56:10 +08:00
|
|
|
void __percpu *percpu;
|
|
|
|
unsigned int percpu_size;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired by the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, whose
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU, using the
scheduler (and preempt_disable) to guarantee that a quiescent state can be
found (this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the RCU callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connection of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com>:
Tested on x86-64.
Performance impact of a tracepoint: same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers:
I evaluated the overhead of kernel markers using the linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one had CONFIG_MARKERS enabled.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled and marker-disabled kernels
don't increase. Moreover, if the number of CPUs increases, the
differences don't increase either.
Curiously, the marker-enabled kernel is better than the marker-disabled
kernel in more than half of the cases, though I guess this comes from
differences in memory access patterns.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-19 00:16:16 +08:00
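As a rough usage sketch (the event name and probe site are made up, and the
exact DEFINE_TRACE signature has varied across kernel versions):

#include <linux/tracepoint.h>

/* In a header: declare the tracepoint with typed arguments. */
DECLARE_TRACE(sketch_event,
	TP_PROTO(int value),
	TP_ARGS(value));

/* In exactly one .c file: emit the tracepoint definition. */
DEFINE_TRACE(sketch_event);

/* At the instrumented site: nearly free while no probe is attached. */
static void sketch_do_work(int value)
{
	trace_sketch_event(value);
}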
|
|
|
#ifdef CONFIG_TRACEPOINTS
|
|
|
|
unsigned int num_tracepoints;
|
2018-10-14 03:10:50 +08:00
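On architectures with PREL32 relocations, each tracepoint_ptr_t above holds a
32-bit self-relative offset rather than a full pointer; a hedged sketch of the
decode (cf. tracepoint_ptr_deref() in linux/tracepoint.h):

/* The stored value is "target address minus the slot's own address",
 * so adding it back to the slot address recovers the pointer. */
static struct tracepoint *sketch_tracepoint_deref(const int *p)
{
	return (struct tracepoint *)((unsigned long)p + *p);
}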
|
|
|
tracepoint_ptr_t *tracepoints_ptrs;
|
2008-07-19 00:16:16 +08:00
|
|
|
#endif
|
2019-04-06 07:15:00 +08:00
|
|
|
#ifdef CONFIG_TREE_SRCU
|
|
|
|
unsigned int num_srcu_structs;
|
|
|
|
struct srcu_struct **srcu_struct_ptrs;
|
|
|
|
#endif
|
2018-12-13 08:42:37 +08:00
|
|
|
#ifdef CONFIG_BPF_EVENTS
|
|
|
|
unsigned int num_bpf_raw_events;
|
|
|
|
struct bpf_raw_event_map *bpf_raw_events;
|
|
|
|
#endif
|
2018-12-30 23:14:15 +08:00
|
|
|
#ifdef CONFIG_JUMP_LABEL
|
2010-09-17 23:09:00 +08:00
|
|
|
struct jump_entry *jump_entries;
|
|
|
|
unsigned int num_jump_entries;
|
|
|
|
#endif
|
2009-03-07 00:21:49 +08:00
|
|
|
#ifdef CONFIG_TRACING
|
2009-03-07 00:21:48 +08:00
|
|
|
unsigned int num_trace_bprintk_fmt;
|
2011-05-20 06:55:25 +08:00
|
|
|
const char **trace_bprintk_fmt_start;
|
2009-03-07 00:21:48 +08:00
|
|
|
#endif
|
2009-04-11 02:53:50 +08:00
|
|
|
#ifdef CONFIG_EVENT_TRACING
|
2015-05-05 23:45:27 +08:00
|
|
|
struct trace_event_call **trace_events;
|
2009-04-11 02:53:50 +08:00
|
|
|
unsigned int num_trace_events;
|
2017-06-01 05:56:44 +08:00
|
|
|
struct trace_eval_map **trace_evals;
|
|
|
|
unsigned int num_trace_evals;
|
2009-04-11 02:53:50 +08:00
|
|
|
#endif
|
2009-04-16 01:24:06 +08:00
|
|
|
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
|
|
|
unsigned int num_ftrace_callsites;
|
2011-05-20 06:55:25 +08:00
|
|
|
unsigned long *ftrace_callsites;
|
2009-04-16 01:24:06 +08:00
|
|
|
#endif
|
2009-03-07 00:21:48 +08:00
|
|
|
|
livepatch: Fix subtle race with coming and going modules
There is a notifier that handles live patches for coming and going modules.
It takes the klp_mutex lock to avoid races with coming and going patches, but
it does not keep the lock all the time. Therefore the following races are
possible:
1. The notifier is called sometime in MODULE_STATE_COMING. The module
is visible via find_module() in this state all the time. It means that
a new patch can be registered and enabled even before the notifier is
called. It might create a wrong order of stacked patches; see below
for an example.
2. A new patch could still see the module in the GOING state even after
the notifier has been called. It will try to initialize the related
object structures, but the module could disappear at any time, leaving
a mess in the structures. It might even cause an invalid
memory access.
This patch solves the problem by adding a boolean variable into struct module.
The value is true after the coming and before the going handler is called.
New patches need to be applied when the value is true and they need to ignore
the module when the value is false.
Note that we need to know the state of all modules on the system. The races are
related to new patches. Therefore we do not know what modules will get
patched.
Also note that we cannot simply ignore going modules. The code from the
module could be called even in the GOING state until mod->exit() finishes.
If we start supporting patches with semantic changes between function
calls, we need to apply new patches to any still usable code.
See below for an example.
Finally note that the patch solves only the situation when a new patch is
registered. There are no such problems when the patch is being removed.
It does not matter who disables the patch first, whether the normal
disable_patch() or the module notifier. There is nothing to do
once the patch is disabled.
Alternative solutions:
======================
+ reject new patches when a patched module is coming or going; this is ugly
+ wait with adding a new patch until the module leaves the COMING and GOING
states; this might be dangerous and complicated; we would need to release
klp_mutex in the middle of the patch registration to avoid a deadlock
with the coming and going handlers; also we might need a waitqueue for
each module which seems to be even bigger overhead than the boolean
+ stop modules from entering COMING and GOING states; wait until modules
leave these states when they are already there; looks complicated; we would
need to ignore the module that asked to stop the others to avoid a deadlock;
also it is unclear what to do when two modules asked to stop others and
both are in COMING state (situation when two new patches are applied)
+ always register/enable new patches and fix up the potential mess (registered
patches order) in klp_module_init(); this is nasty and prone to regressions
in the future development
+ add another MODULE_STATE where the kallsyms are visible but the module is not
used yet; this looks too complex; the module states are checked on "many"
locations
Example of patch stacking breakage:
===================================
The notifier could _not_ _simply_ ignore already initialized module objects.
For example, let's have three patches (P1, P2, P3) for functions a() and b()
where a() is from vmcore and b() is from a module M. Something like:
a() b()
P1 a1() b1()
P2 a2() b2()
P3 a3() b3()
If you load the module M after all patches are registered and enabled,
the ftrace ops for functions a() and b() have the functions listed in this
order:
ops_a->func_stack -> list(a3,a2,a1)
ops_b->func_stack -> list(b3,b2,b1)
so the pointer to b3() is the first and will be used.
Then you might have the following scenario. Let's start with the state where
patches P1 and P2 are registered and enabled but the module M is not loaded.
Then the ftrace ops for b() do not exist. Then we get into the following race:
CPU0 CPU1
load_module(M)
complete_formation()
mod->state = MODULE_STATE_COMING;
mutex_unlock(&module_mutex);
klp_register_patch(P3);
klp_enable_patch(P3);
# STATE 1
klp_module_notify(M)
klp_module_notify_coming(P1);
klp_module_notify_coming(P2);
klp_module_notify_coming(P3);
# STATE 2
The ftrace ops for a() and b() then look like this:
STATE1:
ops_a->func_stack -> list(a3,a2,a1);
ops_b->func_stack -> list(b3);
STATE2:
ops_a->func_stack -> list(a3,a2,a1);
ops_b->func_stack -> list(b2,b1,b3);
therefore, b2() is used for the module but a3() is used for vmcore
because they were the last added.
Example of the race with going modules:
=======================================
CPU0 CPU1
delete_module() #SYSCALL
try_stop_module()
mod->state = MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
klp_register_patch()
klp_enable_patch()
#save place to switch universe
b() # from module that is going
a() # from core (patched)
mod->exit();
Note that the function b() can be called until we call mod->exit().
If we do not apply a patch against b() because it is in MODULE_STATE_GOING,
it will call the patched a() with modified semantics and things might go wrong.
[jpoimboe@redhat.com: use one boolean instead of two]
Signed-off-by: Petr Mladek <pmladek@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2015-03-12 19:55:13 +08:00
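A hedged sketch of how a livepatch core might consult the resulting flag (the
helper is illustrative; the real checks live in kernel/livepatch/):

/* Only treat a module as patchable between its coming and going
 * notifiers, i.e. while klp_alive is true. */
static struct module *sketch_find_patchable_module(const char *name)
{
	struct module *mod;

	mutex_lock(&module_mutex);
	mod = find_module(name);
	if (mod && !mod->klp_alive)
		mod = NULL;
	mutex_unlock(&module_mutex);
	return mod;
}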
|
|
|
#ifdef CONFIG_LIVEPATCH
|
2016-03-23 08:03:16 +08:00
|
|
|
bool klp; /* Is this a livepatch module? */
|
2015-03-12 19:55:13 +08:00
|
|
|
bool klp_alive;
|
2016-03-23 08:03:16 +08:00
|
|
|
|
|
|
|
/* Elf information */
|
|
|
|
struct klp_modinfo *klp_info;
|
2015-03-12 19:55:13 +08:00
|
|
|
#endif
|
|
|
|
|
2008-07-23 08:24:26 +08:00
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
|
|
|
/* What modules depend on me? */
|
2010-06-01 03:19:37 +08:00
|
|
|
struct list_head source_list;
|
|
|
|
/* What modules do I depend on? */
|
|
|
|
struct list_head target_list;
|
2008-07-23 08:24:26 +08:00
|
|
|
|
|
|
|
/* Destruction function. */
|
|
|
|
void (*exit)(void);
|
|
|
|
|
2014-11-10 06:59:29 +08:00
|
|
|
atomic_t refcnt;
|
2008-07-23 08:24:26 +08:00
|
|
|
#endif
|
2009-06-18 07:28:03 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_CONSTRUCTORS
|
|
|
|
/* Constructor functions. */
|
|
|
|
ctor_fn_t *ctors;
|
|
|
|
unsigned int num_ctors;
|
|
|
|
#endif
|
2017-12-12 00:36:46 +08:00
|
|
|
|
2018-01-13 01:55:03 +08:00
|
|
|
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
|
2018-01-13 01:55:33 +08:00
|
|
|
struct error_injection_entry *ei_funcs;
|
2018-01-13 01:55:03 +08:00
|
|
|
unsigned int num_ei_funcs;
|
2017-12-12 00:36:46 +08:00
|
|
|
#endif
|
2016-10-28 16:22:25 +08:00
|
|
|
} ____cacheline_aligned __randomize_layout;
|
2007-05-09 17:35:15 +08:00
|
|
|
#ifndef MODULE_ARCH_INIT
|
|
|
|
#define MODULE_ARCH_INIT {}
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
ARM: module: Fix function kallsyms on Thumb-2
Thumb-2 functions have the lowest bit set in the symbol value in the
symtab. When kallsyms are generated for the vmlinux, the kallsyms are
generated from the output of nm, and nm clears the lowest bit.
$ arm-linux-gnueabihf-readelf -a vmlinux | grep show_interrupts
95947: 8015dc89 686 FUNC GLOBAL DEFAULT 2 show_interrupts
$ arm-linux-gnueabihf-nm vmlinux | grep show_interrupts
8015dc88 T show_interrupts
$ cat /proc/kallsyms | grep show_interrupts
8015dc88 T show_interrupts
However, for modules, kallsyms uses the values in the symbol table
without modification, so for functions in modules, the lowest bit is set
in kallsyms.
$ arm-linux-gnueabihf-readelf -a drivers/net/tun.ko | grep tun_get_socket
333: 00002d4d 36 FUNC GLOBAL DEFAULT 1 tun_get_socket
$ arm-linux-gnueabihf-nm drivers/net/tun.ko | grep tun_get_socket
00002d4c T tun_get_socket
$ cat /proc/kallsyms | grep tun_get_socket
7f802d4d t tun_get_socket [tun]
Because of this, the symbol+offset of the crashing instruction shown in
oopses is incorrect when the crash is in a module. For example, given a
tun_get_socket which starts like this,
00002d4c <tun_get_socket>:
2d4c: 6943 ldr r3, [r0, #20]
2d4e: 4a07 ldr r2, [pc, #28]
2d50: 4293 cmp r3, r2
a crash when tun_get_socket is called with NULL results in:
PC is at tun_xdp+0xa3/0xa4 [tun]
pc : [<7f802d4c>]
As can be seen, the "PC is at" line reports the wrong symbol name, and
the symbol+offset will point to the wrong source line if it is passed to
gdb.
To solve this, add a way for arches to fix up the reading of these module
kallsyms values, and use that to clear the lowest bit for function
symbols on Thumb-2.
After the fix:
# cat /proc/kallsyms | grep tun_get_socket
7f802d4c t tun_get_socket [tun]
PC is at tun_get_socket+0x0/0x24 [tun]
pc : [<7f802d4c>]
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2018-12-15 00:05:55 +08:00
|
|
|
#ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE
|
|
|
|
static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym)
|
|
|
|
{
|
|
|
|
return sym->st_value;
|
|
|
|
}
|
|
|
|
#endif
|
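An architecture that needs different behaviour defines
HAVE_ARCH_KALLSYMS_SYMBOL_VALUE in its asm/module.h and supplies its own
helper; a hedged sketch modelled on the Thumb-2 case from the changelog above:

/* In an arch header (sketch): clear bit 0 of function symbols so
 * kallsyms reports the real instruction address on Thumb-2. */
#define HAVE_ARCH_KALLSYMS_SYMBOL_VALUE
static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym)
{
	if (ELF_ST_TYPE(sym->st_info) == STT_FUNC)
		return sym->st_value & ~1UL;
	return sym->st_value;
}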
|
|
|
|
2008-12-06 08:03:59 +08:00
|
|
|
extern struct mutex module_mutex;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* FIXME: It'd be nice to isolate modules during init, too, so they
|
|
|
|
aren't used before they (may) fail. But presently too much code
|
|
|
|
(IDE & SCSI) require entry into the module during init.*/
|
2018-02-07 07:41:31 +08:00
|
|
|
static inline bool module_is_live(struct module *mod)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return mod->state != MODULE_STATE_GOING;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct module *__module_text_address(unsigned long addr);
|
2009-04-01 03:05:31 +08:00
|
|
|
struct module *__module_address(unsigned long addr);
|
|
|
|
bool is_module_address(unsigned long addr);
|
2017-02-27 22:37:36 +08:00
|
|
|
bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
|
2010-03-10 17:57:54 +08:00
|
|
|
bool is_module_percpu_address(unsigned long addr);
|
2009-04-01 03:05:31 +08:00
|
|
|
bool is_module_text_address(unsigned long addr);
|
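A hedged usage sketch: these lookups are only safe while module removal is
blocked, e.g. with preemption disabled (the helper name is made up):

/* Resolve an address to the name of its owning module, or "kernel". */
static const char *sketch_owner_name(unsigned long addr)
{
	struct module *mod;
	const char *name = "kernel";

	preempt_disable();
	mod = __module_address(addr);
	if (mod)
		name = mod->name;
	preempt_enable();
	return name;
}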
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-07-27 05:55:01 +08:00
|
|
|
static inline bool within_module_core(unsigned long addr,
|
|
|
|
const struct module *mod)
|
2009-01-07 06:41:49 +08:00
|
|
|
{
|
2015-11-26 07:14:08 +08:00
|
|
|
return (unsigned long)mod->core_layout.base <= addr &&
|
|
|
|
addr < (unsigned long)mod->core_layout.base + mod->core_layout.size;
|
2009-01-07 06:41:49 +08:00
|
|
|
}
|
|
|
|
|
2014-07-27 05:55:01 +08:00
|
|
|
static inline bool within_module_init(unsigned long addr,
|
|
|
|
const struct module *mod)
|
2009-01-07 06:41:49 +08:00
|
|
|
{
|
2015-11-26 07:14:08 +08:00
|
|
|
return (unsigned long)mod->init_layout.base <= addr &&
|
|
|
|
addr < (unsigned long)mod->init_layout.base + mod->init_layout.size;
|
2009-01-07 06:41:49 +08:00
|
|
|
}
|
|
|
|
|
2014-07-27 05:55:01 +08:00
|
|
|
static inline bool within_module(unsigned long addr, const struct module *mod)
|
2014-07-27 05:54:01 +08:00
|
|
|
{
|
|
|
|
return within_module_init(addr, mod) || within_module_core(addr, mod);
|
|
|
|
}
|
|
|
|
|
2008-12-06 08:03:59 +08:00
|
|
|
/* Search for module by name: must hold module_mutex. */
|
|
|
|
struct module *find_module(const char *name);
|
|
|
|
|
|
|
|
struct symsearch {
|
|
|
|
const struct kernel_symbol *start, *stop;
|
modversions: treat symbol CRCs as 32 bit quantities
The modversion symbol CRCs are emitted as ELF symbols, which allows us
to easily populate the kcrctab sections by relying on the linker to
associate each kcrctab slot with the correct value.
This has a couple of downsides:
- Given that the CRCs are treated as memory addresses, we waste 4 bytes
for each CRC on 64 bit architectures,
- On architectures that support runtime relocation, a R_<arch>_RELATIVE
relocation entry is emitted for each CRC value, which identifies it
as a quantity that requires fixing up based on the actual runtime
load offset of the kernel. This results in corrupted CRCs unless we
explicitly undo the fixup (and this is currently being handled in the
core module code)
- Such runtime relocation entries take up 24 bytes of __init space
each, resulting in an 8x overhead in [uncompressed] kernel size for
CRCs.
Switching to explicit 32 bit values on 64 bit architectures fixes most
of these issues, given that 32 bit values are not treated as quantities
that require fixing up based on the actual runtime load offset. Note
that on some ELF64 architectures [such as PPC64], these 32-bit values
are still emitted as [absolute] runtime relocatable quantities, even if
the value resolves to a build time constant. Since relative relocations
are always resolved at build time, this patch enables MODULE_REL_CRCS on
powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC
references into relative references into .rodata where the actual CRC
value is stored.
So redefine all CRC fields and variables as u32, and redefine the
__CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using
inline assembler (which is necessary since 64-bit C code cannot use
32-bit types to hold memory addresses, even if they are ultimately
resolved using values that do not exceed 0xffffffff). To avoid
potential problems with legacy 32-bit architectures using legacy
toolchains, the equivalent C definition of the kcrctab entry is retained
for 32-bit architectures.
Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64
relocating kcrctabs when CONFIG_RELOCATABLE=y")
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-02-03 17:54:06 +08:00
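With relative CRCs in effect, each s32 slot stores an offset to the real CRC
rather than the CRC itself; a hedged sketch of the decode (cf. the resolver in
kernel/module.c):

/* The u32 CRC lives in .rodata at (slot address + stored offset). */
static u32 sketch_resolve_rel_crc(const s32 *crc)
{
	return *(const u32 *)((unsigned long)crc + *crc);
}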
|
|
|
const s32 *crcs;
|
2008-12-06 08:03:59 +08:00
|
|
|
enum {
|
|
|
|
NOT_GPL_ONLY,
|
|
|
|
GPL_ONLY,
|
|
|
|
WILL_BE_GPL_ONLY,
|
|
|
|
} licence;
|
|
|
|
bool unused;
|
|
|
|
};
|
|
|
|
|
2015-05-27 09:39:35 +08:00
|
|
|
/*
|
|
|
|
* Search for an exported symbol by name.
|
|
|
|
*
|
|
|
|
* Must be called with module_mutex held or preemption disabled.
|
|
|
|
*/
|
2008-12-06 08:03:59 +08:00
|
|
|
const struct kernel_symbol *find_symbol(const char *name,
|
|
|
|
struct module **owner,
|
2017-02-03 17:54:06 +08:00
|
|
|
const s32 **crc,
|
2008-12-06 08:03:59 +08:00
|
|
|
bool gplok,
|
|
|
|
bool warn);
|
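A hedged usage sketch honouring the locking rule stated above (the wrapper is
illustrative):

/* Check whether an exported symbol exists, GPL-only exports included. */
static bool sketch_symbol_is_exported(const char *name)
{
	struct module *owner;
	const s32 *crc;
	const struct kernel_symbol *sym;

	preempt_disable();
	sym = find_symbol(name, &owner, &crc, true, false);
	preempt_enable();
	return sym != NULL;
}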
|
|
|
|
2015-05-27 09:39:35 +08:00
|
|
|
/*
|
|
|
|
* Walk the exported symbol table
|
|
|
|
*
|
|
|
|
* Must be called with module_mutex held or preemption disabled.
|
|
|
|
*/
|
2011-04-20 03:49:58 +08:00
|
|
|
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
|
|
|
|
struct module *owner,
|
|
|
|
void *data), void *data);
|
2008-12-06 08:03:59 +08:00
|
|
|
|
2007-05-08 15:28:39 +08:00
|
|
|
/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
|
2005-04-17 06:20:36 +08:00
|
|
|
symnum out of range. */
|
2007-05-08 15:28:39 +08:00
|
|
|
int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
|
|
|
char *name, char *module_name, int *exported);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Look for this name: can be of form module:name. */
|
|
|
|
unsigned long module_kallsyms_lookup_name(const char *name);
|
|
|
|
|
2008-12-06 08:03:58 +08:00
|
|
|
int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
|
|
|
|
struct module *, unsigned long),
|
|
|
|
void *data);
|
|
|
|
|
2016-04-12 03:32:09 +08:00
|
|
|
extern void __noreturn __module_put_and_exit(struct module *mod,
|
|
|
|
long code);
|
2013-12-04 11:39:38 +08:00
|
|
|
#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
2015-01-22 08:43:14 +08:00
|
|
|
int module_refcount(struct module *mod);
|
2005-04-17 06:20:36 +08:00
|
|
|
void __symbol_put(const char *symbol);
|
2018-06-23 23:37:44 +08:00
|
|
|
#define symbol_put(x) __symbol_put(__stringify(x))
|
2005-04-17 06:20:36 +08:00
|
|
|
void symbol_put_addr(void *addr);
|
|
|
|
|
|
|
|
/* Sometimes we know we already have a refcount, and it's easier not
|
|
|
|
to handle the error case (which only happens with rmmod --wait). */
|
2012-03-26 10:20:52 +08:00
|
|
|
extern void __module_get(struct module *module);
|
2010-01-05 14:34:50 +08:00
|
|
|
|
2012-03-26 10:20:52 +08:00
|
|
|
/* This is the Right Way to get a module: if it fails, it's being removed,
|
|
|
|
* so pretend it's not there. */
|
|
|
|
extern bool try_module_get(struct module *module);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-10-18 13:47:25 +08:00
|
|
|
extern void module_put(struct module *module);
|
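A hedged sketch of the usual pairing (the wrapper is illustrative):

/* Pin the owner for the duration of a call into its code. */
static int sketch_pin_and_call(struct module *owner, void (*fn)(void))
{
	if (!try_module_get(owner))
		return -ENODEV;		/* being removed: pretend it's gone */
	fn();
	module_put(owner);
	return 0;
}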
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#else /*!CONFIG_MODULE_UNLOAD*/
|
2017-04-19 09:47:22 +08:00
|
|
|
static inline bool try_module_get(struct module *module)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return !module || module_is_live(module);
|
|
|
|
}
|
|
|
|
static inline void module_put(struct module *module)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static inline void __module_get(struct module *module)
|
|
|
|
{
|
|
|
|
}
|
2014-01-16 07:48:49 +08:00
|
|
|
#define symbol_put(x) do { } while (0)
|
|
|
|
#define symbol_put_addr(p) do { } while (0)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#endif /* CONFIG_MODULE_UNLOAD */
|
2010-11-25 05:21:10 +08:00
|
|
|
int ref_module(struct module *a, struct module *b);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* This is a #define so the string doesn't get put in every .o file */
|
|
|
|
#define module_name(mod) \
|
|
|
|
({ \
|
|
|
|
struct module *__mod = (mod); \
|
|
|
|
__mod ? __mod->name : "kernel"; \
|
|
|
|
})
|
|
|
|
|
sections: split dereference_function_descriptor()
There are two format specifiers to print out a pointer in symbolic
format: '%pS/%ps' and '%pF/%pf'. On most architectures, the two
mean exactly the same thing, but some architectures (ia64, ppc64,
parisc64) use an indirect pointer for C function pointers, where
the function pointer points to a function descriptor (which in
turn contains the actual pointer to the code). The '%pF/%pf', when
used appropriately, automatically does the appropriate function
descriptor dereference on such architectures.
The "when used appropriately" part is tricky. Basically this is
a subtle ABI detail, specific to some platforms, that made it to
the API level and people can be unaware of it and miss the whole
"we need to dereference the function" business out. [1] proves
that point (note that it fixes only '%pF' and '%pS', there might
be '%pf' and '%ps' cases as well).
It appears that we can handle everything within the affected
arches and make '%pS/%ps' smart enough to retire '%pF/%pf'.
Function descriptors live in .opd elf section and all affected
arches (ia64, ppc64, parisc64) handle it properly for kernel
and modules. So we, technically, can decide if the dereference
is needed by simply looking at the pointer: if it belongs to
.opd section then we need to dereference it.
The kernel and modules have their own .opd sections, obviously,
that's why we need to split dereference_function_descriptor()
and use separate kernel and module dereference arch callbacks.
This patch does the first step, it
a) adds dereference_kernel_function_descriptor() function.
b) adds a weak alias to dereference_module_function_descriptor()
function.
So, for the time being, we will have:
1) dereference_function_descriptor()
A generic function, that simply dereferences the pointer. There is
bunch of places that call it: kgdbts, init/main.c, extable, etc.
2) dereference_kernel_function_descriptor()
A function to call on kernel symbols that does kernel .opd section
address range test.
3) dereference_module_function_descriptor()
A function to call on modules' symbols that does modules' .opd
section address range test.
[1] https://marc.info/?l=linux-kernel&m=150472969730573
Link: http://lkml.kernel.org/r/20171109234830.5067-2-sergey.senozhatsky@gmail.com
To: Fenghua Yu <fenghua.yu@intel.com>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Paul Mackerras <paulus@samba.org>
To: Michael Ellerman <mpe@ellerman.id.au>
To: James Bottomley <jejb@parisc-linux.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-ia64@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Tested-by: Tony Luck <tony.luck@intel.com> #ia64
Tested-by: Santosh Sivaraj <santosh@fossix.org> #powerpc
Tested-by: Helge Deller <deller@gmx.de> #parisc64
Signed-off-by: Petr Mladek <pmladek@suse.com>
2017-11-10 07:48:25 +08:00

/* Dereference module function descriptor */
void *dereference_module_function_descriptor(struct module *mod, void *ptr);

/* For kallsyms to ask for address resolution.  namebuf should be at
 * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
 * found, otherwise NULL. */
const char *module_address_lookup(unsigned long addr,
			    unsigned long *symbolsize,
			    unsigned long *offset,
			    char **modname,
			    char *namebuf);
int lookup_module_symbol_name(unsigned long addr, char *symname);
int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
			       unsigned long *offset, char *modname,
			       char *name);
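
/*
 * Example usage (sketch): resolve an address into "symbol+offset [module]"
 * form.  The caller provides the storage for the name; KSYM_NAME_LEN is
 * defined in <linux/kallsyms.h>.
 *
 *	char namebuf[KSYM_NAME_LEN];
 *	unsigned long size, offset;
 *	char *modname;
 *	const char *sym;
 *
 *	sym = module_address_lookup(addr, &size, &offset, &modname, namebuf);
 *	if (sym)
 *		pr_info("%s+%#lx [%s]\n", sym, offset, modname);
 */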

int register_module_notifier(struct notifier_block *nb);
int unregister_module_notifier(struct notifier_block *nb);
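
/*
 * Example usage (sketch): receive MODULE_STATE_* events as modules are
 * loaded and unloaded; the affected struct module is passed as the data
 * argument.  my_module_cb() and my_module_nb are hypothetical names.
 *
 *	static int my_module_cb(struct notifier_block *nb,
 *				unsigned long event, void *data)
 *	{
 *		struct module *mod = data;
 *
 *		if (event == MODULE_STATE_COMING)
 *			pr_info("%s is loading\n", mod->name);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_module_nb = {
 *		.notifier_call = my_module_cb,
 *	};
 *
 *	register_module_notifier(&my_module_nb);
 */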

extern void print_modules(void);

static inline bool module_requested_async_probing(struct module *module)
{
	return module && module->async_probe_requested;
}

#ifdef CONFIG_LIVEPATCH
static inline bool is_livepatch_module(struct module *mod)
{
	return mod->klp;
}
#else /* !CONFIG_LIVEPATCH */
static inline bool is_livepatch_module(struct module *mod)
{
	return false;
}
#endif /* CONFIG_LIVEPATCH */

bool is_module_sig_enforced(void);
void set_module_sig_enforced(void);

#else /* !CONFIG_MODULES... */

static inline struct module *__module_address(unsigned long addr)
{
	return NULL;
}

static inline struct module *__module_text_address(unsigned long addr)
{
	return NULL;
}

static inline bool is_module_address(unsigned long addr)
{
	return false;
}

static inline bool is_module_percpu_address(unsigned long addr)
{
	return false;
}

static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
{
	return false;
}

static inline bool is_module_text_address(unsigned long addr)
{
	return false;
}

static inline bool within_module_core(unsigned long addr,
				      const struct module *mod)
{
	return false;
}

static inline bool within_module_init(unsigned long addr,
				      const struct module *mod)
{
	return false;
}

static inline bool within_module(unsigned long addr, const struct module *mod)
{
	return false;
}

/* Get/put a kernel symbol (calls should be symmetric) */
#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
#define symbol_put(x) do { } while (0)
#define symbol_put_addr(x) do { } while (0)

static inline void __module_get(struct module *module)
{
}

static inline bool try_module_get(struct module *module)
{
	return true;
}

static inline void module_put(struct module *module)
{
}

#define module_name(mod) "kernel"

/* For kallsyms to ask for address resolution.  NULL means not found. */
static inline const char *module_address_lookup(unsigned long addr,
						unsigned long *symbolsize,
						unsigned long *offset,
						char **modname,
						char *namebuf)
{
	return NULL;
}

static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
{
	return -ERANGE;
}

static inline int lookup_module_symbol_attrs(unsigned long addr,
					     unsigned long *size,
					     unsigned long *offset,
					     char *modname, char *name)
{
	return -ERANGE;
}

static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
					char *type, char *name,
					char *module_name, int *exported)
{
	return -ERANGE;
}

static inline unsigned long module_kallsyms_lookup_name(const char *name)
{
	return 0;
}

static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
							   struct module *,
							   unsigned long),
						 void *data)
{
	return 0;
}

static inline int register_module_notifier(struct notifier_block *nb)
{
	/* no events will happen anyway, so this can always succeed */
	return 0;
}

static inline int unregister_module_notifier(struct notifier_block *nb)
{
	return 0;
}

#define module_put_and_exit(code) do_exit(code)

static inline void print_modules(void)
{
}

static inline bool module_requested_async_probing(struct module *module)
{
	return false;
}

static inline bool is_module_sig_enforced(void)
{
	return false;
}

static inline void set_module_sig_enforced(void)
{
}

/* Dereference module function descriptor */
static inline
void *dereference_module_function_descriptor(struct module *mod, void *ptr)
{
	return ptr;
}

#endif /* CONFIG_MODULES */

#ifdef CONFIG_SYSFS
extern struct kset *module_kset;
extern struct kobj_type module_ktype;
extern int module_sysfs_initialized;
#endif /* CONFIG_SYSFS */

#define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
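
/*
 * Example usage (sketch): take an optional reference on a symbol that may
 * be provided by a not-yet-loaded module.  symbol_request() additionally
 * asks kmod to load the owning module if the symbol is absent;
 * some_optional_function is a hypothetical exported function.
 *
 *	int (*fn)(int) = symbol_request(some_optional_function);
 *
 *	if (fn) {
 *		fn(42);
 *		symbol_put(some_optional_function);
 *	}
 */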

/* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */

#define __MODULE_STRING(x) __stringify(x)

#ifdef CONFIG_STRICT_MODULE_RWX
extern void module_enable_ro(const struct module *mod, bool after_init);
extern void module_disable_ro(const struct module *mod);
#else
static inline void module_enable_ro(const struct module *mod, bool after_init) { }
static inline void module_disable_ro(const struct module *mod) { }
#endif

#ifdef CONFIG_GENERIC_BUG
void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
			 struct module *);
void module_bug_cleanup(struct module *);

#else	/* !CONFIG_GENERIC_BUG */

static inline void module_bug_finalize(const Elf_Ehdr *hdr,
					const Elf_Shdr *sechdrs,
					struct module *mod)
{
}
static inline void module_bug_cleanup(struct module *mod) {}
#endif	/* CONFIG_GENERIC_BUG */

#ifdef CONFIG_RETPOLINE
extern bool retpoline_module_ok(bool has_retpoline);
#else
static inline bool retpoline_module_ok(bool has_retpoline)
{
	return true;
}
#endif

#ifdef CONFIG_MODULE_SIG
static inline bool module_sig_ok(struct module *module)
{
	return module->sig_ok;
}
#else	/* !CONFIG_MODULE_SIG */
static inline bool module_sig_ok(struct module *module)
{
	return true;
}
#endif	/* CONFIG_MODULE_SIG */

#endif /* _LINUX_MODULE_H */