2018-07-31 19:39:31 +08:00
|
|
|
menu "Kernel hacking"
|
|
|
|
|
2013-07-02 04:04:49 +08:00
|
|
|
menu "printk and dmesg options"
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
config PRINTK_TIME
|
|
|
|
bool "Show timing information on printks"
|
2006-12-07 12:36:38 +08:00
|
|
|
depends on PRINTK
|
2005-04-17 06:20:36 +08:00
|
|
|
help
|
2012-05-10 10:30:45 +08:00
|
|
|
Selecting this option causes time stamps of the printk()
|
|
|
|
messages to be added to the output of the syslog() system
|
|
|
|
call and at the console.
|
|
|
|
|
|
|
|
The timestamp is always recorded internally, and exported
|
|
|
|
to /dev/kmsg. This flag just specifies if the timestamp should
|
|
|
|
be included, not that the timestamp is recorded.
|
|
|
|
|
|
|
|
The behavior is also controlled by the kernel command line
|
2016-10-18 20:12:27 +08:00
|
|
|
parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-12-13 08:45:56 +08:00
|
|
|
config CONSOLE_LOGLEVEL_DEFAULT
|
|
|
|
int "Default console loglevel (1-15)"
|
|
|
|
range 1 15
|
|
|
|
default "7"
|
|
|
|
help
|
|
|
|
Default loglevel to determine what will be printed on the console.
|
|
|
|
|
|
|
|
Setting a default here is equivalent to passing in loglevel=<x> in
|
|
|
|
the kernel bootargs. loglevel=<x> continues to override whatever
|
|
|
|
value is specified here as well.
|
|
|
|
|
2016-12-20 08:23:15 +08:00
|
|
|
Note: This does not affect the log level of un-prefixed printk()
|
2016-12-13 08:45:56 +08:00
|
|
|
usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
|
|
|
|
option.
|
|
|
|
|
2018-06-19 19:57:26 +08:00
|
|
|
config CONSOLE_LOGLEVEL_QUIET
|
|
|
|
int "quiet console loglevel (1-15)"
|
|
|
|
range 1 15
|
|
|
|
default "4"
|
|
|
|
help
|
|
|
|
loglevel to use when "quiet" is passed on the kernel commandline.
|
|
|
|
|
|
|
|
When "quiet" is passed on the kernel commandline this loglevel
|
|
|
|
will be used as the loglevel. IOW passing "quiet" will be the
|
|
|
|
equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>"
|
|
|
|
|
2014-08-07 07:09:01 +08:00
|
|
|
config MESSAGE_LOGLEVEL_DEFAULT
|
2011-03-23 07:34:23 +08:00
|
|
|
int "Default message log level (1-7)"
|
|
|
|
range 1 7
|
|
|
|
default "4"
|
|
|
|
help
|
|
|
|
Default log level for printk statements with no specified priority.
|
|
|
|
|
|
|
|
This was hard-coded to KERN_WARNING since at least 2.6.10 but folks
|
|
|
|
that are auditing their logs closely may want to set it to a lower
|
|
|
|
priority.
|
|
|
|
|
2016-12-13 08:45:56 +08:00
|
|
|
Note: This does not affect what message level gets printed on the console
|
|
|
|
by default. To change that, use loglevel=<x> in the kernel bootargs,
|
|
|
|
or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value.
|
|
|
|
|
2013-07-02 04:04:49 +08:00
|
|
|
config BOOT_PRINTK_DELAY
|
|
|
|
bool "Delay each boot printk message by N milliseconds"
|
|
|
|
depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
|
|
|
|
help
|
|
|
|
This build option allows you to read kernel boot messages
|
|
|
|
by inserting a short delay after each one. The delay is
|
|
|
|
specified in milliseconds on the kernel command line,
|
|
|
|
using "boot_delay=N".
|
|
|
|
|
|
|
|
It is likely that you would also need to use "lpj=M" to preset
|
|
|
|
the "loops per jiffie" value.
|
|
|
|
See a previous boot log for the "lpj" value to use for your
|
|
|
|
system, and then set "lpj=M" before setting "boot_delay=N".
|
|
|
|
NOTE: Using this option may adversely affect SMP systems.
|
|
|
|
I.e., processors other than the first one may not boot up.
|
|
|
|
BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
|
|
|
|
what it believes to be lockup conditions.
|
|
|
|
|
|
|
|
config DYNAMIC_DEBUG
|
|
|
|
bool "Enable dynamic printk() support"
|
|
|
|
default n
|
|
|
|
depends on PRINTK
|
|
|
|
depends on DEBUG_FS
|
|
|
|
help
|
|
|
|
|
|
|
|
Compiles debug level messages into the kernel, which would not
|
|
|
|
otherwise be available at runtime. These messages can then be
|
|
|
|
enabled/disabled based on various levels of scope - per source file,
|
|
|
|
function, module, format string, and line number. This mechanism
|
|
|
|
implicitly compiles in all pr_debug() and dev_dbg() calls, which
|
|
|
|
enlarges the kernel text size by about 2%.
|
|
|
|
|
|
|
|
If a source file is compiled with DEBUG flag set, any
|
|
|
|
pr_debug() calls in it are enabled by default, but can be
|
|
|
|
disabled at runtime as below. Note that DEBUG flag is
|
|
|
|
turned on by many CONFIG_*DEBUG* options.
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
|
|
|
Dynamic debugging is controlled via the 'dynamic_debug/control' file,
|
|
|
|
which is contained in the 'debugfs' filesystem. Thus, the debugfs
|
|
|
|
filesystem must first be mounted before making use of this feature.
|
|
|
|
We refer to the control file as: <debugfs>/dynamic_debug/control. This
|
|
|
|
file contains a list of the debug statements that can be enabled. The
|
|
|
|
format for each line of the file is:
|
|
|
|
|
|
|
|
filename:lineno [module]function flags format
|
|
|
|
|
|
|
|
filename : source file of the debug statement
|
|
|
|
lineno : line number of the debug statement
|
|
|
|
module : module that contains the debug statement
|
|
|
|
function : function that contains the debug statement
|
|
|
|
flags : '=p' means the line is turned 'on' for printing
|
|
|
|
format : the format used for the debug statement
|
|
|
|
|
|
|
|
From a live system:
|
|
|
|
|
|
|
|
nullarbor:~ # cat <debugfs>/dynamic_debug/control
|
|
|
|
# filename:lineno [module]function flags format
|
|
|
|
fs/aio.c:222 [aio]__put_ioctx =_ "__put_ioctx:\040freeing\040%p\012"
|
|
|
|
fs/aio.c:248 [aio]ioctx_alloc =_ "ENOMEM:\040nr_events\040too\040high\012"
|
|
|
|
fs/aio.c:1770 [aio]sys_io_cancel =_ "calling\040cancel\012"
|
|
|
|
|
|
|
|
Example usage:
|
|
|
|
|
|
|
|
// enable the message at line 1603 of file svcsock.c
|
|
|
|
nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
|
|
|
|
<debugfs>/dynamic_debug/control
|
|
|
|
|
|
|
|
// enable all the messages in file svcsock.c
|
|
|
|
nullarbor:~ # echo -n 'file svcsock.c +p' >
|
|
|
|
<debugfs>/dynamic_debug/control
|
|
|
|
|
|
|
|
// enable all the messages in the NFS server module
|
|
|
|
nullarbor:~ # echo -n 'module nfsd +p' >
|
|
|
|
<debugfs>/dynamic_debug/control
|
|
|
|
|
|
|
|
// enable all 12 messages in the function svc_process()
|
|
|
|
nullarbor:~ # echo -n 'func svc_process +p' >
|
|
|
|
<debugfs>/dynamic_debug/control
|
|
|
|
|
|
|
|
// disable all 12 messages in the function svc_process()
|
|
|
|
nullarbor:~ # echo -n 'func svc_process -p' >
|
|
|
|
<debugfs>/dynamic_debug/control
|
|
|
|
|
2017-03-16 16:37:32 +08:00
|
|
|
See Documentation/admin-guide/dynamic-debug-howto.rst for additional
|
|
|
|
information.
|
2013-07-02 04:04:49 +08:00
|
|
|
|
|
|
|
endmenu # "printk and dmesg options"
|
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
menu "Compile-time checks and compiler options"
|
|
|
|
|
|
|
|
config DEBUG_INFO
|
|
|
|
bool "Compile the kernel with debug info"
|
2014-02-05 04:20:01 +08:00
|
|
|
depends on DEBUG_KERNEL && !COMPILE_TEST
|
2013-07-02 04:04:46 +08:00
|
|
|
help
|
|
|
|
If you say Y here the resulting kernel image will include
|
|
|
|
debugging info resulting in a larger kernel image.
|
|
|
|
This adds debug symbols to the kernel and modules (gcc -g), and
|
|
|
|
is needed if you intend to use kernel crashdump or binary object
|
|
|
|
tools like crash, kgdb, LKCD, gdb, etc on the kernel.
|
|
|
|
Say Y here only if you plan to debug the kernel.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config DEBUG_INFO_REDUCED
|
|
|
|
bool "Reduce debugging information"
|
|
|
|
depends on DEBUG_INFO
|
|
|
|
help
|
|
|
|
If you say Y here gcc is instructed to generate less debugging
|
|
|
|
information for structure types. This means that tools that
|
|
|
|
need full debugging information (like kgdb or systemtap) won't
|
|
|
|
be happy. But if you merely need debugging information to
|
|
|
|
resolve line numbers there is no loss. Advantage is that
|
|
|
|
build directory object sizes shrink dramatically over a full
|
|
|
|
DEBUG_INFO build and compile times are reduced too.
|
|
|
|
Only works with newer gcc versions.
|
|
|
|
|
2014-07-31 02:50:18 +08:00
|
|
|
config DEBUG_INFO_SPLIT
|
|
|
|
bool "Produce split debuginfo in .dwo files"
|
2018-03-08 06:30:54 +08:00
|
|
|
depends on DEBUG_INFO
|
2014-07-31 02:50:18 +08:00
|
|
|
help
|
|
|
|
Generate debug info into separate .dwo files. This significantly
|
|
|
|
reduces the build directory size for builds with DEBUG_INFO,
|
|
|
|
because it stores the information only once on disk in .dwo
|
|
|
|
files instead of multiple times in object files and executables.
|
|
|
|
In addition the debug information is also compressed.
|
|
|
|
|
|
|
|
Requires recent gcc (4.7+) and recent gdb/binutils.
|
|
|
|
Any tool that packages or reads debug information would need
|
|
|
|
to know about the .dwo files and include them.
|
|
|
|
Incompatible with older versions of ccache.
|
|
|
|
|
2014-07-31 02:50:19 +08:00
|
|
|
config DEBUG_INFO_DWARF4
|
|
|
|
bool "Generate dwarf4 debuginfo"
|
|
|
|
depends on DEBUG_INFO
|
|
|
|
help
|
|
|
|
Generate dwarf4 debug info. This requires recent versions
|
|
|
|
of gcc and gdb. It makes the debug information larger.
|
|
|
|
But it significantly improves the success of resolving
|
|
|
|
variables in gdb on optimized code.
|
|
|
|
|
2015-02-18 05:46:36 +08:00
|
|
|
config GDB_SCRIPTS
|
|
|
|
bool "Provide GDB scripts for kernel debugging"
|
|
|
|
depends on DEBUG_INFO
|
|
|
|
help
|
|
|
|
This creates the required links to GDB helper scripts in the
|
|
|
|
build directory. If you load vmlinux into gdb, the helper
|
|
|
|
scripts will be automatically imported by gdb as well, and
|
|
|
|
additional functions are available to analyze a Linux kernel
|
2016-12-15 07:05:40 +08:00
|
|
|
instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
|
|
|
|
for further details.
|
2015-02-18 05:46:36 +08:00
|
|
|
|
2006-08-15 13:43:18 +08:00
|
|
|
config ENABLE_MUST_CHECK
|
|
|
|
bool "Enable __must_check logic"
|
|
|
|
default y
|
|
|
|
help
|
|
|
|
Enable the __must_check logic in the kernel build. Disable this to
|
|
|
|
suppress the "warning: ignoring return value of 'foo', declared with
|
|
|
|
attribute warn_unused_result" messages.
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-02-22 22:15:03 +08:00
|
|
|
config FRAME_WARN
|
|
|
|
int "Warn for stack frames larger than (needs gcc 4.4)"
|
|
|
|
range 0 8192
|
kasan: rework Kconfig settings
We get a lot of very large stack frames using gcc-7.0.1 with the default
-fsanitize-address-use-after-scope --param asan-stack=1 options, which can
easily cause an overflow of the kernel stack, e.g.
drivers/gpu/drm/i915/gvt/handlers.c:2434:1: warning: the frame size of 46176 bytes is larger than 3072 bytes
drivers/net/wireless/ralink/rt2x00/rt2800lib.c:5650:1: warning: the frame size of 23632 bytes is larger than 3072 bytes
lib/atomic64_test.c:250:1: warning: the frame size of 11200 bytes is larger than 3072 bytes
drivers/gpu/drm/i915/gvt/handlers.c:2621:1: warning: the frame size of 9208 bytes is larger than 3072 bytes
drivers/media/dvb-frontends/stv090x.c:3431:1: warning: the frame size of 6816 bytes is larger than 3072 bytes
fs/fscache/stats.c:287:1: warning: the frame size of 6536 bytes is larger than 3072 bytes
To reduce this risk, -fsanitize-address-use-after-scope is now split out
into a separate CONFIG_KASAN_EXTRA Kconfig option, leading to stack
frames that are smaller than 2 kilobytes most of the time on x86_64. An
earlier version of this patch also prevented combining KASAN_EXTRA with
KASAN_INLINE, but that is no longer necessary with gcc-7.0.1.
All patches to get the frame size below 2048 bytes with CONFIG_KASAN=y
and CONFIG_KASAN_EXTRA=n have been merged by maintainers now, so we can
bring back that default now. KASAN_EXTRA=y still causes lots of
warnings but now defaults to !COMPILE_TEST to disable it in
allmodconfig, and it remains disabled in all other defconfigs since it
is a new option. I arbitrarily raise the warning limit for KASAN_EXTRA
to 3072 to reduce the noise, but an allmodconfig kernel still has around
50 warnings on gcc-7.
I experimented a bit more with smaller stack frames and have another
follow-up series that reduces the warning limit for 64-bit architectures
to 1280 bytes (without CONFIG_KASAN).
With earlier versions of this patch series, I also had patches to address
the warnings we get with KASAN and/or KASAN_EXTRA, using a
"noinline_if_stackbloat" annotation.
That annotation now got replaced with a gcc-8 bugfix (see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715) and a workaround for
older compilers, which means that KASAN_EXTRA is now just as bad as
before and will lead to an instant stack overflow in a few extreme
cases.
This reverts parts of commit 3f181b4d8652 ("lib/Kconfig.debug: disable
-Wframe-larger-than warnings with KASAN=y"). Two patches in linux-next
should be merged first to avoid introducing warnings in an allmodconfig
build:
3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN")
16c3ada89cff ("media: r820t: fix r820t_write_reg for KASAN")
Do we really need to backport this?
I think we do: without this patch, enabling KASAN will lead to
unavoidable kernel stack overflow in certain device drivers when built
with gcc-7 or higher on linux-4.10+ or any version that contains a
backport of commit c5caf21ab0cf8. Most people are probably still on
older compilers, but it will get worse over time as they upgrade their
distros.
The warnings we get on kernels older than this should all be for code
that uses dangerously large stack frames, though most of them do not
cause an actual stack overflow by themselves. The asan-stack option was
added in linux-4.0, and commit 3f181b4d8652 ("lib/Kconfig.debug:
disable -Wframe-larger-than warnings with KASAN=y") effectively turned
off the warning for allmodconfig kernels, so I would like to see this
fix backported to any kernels later than 4.0.
I have done dozens of fixes for individual functions with stack frames
larger than 2048 bytes with asan-stack, and I plan to make sure that
all those fixes make it into the stable kernels as well (most are
already there).
Part of the complication here is that asan-stack (from 4.0) was
originally assumed to always require much larger stacks, but that
turned out to be a combination of multiple gcc bugs that we have now
worked around and fixed, but sanitize-address-use-after-scope (from
v4.10) has a much higher inherent stack usage and also suffers from at
least three other problems that we have analyzed but not yet fixed
upstream, each of them makes the stack usage more severe than it should
be.
Link: http://lkml.kernel.org/r/20171221134744.2295529-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-02-07 07:41:41 +08:00
|
|
|
default 3072 if KASAN_EXTRA
|
2016-10-28 08:46:41 +08:00
|
|
|
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
|
2017-09-12 03:41:43 +08:00
|
|
|
default 1280 if (!64BIT && PARISC)
|
|
|
|
default 1024 if (!64BIT && !PARISC)
|
2008-02-22 22:15:03 +08:00
|
|
|
default 2048 if 64BIT
|
|
|
|
help
|
|
|
|
Tell gcc to warn at build time for stack frames larger than this.
|
|
|
|
Setting this too low will cause a lot of warnings.
|
|
|
|
Setting it to 0 disables the warning.
|
|
|
|
Requires gcc 4.4
|
|
|
|
|
2009-09-19 03:49:22 +08:00
|
|
|
config STRIP_ASM_SYMS
|
|
|
|
bool "Strip assembler-generated symbols during link"
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
Strip internal assembler-generated symbols during a link (symbols
|
|
|
|
that look like '.Lxxx') so they don't pollute the output of
|
|
|
|
get_wchan() and suchlike.
|
|
|
|
|
2012-03-29 02:51:18 +08:00
|
|
|
config READABLE_ASM
|
|
|
|
bool "Generate readable assembler code"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Disable some compiler optimizations that tend to generate human unreadable
|
|
|
|
assembler output. This may make the kernel slightly slower, but it helps
|
|
|
|
to keep kernel developers who have to stare a lot at assembler listings
|
|
|
|
sane.
|
|
|
|
|
2006-06-28 19:26:45 +08:00
|
|
|
config UNUSED_SYMBOLS
|
|
|
|
bool "Enable unused/obsolete exported symbols"
|
|
|
|
default y if X86
|
|
|
|
help
|
|
|
|
Unused but exported symbols make the kernel needlessly bigger. For
|
|
|
|
that reason most of these unused exports will soon be removed. This
|
|
|
|
option is provided temporarily to provide a transition period in case
|
|
|
|
some external kernel module needs one of these symbols anyway. If you
|
|
|
|
encounter such a case in your module, consider if you are actually
|
|
|
|
using the right API. (rationale: since nobody in the kernel is using
|
|
|
|
this in a module, there is a pretty good chance it's actually the
|
|
|
|
wrong interface to use). If you really need the symbol, please send a
|
|
|
|
mail to the linux kernel mailing list mentioning the symbol and why
|
|
|
|
you really need it, and what the merge plan to the mainline kernel for
|
|
|
|
your module is.
|
|
|
|
|
mm/page_owner: keep track of page owners
This is the page owner tracking code which is introduced so far ago. It
is resident on Andrew's tree, though, nobody tried to upstream so it
remain as is. Our company uses this feature actively to debug memory leak
or to find a memory hogger so I decide to upstream this feature.
This functionality help us to know who allocates the page. When
allocating a page, we store some information about allocation in extra
memory. Later, if we need to know status of all pages, we can get and
analyze it from this stored information.
In previous version of this feature, extra memory is statically defined in
struct page, but, in this version, extra memory is allocated outside of
struct page. It enables us to turn on/off this feature at boottime
without considerable memory waste.
Although we already have tracepoint for tracing page allocation/free,
using it to analyze page owner is rather complex. We need to enlarge the
trace buffer for preventing overlapping until userspace program launched.
And, launched program continually dump out the trace buffer for later
analysis and it would change system behaviour with more possibility rather
than just keeping it in memory, so bad for debug.
Moreover, we can use page_owner feature further for various purposes. For
example, we can use it for fragmentation statistics implemented in this
patch. And, I also plan to implement some CMA failure debugging feature
using this interface.
I'd like to give the credit for all developers contributed this feature,
but, it's not easy because I don't know exact history. Sorry about that.
Below is people who has "Signed-off-by" in the patches in Andrew's tree.
Contributor:
Alexander Nyberg <alexn@dsv.su.se>
Mel Gorman <mgorman@suse.de>
Dave Hansen <dave@linux.vnet.ibm.com>
Minchan Kim <minchan@kernel.org>
Michal Nazarewicz <mina86@mina86.com>
Andrew Morton <akpm@linux-foundation.org>
Jungsoo Son <jungsoo.son@lge.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dave Hansen <dave@sr71.net>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Jungsoo Son <jungsoo.son@lge.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-12-13 08:56:01 +08:00
|
|
|
config PAGE_OWNER
|
|
|
|
bool "Track page owner"
|
|
|
|
depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
|
|
|
|
select DEBUG_FS
|
|
|
|
select STACKTRACE
|
mm/page_owner: use stackdepot to store stacktrace
Currently, we store each page's allocation stacktrace on corresponding
page_ext structure and it requires a lot of memory. This causes the
problem that memory tight system doesn't work well if page_owner is
enabled. Moreover, even with this large memory consumption, we cannot
get full stacktrace because we allocate memory at boot time and just
maintain 8 stacktrace slots to balance memory consumption. We could
increase it to more but it would make system unusable or change system
behaviour.
To solve the problem, this patch uses stackdepot to store stacktrace.
It obviously provides memory saving but there is a drawback that
stackdepot could fail.
stackdepot allocates memory at runtime so it could fail if system has
not enough memory. But, most of allocation stack are generated at very
early time and there are much memory at this time. So, failure would
not happen easily. And, one failure means that we miss just one page's
allocation stacktrace so it would not be a big problem. In this patch,
when memory allocation failure happens, we store special stacktrace
handle to the page that is failed to save stacktrace. With it, user can
guess memory usage properly even if failure happens.
Memory saving looks as following. (4GB memory system with page_owner)
(before the patch -> after the patch)
static allocation:
92274688 bytes -> 25165824 bytes
dynamic allocation after boot + kernel build:
0 bytes -> 327680 bytes
total:
92274688 bytes -> 25493504 bytes
72% reduction in total.
Note that implementation looks complex than someone would imagine
because there is recursion issue. stackdepot uses page allocator and
page_owner is called at page allocation. Using stackdepot in page_owner
could re-call page allocator and then page_owner. That is a recursion.
To detect and avoid it, whenever we obtain stacktrace, recursion is
checked and page_owner is set to dummy information if found. Dummy
information means that this page is allocated for page_owner feature
itself (such as stackdepot) and it's understandable behavior for user.
[iamjoonsoo.kim@lge.com: mm-page_owner-use-stackdepot-to-store-stacktrace-v3]
Link: http://lkml.kernel.org/r/1464230275-25791-6-git-send-email-iamjoonsoo.kim@lge.com
Link: http://lkml.kernel.org/r/1466150259-27727-7-git-send-email-iamjoonsoo.kim@lge.com
Link: http://lkml.kernel.org/r/1464230275-25791-6-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-07-27 06:23:55 +08:00
|
|
|
select STACKDEPOT
|
mm/page_owner: keep track of page owners
This is the page owner tracking code which is introduced so far ago. It
is resident on Andrew's tree, though, nobody tried to upstream so it
remain as is. Our company uses this feature actively to debug memory leak
or to find a memory hogger so I decide to upstream this feature.
This functionality help us to know who allocates the page. When
allocating a page, we store some information about allocation in extra
memory. Later, if we need to know status of all pages, we can get and
analyze it from this stored information.
In previous version of this feature, extra memory is statically defined in
struct page, but, in this version, extra memory is allocated outside of
struct page. It enables us to turn on/off this feature at boottime
without considerable memory waste.
Although we already have tracepoint for tracing page allocation/free,
using it to analyze page owner is rather complex. We need to enlarge the
trace buffer for preventing overlapping until userspace program launched.
And, launched program continually dump out the trace buffer for later
analysis and it would change system behaviour with more possibility rather
than just keeping it in memory, so bad for debug.
Moreover, we can use page_owner feature further for various purposes. For
example, we can use it for fragmentation statistics implemented in this
patch. And, I also plan to implement some CMA failure debugging feature
using this interface.
I'd like to give the credit for all developers contributed this feature,
but, it's not easy because I don't know exact history. Sorry about that.
Below is people who has "Signed-off-by" in the patches in Andrew's tree.
Contributor:
Alexander Nyberg <alexn@dsv.su.se>
Mel Gorman <mgorman@suse.de>
Dave Hansen <dave@linux.vnet.ibm.com>
Minchan Kim <minchan@kernel.org>
Michal Nazarewicz <mina86@mina86.com>
Andrew Morton <akpm@linux-foundation.org>
Jungsoo Son <jungsoo.son@lge.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dave Hansen <dave@sr71.net>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Jungsoo Son <jungsoo.son@lge.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-12-13 08:56:01 +08:00
|
|
|
select PAGE_EXTENSION
|
|
|
|
help
|
|
|
|
This keeps track of what call chain is the owner of a page, may
|
|
|
|
help to find bare alloc_page(s) leaks. Even if you include this
|
|
|
|
feature on your build, it is disabled by default. You should pass
|
|
|
|
"page_owner=on" to boot parameter in order to enable it. Eats
|
|
|
|
a fair amount of memory if enabled. See tools/vm/page_owner_sort.c
|
|
|
|
for user-space helper.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2006-12-10 18:18:37 +08:00
|
|
|
config DEBUG_FS
|
|
|
|
bool "Debug Filesystem"
|
|
|
|
help
|
|
|
|
debugfs is a virtual file system that kernel developers use to put
|
|
|
|
debugging files into. Enable this option to be able to read and
|
|
|
|
write to these files.
|
|
|
|
|
2008-05-20 06:06:00 +08:00
|
|
|
For detailed documentation on the debugfs API, see
|
2017-05-14 23:04:55 +08:00
|
|
|
Documentation/filesystems/.
|
2008-05-20 06:06:00 +08:00
|
|
|
|
2006-12-10 18:18:37 +08:00
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config HEADERS_CHECK
|
|
|
|
bool "Run 'make headers_check' when building vmlinux"
|
|
|
|
depends on !UML
|
|
|
|
help
|
|
|
|
This option will extract the user-visible kernel headers whenever
|
|
|
|
building the kernel, and will run basic sanity checks on them to
|
|
|
|
ensure that exported files do not attempt to include files which
|
|
|
|
were not exported, etc.
|
|
|
|
|
|
|
|
If you're making modifications to header files which are
|
|
|
|
relevant for userspace, say 'Y', and check the headers
|
|
|
|
exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in
|
|
|
|
your build tree), to make sure they're suitable.
|
|
|
|
|
2008-01-22 04:31:44 +08:00
|
|
|
config DEBUG_SECTION_MISMATCH
|
|
|
|
bool "Enable full Section mismatch analysis"
|
|
|
|
help
|
|
|
|
The section mismatch analysis checks if there are illegal
|
|
|
|
references from one section to another section.
|
2011-04-17 12:08:48 +08:00
|
|
|
During linktime or runtime, some sections are dropped;
|
|
|
|
any use of code/data previously in these sections would
|
2008-01-22 04:31:44 +08:00
|
|
|
most likely result in an oops.
|
2011-04-17 12:08:48 +08:00
|
|
|
In the code, functions and variables are annotated with
|
2013-06-20 02:53:51 +08:00
|
|
|
__init, etc. (see the full list in include/linux/init.h),
|
2008-01-30 18:13:23 +08:00
|
|
|
which results in the code/data being placed in specific sections.
|
2011-04-17 12:08:48 +08:00
|
|
|
The section mismatch analysis is always performed after a full
|
|
|
|
kernel build, and enabling this option causes the following
|
|
|
|
additional steps to occur:
|
|
|
|
- Add the option -fno-inline-functions-called-once to gcc commands.
|
|
|
|
When inlining a function annotated with __init in a non-init
|
|
|
|
function, we would lose the section information and thus
|
2008-01-22 04:31:44 +08:00
|
|
|
the analysis would not catch the illegal reference.
|
2011-04-17 12:08:48 +08:00
|
|
|
This option tells gcc to inline less (but it does result in
|
|
|
|
a larger kernel).
|
2018-02-10 22:25:04 +08:00
|
|
|
- Run the section mismatch analysis for each module/built-in.a file.
|
2011-04-17 12:08:48 +08:00
|
|
|
When we run the section mismatch analysis on vmlinux.o, we
|
2016-09-09 16:04:58 +08:00
|
|
|
lose valuable information about where the mismatch was
|
2008-01-22 04:31:44 +08:00
|
|
|
introduced.
|
2018-02-10 22:25:04 +08:00
|
|
|
Running the analysis for each module/built-in.a file
|
2011-04-17 12:08:48 +08:00
|
|
|
tells where the mismatch happens much closer to the
|
|
|
|
source. The drawback is that the same mismatch is
|
|
|
|
reported at least twice.
|
|
|
|
- Enable verbose reporting from modpost in order to help resolve
|
|
|
|
the section mismatches that are reported.
|
2008-01-22 04:31:44 +08:00
|
|
|
|
2015-10-06 07:14:42 +08:00
|
|
|
config SECTION_MISMATCH_WARN_ONLY
|
|
|
|
bool "Make section mismatch errors non-fatal"
|
|
|
|
default y
|
|
|
|
help
|
|
|
|
If you say N here, the build process will fail if there are any
|
|
|
|
section mismatches, instead of just throwing warnings.
|
|
|
|
|
|
|
|
If unsure, say Y.
|
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
#
|
|
|
|
# Select this config option from the architecture Kconfig, if it
|
|
|
|
# is preferred to always offer frame pointers as a config
|
|
|
|
# option on the architecture (regardless of DEBUG_KERNEL):
|
|
|
|
#
|
|
|
|
config ARCH_WANT_FRAME_POINTERS
|
|
|
|
bool
|
2006-01-10 12:54:51 +08:00
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
config FRAME_POINTER
|
|
|
|
bool "Compile the kernel with frame pointers"
|
2018-03-08 06:30:54 +08:00
|
|
|
depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS
|
2013-07-02 04:04:46 +08:00
|
|
|
default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
|
2007-02-12 16:52:00 +08:00
|
|
|
help
|
2013-07-02 04:04:46 +08:00
|
|
|
If you say Y here the resulting kernel image will be slightly
|
|
|
|
larger and slower, but it gives very useful debugging information
|
|
|
|
in case of kernel bugs. (precise oopses/stacktraces/warnings)
|
2007-02-12 16:52:00 +08:00
|
|
|
|
2016-02-29 12:22:42 +08:00
|
|
|
config STACK_VALIDATION
|
|
|
|
bool "Compile-time stack metadata validation"
|
|
|
|
depends on HAVE_STACK_VALIDATION
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
Add compile-time checks to validate stack metadata, including frame
|
|
|
|
pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure
|
|
|
|
that runtime stack traces are more reliable.
|
|
|
|
|
2017-07-25 07:36:57 +08:00
|
|
|
This is also a prerequisite for generation of ORC unwind data, which
|
2017-10-14 04:02:00 +08:00
|
|
|
is needed for CONFIG_UNWINDER_ORC.
|
2017-07-25 07:36:57 +08:00
|
|
|
|
2016-02-29 12:22:42 +08:00
|
|
|
For more information, see
|
|
|
|
tools/objtool/Documentation/stack-validation.txt.
|
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
config DEBUG_FORCE_WEAK_PER_CPU
|
|
|
|
bool "Force weak per-cpu definitions"
|
|
|
|
depends on DEBUG_KERNEL
|
2005-09-07 06:16:27 +08:00
|
|
|
help
|
2013-07-02 04:04:46 +08:00
|
|
|
s390 and alpha require percpu variables in modules to be
|
|
|
|
defined weak to work around addressing range issue which
|
|
|
|
puts the following two restrictions on percpu variable
|
|
|
|
definitions.
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
1. percpu symbols must be unique whether static or not
|
|
|
|
2. percpu variables can't be defined inside a function
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
To ensure that generic code follows the above rules, this
|
|
|
|
option forces all percpu variables to be defined as weak.
|
2012-02-10 06:42:21 +08:00
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
endmenu # "Compile-time checks and compiler options"
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2013-07-02 04:04:46 +08:00
|
|
|
config MAGIC_SYSRQ
|
|
|
|
bool "Magic SysRq key"
|
|
|
|
depends on !UML
|
|
|
|
help
|
|
|
|
If you say Y here, you will have some control over the system even
|
|
|
|
if the system crashes for example during kernel debugging (e.g., you
|
|
|
|
will be able to flush the buffer cache to disk, reboot the system
|
|
|
|
immediately or dump some status information). This is accomplished
|
|
|
|
by pressing various keys while holding SysRq (Alt+PrintScreen). It
|
|
|
|
also works on a serial console (on PC hardware at least), if you
|
|
|
|
send a BREAK and then within 5 seconds a command keypress. The
|
2017-03-16 16:37:32 +08:00
|
|
|
keys are documented in <file:Documentation/admin-guide/sysrq.rst>.
|
|
|
|
Don't say Y unless you really know what this hack does.
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2013-10-07 08:05:46 +08:00
|
|
|
config MAGIC_SYSRQ_DEFAULT_ENABLE
|
|
|
|
hex "Enable magic SysRq key functions by default"
|
|
|
|
depends on MAGIC_SYSRQ
|
|
|
|
default 0x1
|
|
|
|
help
|
|
|
|
Specifies which SysRq key functions are enabled by default.
|
|
|
|
This may be set to 1 or 0 to enable or disable them all, or
|
2017-03-16 16:37:32 +08:00
|
|
|
to a bitmask as described in Documentation/admin-guide/sysrq.rst.
|
2013-10-07 08:05:46 +08:00
|
|
|
|
2016-12-22 15:31:34 +08:00
|
|
|
config MAGIC_SYSRQ_SERIAL
|
|
|
|
bool "Enable magic SysRq key over serial"
|
|
|
|
depends on MAGIC_SYSRQ
|
|
|
|
default y
|
|
|
|
help
|
|
|
|
Many embedded boards have a disconnected TTL level serial which can
|
|
|
|
generate some garbage that can lead to spurious false sysrq detects.
|
|
|
|
This option allows you to decide whether you want to enable the
|
|
|
|
magic SysRq key.
|
|
|
|
|
2006-01-10 12:54:51 +08:00
|
|
|
config DEBUG_KERNEL
|
|
|
|
bool "Kernel debugging"
|
2011-03-23 07:34:16 +08:00
|
|
|
help
|
2006-01-10 12:54:51 +08:00
|
|
|
Say Y here if you are developing drivers or trying to debug and
|
|
|
|
identify kernel problems.
|
2011-03-23 07:34:16 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
menu "Memory Debugging"
|
2011-03-23 07:34:16 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
source mm/Kconfig.debug
|
2011-03-23 07:34:16 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEBUG_OBJECTS
|
|
|
|
bool "Debug object operations"
|
|
|
|
depends on DEBUG_KERNEL
|
2008-05-13 03:21:04 +08:00
|
|
|
help
|
2013-07-02 04:04:43 +08:00
|
|
|
If you say Y here, additional code will be inserted into the
|
|
|
|
kernel to track the life time of various objects and validate
|
|
|
|
the operations on those objects.
|
2008-05-13 03:21:04 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEBUG_OBJECTS_SELFTEST
|
|
|
|
bool "Debug objects selftest"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
This enables the selftest of the object debug code.
|
2008-05-13 03:21:04 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEBUG_OBJECTS_FREE
|
|
|
|
bool "Debug objects in freed memory"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
This enables checks whether a k/v free operation frees an area
|
|
|
|
which contains an object which has not been deactivated
|
|
|
|
properly. This can make kmalloc/kfree-intensive workloads
|
|
|
|
much slower.
|
2008-04-30 15:55:01 +08:00
|
|
|
|
2008-04-30 15:55:03 +08:00
|
|
|
config DEBUG_OBJECTS_TIMERS
|
|
|
|
bool "Debug timer objects"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
If you say Y here, additional code will be inserted into the
|
|
|
|
timer routines to track the life time of timer objects and
|
|
|
|
validate the timer operations.
|
|
|
|
|
2009-11-16 00:09:48 +08:00
|
|
|
config DEBUG_OBJECTS_WORK
|
|
|
|
bool "Debug work objects"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
If you say Y here, additional code will be inserted into the
|
|
|
|
work queue routines to track the life time of work objects and
|
|
|
|
validate the work operations.
|
|
|
|
|
2010-04-17 20:48:42 +08:00
|
|
|
config DEBUG_OBJECTS_RCU_HEAD
|
|
|
|
bool "Debug RCU callbacks objects"
|
2011-02-24 01:42:14 +08:00
|
|
|
depends on DEBUG_OBJECTS
|
2010-04-17 20:48:42 +08:00
|
|
|
help
|
|
|
|
Enable this to turn on debugging of RCU list heads (call_rcu() usage).
|
|
|
|
|
2010-10-27 05:23:05 +08:00
|
|
|
config DEBUG_OBJECTS_PERCPU_COUNTER
|
|
|
|
bool "Debug percpu counter objects"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
If you say Y here, additional code will be inserted into the
|
|
|
|
percpu counter routines to track the life time of percpu counter
|
|
|
|
objects and validate the percpu counter operations.
|
|
|
|
|
2008-11-26 17:02:00 +08:00
|
|
|
config DEBUG_OBJECTS_ENABLE_DEFAULT
|
|
|
|
int "debug_objects bootup default value (0-1)"
|
|
|
|
range 0 1
|
|
|
|
default "1"
|
|
|
|
depends on DEBUG_OBJECTS
|
|
|
|
help
|
|
|
|
Debug objects boot parameter default value
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
config DEBUG_SLAB
|
2006-03-25 19:07:22 +08:00
|
|
|
bool "Debug slab memory allocations"
|
2017-11-16 09:36:02 +08:00
|
|
|
depends on DEBUG_KERNEL && SLAB
|
2005-04-17 06:20:36 +08:00
|
|
|
help
|
|
|
|
Say Y here to have the kernel do limited verification on memory
|
|
|
|
allocation as well as poisoning memory on free to catch use of freed
|
|
|
|
memory. This can make kmalloc/kfree-intensive workloads much slower.
|
|
|
|
|
2006-03-25 19:06:39 +08:00
|
|
|
config DEBUG_SLAB_LEAK
|
|
|
|
bool "Memory leak debugging"
|
|
|
|
depends on DEBUG_SLAB
|
|
|
|
|
2007-07-16 14:38:14 +08:00
|
|
|
config SLUB_DEBUG_ON
|
|
|
|
bool "SLUB debugging on by default"
|
2017-11-16 09:36:02 +08:00
|
|
|
depends on SLUB && SLUB_DEBUG
|
2007-07-16 14:38:14 +08:00
|
|
|
default n
|
|
|
|
help
|
|
|
|
Boot with debugging on by default. SLUB boots by default with
|
|
|
|
the runtime debug capabilities switched off. Enabling this is
|
|
|
|
equivalent to specifying the "slub_debug" parameter on boot.
|
|
|
|
There is no support for more fine grained debug control like
|
|
|
|
possible with slub_debug=xxx. SLUB debugging may be switched
|
|
|
|
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
|
|
|
|
"slub_debug=-".
|
|
|
|
|
2008-02-08 09:47:41 +08:00
|
|
|
config SLUB_STATS
|
|
|
|
default n
|
|
|
|
bool "Enable SLUB performance statistics"
|
2010-10-06 02:57:26 +08:00
|
|
|
depends on SLUB && SYSFS
|
2008-02-08 09:47:41 +08:00
|
|
|
help
|
|
|
|
SLUB statistics are useful to debug SLUB's allocation behavior in
|
|
|
|
order to find ways to optimize the allocator. This should never be
|
|
|
|
enabled for production use since keeping statistics slows down
|
|
|
|
the allocator by a few percentage points. The slabinfo command
|
|
|
|
supports the determination of the most active slabs to figure
|
|
|
|
out which slabs are relevant to a particular load.
|
|
|
|
Try running: slabinfo -DA
|
|
|
|
|
2012-10-09 07:28:11 +08:00
|
|
|
config HAVE_DEBUG_KMEMLEAK
|
|
|
|
bool
|
|
|
|
|
2009-06-11 20:24:13 +08:00
|
|
|
config DEBUG_KMEMLEAK
|
|
|
|
bool "Kernel memory leak detector"
|
2013-01-17 10:54:16 +08:00
|
|
|
depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK
|
2011-04-28 00:06:19 +08:00
|
|
|
select DEBUG_FS
|
2009-06-11 20:24:13 +08:00
|
|
|
select STACKTRACE if STACKTRACE_SUPPORT
|
|
|
|
select KALLSYMS
|
2009-11-07 07:33:45 +08:00
|
|
|
select CRC32
|
2009-06-11 20:24:13 +08:00
|
|
|
help
|
|
|
|
Say Y here if you want to enable the memory leak
|
|
|
|
detector. The memory allocation/freeing is traced in a way
|
|
|
|
similar to Boehm's conservative garbage collector, the
|
|
|
|
difference being that the orphan objects are not freed but
|
|
|
|
only shown in /sys/kernel/debug/kmemleak. Enabling this
|
|
|
|
feature will introduce an overhead to memory
|
2016-12-15 07:05:40 +08:00
|
|
|
allocations. See Documentation/dev-tools/kmemleak.rst for more
|
2009-06-11 20:24:13 +08:00
|
|
|
details.
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
|
|
|
|
of finding leaks due to the slab objects poisoning.
|
|
|
|
|
|
|
|
In order to access the kmemleak file, debugfs needs to be
|
|
|
|
mounted (usually at /sys/kernel/debug).
|
|
|
|
|
|
|
|
config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
|
|
|
|
int "Maximum kmemleak early log entries"
|
|
|
|
depends on DEBUG_KMEMLEAK
|
|
|
|
range 200 40000
|
|
|
|
default 400
|
|
|
|
help
|
|
|
|
Kmemleak must track all the memory allocations to avoid
|
|
|
|
reporting false positives. Since memory may be allocated or
|
|
|
|
freed before kmemleak is initialised, an early log buffer is
|
|
|
|
used to store these actions. If kmemleak reports "early log
|
|
|
|
buffer exceeded", please increase this value.
|
|
|
|
|
|
|
|
config DEBUG_KMEMLEAK_TEST
|
|
|
|
tristate "Simple test for the kernel memory leak detector"
|
|
|
|
depends on DEBUG_KMEMLEAK && m
|
|
|
|
help
|
|
|
|
This option enables a module that explicitly leaks memory.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config DEBUG_KMEMLEAK_DEFAULT_OFF
|
|
|
|
bool "Default kmemleak to off"
|
|
|
|
depends on DEBUG_KMEMLEAK
|
|
|
|
help
|
|
|
|
Say Y here to disable kmemleak by default. It can then be enabled
|
|
|
|
on the command line via kmemleak=on.
|
|
|
|
|
|
|
|
config DEBUG_STACK_USAGE
|
|
|
|
bool "Stack utilization instrumentation"
|
2016-03-20 00:54:10 +08:00
|
|
|
depends on DEBUG_KERNEL && !IA64
|
2013-07-02 04:04:43 +08:00
|
|
|
help
|
|
|
|
Enables the display of the minimum amount of free stack which each
|
|
|
|
task has ever had available in the sysrq-T and sysrq-P debug output.
|
|
|
|
|
|
|
|
This option will slow down process creation somewhat.
|
|
|
|
|
|
|
|
config DEBUG_VM
|
|
|
|
bool "Debug VM"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this to turn on extended checks in the virtual-memory system
|
|
|
|
that may impact performance.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-06-05 07:06:46 +08:00
|
|
|
config DEBUG_VM_VMACACHE
|
|
|
|
bool "Debug VMA caching"
|
|
|
|
depends on DEBUG_VM
|
|
|
|
help
|
|
|
|
Enable this to turn on VMA caching debug information. Doing so
|
|
|
|
can cause significant overhead, so only enable it in non-production
|
|
|
|
environments.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEBUG_VM_RB
|
|
|
|
bool "Debug VM red-black trees"
|
|
|
|
depends on DEBUG_VM
|
|
|
|
help
|
2014-04-19 06:07:22 +08:00
|
|
|
Enable VM red-black tree debugging information and extra validations.
|
2013-07-02 04:04:43 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2016-01-16 08:51:21 +08:00
|
|
|
config DEBUG_VM_PGFLAGS
|
|
|
|
bool "Debug page-flags operations"
|
|
|
|
depends on DEBUG_VM
|
|
|
|
help
|
|
|
|
Enables extra validation on page flags operations.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2017-01-11 05:35:40 +08:00
|
|
|
config ARCH_HAS_DEBUG_VIRTUAL
|
|
|
|
bool
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEBUG_VIRTUAL
|
|
|
|
bool "Debug VM translations"
|
2017-01-11 05:35:40 +08:00
|
|
|
depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL
|
2013-07-02 04:04:43 +08:00
|
|
|
help
|
|
|
|
Enable some costly sanity checks in virtual to page code. This can
|
|
|
|
catch mistakes with virt_to_page() and friends.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config DEBUG_NOMMU_REGIONS
|
|
|
|
bool "Debug the global anon/private NOMMU mapping region tree"
|
|
|
|
depends on DEBUG_KERNEL && !MMU
|
|
|
|
help
|
|
|
|
This option causes the global tree of anonymous and private mapping
|
|
|
|
regions to be regularly checked for invalid topology.
|
|
|
|
|
|
|
|
config DEBUG_MEMORY_INIT
|
|
|
|
bool "Debug memory initialisation" if EXPERT
|
|
|
|
default !EXPERT
|
|
|
|
help
|
|
|
|
Enable this for additional checks during memory initialisation.
|
|
|
|
The sanity checks verify aspects of the VM such as the memory model
|
|
|
|
and other information provided by the architecture. Verbose
|
|
|
|
information will be printed at KERN_DEBUG loglevel depending
|
|
|
|
on the mminit_loglevel= command-line option.
|
|
|
|
|
|
|
|
If unsure, say Y.
|
|
|
|
|
|
|
|
config MEMORY_NOTIFIER_ERROR_INJECT
|
|
|
|
tristate "Memory hotplug notifier error injection module"
|
|
|
|
depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION
|
|
|
|
help
|
|
|
|
This option provides the ability to inject artificial errors to
|
|
|
|
memory hotplug notifier chain callbacks. It is controlled through
|
|
|
|
debugfs interface under /sys/kernel/debug/notifier-error-inject/memory
|
|
|
|
|
|
|
|
If the notifier call chain should be failed with some events
|
|
|
|
notified, write the error code to "actions/<notifier event>/error".
|
|
|
|
|
|
|
|
Example: Inject memory hotplug offline error (-12 == -ENOMEM)
|
|
|
|
|
|
|
|
# cd /sys/kernel/debug/notifier-error-inject/memory
|
|
|
|
# echo -12 > actions/MEM_GOING_OFFLINE/error
|
|
|
|
# echo offline > /sys/devices/system/memory/memoryXXX/state
|
|
|
|
bash: echo: write error: Cannot allocate memory
|
|
|
|
|
|
|
|
To compile this code as a module, choose M here: the module will
|
|
|
|
be called memory-notifier-error-inject.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config DEBUG_PER_CPU_MAPS
|
|
|
|
bool "Debug access to per_cpu maps"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
depends on SMP
|
|
|
|
help
|
|
|
|
Say Y to verify that the per_cpu map being accessed has
|
|
|
|
been set up. This adds a fair amount of code to kernel memory
|
|
|
|
and decreases performance.
|
|
|
|
|
|
|
|
Say N if unsure.
|
|
|
|
|
|
|
|
config DEBUG_HIGHMEM
|
|
|
|
bool "Highmem debugging"
|
|
|
|
depends on DEBUG_KERNEL && HIGHMEM
|
|
|
|
help
|
2014-04-15 00:55:50 +08:00
|
|
|
This option enables additional error checking for high memory
|
|
|
|
systems. Disable for production systems.
|
2013-07-02 04:04:43 +08:00
|
|
|
|
|
|
|
config HAVE_DEBUG_STACKOVERFLOW
|
|
|
|
bool
|
|
|
|
|
|
|
|
config DEBUG_STACKOVERFLOW
|
|
|
|
bool "Check for stack overflows"
|
|
|
|
depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW
|
|
|
|
---help---
|
|
|
|
Say Y here if you want to check for overflows of kernel, IRQ
|
2015-01-26 02:50:34 +08:00
|
|
|
and exception stacks (if your architecture uses them). This
|
2013-07-02 04:04:43 +08:00
|
|
|
option will show detailed messages if free stack space drops
|
|
|
|
below a certain limit.
|
|
|
|
|
|
|
|
These kinds of bugs usually occur when call-chains in the
|
|
|
|
kernel get too deep, especially when interrupts are
|
|
|
|
involved.
|
|
|
|
|
|
|
|
Use this in cases where you see apparently random memory
|
|
|
|
corruption, especially if it appears in 'struct thread_info'.
|
|
|
|
|
|
|
|
If in doubt, say "N".
|
|
|
|
|
kasan: add kernel address sanitizer infrastructure
Kernel Address sanitizer (KASan) is a dynamic memory error detector. It
provides fast and comprehensive solution for finding use-after-free and
out-of-bounds bugs.
KASAN uses compile-time instrumentation for checking every memory access,
therefore GCC > v4.9.2 is required. v4.9.2 almost works, but has issues with
putting symbol aliases into the wrong section, which breaks kasan
instrumentation of globals.
This patch only adds infrastructure for kernel address sanitizer. It's
not available for use yet. The idea and some code was borrowed from [1].
Basic idea:
The main idea of KASAN is to use shadow memory to record whether each byte
of memory is safe to access or not, and use compiler's instrumentation to
check the shadow memory on each memory access.
Address sanitizer uses 1/8 of the memory addressable in kernel for shadow
memory and uses direct mapping with a scale and offset to translate a
memory address to its corresponding shadow address.
Here is function to translate address to corresponding shadow address:
unsigned long kasan_mem_to_shadow(unsigned long addr)
{
return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}
where KASAN_SHADOW_SCALE_SHIFT = 3.
So for every 8 bytes there is one corresponding byte of shadow memory.
The following encoding is used for each shadow byte: 0 means that all 8 bytes
of the corresponding memory region are valid for access; k (1 <= k <= 7)
means that the first k bytes are valid for access, and other (8 - k) bytes
are not; Any negative value indicates that the entire 8-bytes are
inaccessible. Different negative values used to distinguish between
different kinds of inaccessible memory (redzones, freed memory) (see
mm/kasan/kasan.h).
To be able to detect accesses to bad memory we need a special compiler.
Such compiler inserts a specific function calls (__asan_load*(addr),
__asan_store*(addr)) before each memory access of size 1, 2, 4, 8 or 16.
These functions check whether memory region is valid to access or not by
checking the corresponding shadow memory. If the access is not valid, an error
is printed.
Historical background of the address sanitizer from Dmitry Vyukov:
"We've developed the set of tools, AddressSanitizer (Asan),
ThreadSanitizer and MemorySanitizer, for user space. We actively use
them for testing inside of Google (continuous testing, fuzzing,
running prod services). To date the tools have found more than 10'000
scary bugs in Chromium, Google internal codebase and various
open-source projects (Firefox, OpenSSL, gcc, clang, ffmpeg, MySQL and
lots of others): [2] [3] [4].
The tools are part of both gcc and clang compilers.
We have not yet done massive testing under the Kernel AddressSanitizer
(it's kind of chicken and egg problem, you need it to be upstream to
start applying it extensively). To date it has found about 50 bugs.
Bugs that we've found in upstream kernel are listed in [5].
We've also found ~20 bugs in our internal version of the kernel. Also
people from Samsung and Oracle have found some.
[...]
As others noted, the main feature of AddressSanitizer is its
performance due to inline compiler instrumentation and simple linear
shadow memory. User-space Asan has ~2x slowdown on computational
programs and ~2x memory consumption increase. Taking into account that
kernel usually consumes only small fraction of CPU and memory when
running real user-space programs, I would expect that kernel Asan will
have ~10-30% slowdown and similar memory consumption increase (when we
finish all tuning).
I agree that Asan can well replace kmemcheck. We have plans to start
working on Kernel MemorySanitizer that finds uses of uninitialized
memory. Asan+Msan will provide feature-parity with kmemcheck. As
others noted, Asan will unlikely replace debug slab and pagealloc that
can be enabled at runtime. Asan uses compiler instrumentation, so even
if it is disabled, it still incurs visible overheads.
Asan technology is easily portable to other architectures. Compiler
instrumentation is fully portable. Runtime has some arch-dependent
parts like shadow mapping and atomic operation interception. They are
relatively easy to port."
Comparison with other debugging features:
========================================
KMEMCHECK:
- KASan can do almost everything that kmemcheck can. KASan uses
compile-time instrumentation, which makes it significantly faster than
kmemcheck. The only advantage of kmemcheck over KASan is detection of
uninitialized memory reads.
Some brief performance testing showed that kasan could be
x500-x600 times faster than kmemcheck:
$ netperf -l 30
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to localhost (127.0.0.1) port 0 AF_INET
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
no debug: 87380 16384 16384 30.00 41624.72
kasan inline: 87380 16384 16384 30.00 12870.54
kasan outline: 87380 16384 16384 30.00 10586.39
kmemcheck: 87380 16384 16384 30.03 20.23
- Also kmemcheck couldn't work on several CPUs. It always sets
number of CPUs to 1. KASan doesn't have such limitation.
DEBUG_PAGEALLOC:
- KASan is slower than DEBUG_PAGEALLOC, but KASan works on sub-page
granularity level, so it is able to find more bugs.
SLUB_DEBUG (poisoning, redzones):
- SLUB_DEBUG has lower overhead than KASan.
- SLUB_DEBUG in most cases is not able to detect bad reads,
KASan is able to detect both reads and writes.
- In some cases (e.g. redzone overwritten) SLUB_DEBUG detects
bugs only on allocation/freeing of the object. KASan catches
bugs right before they happen, so we always know the exact
place of the first bad read/write.
[1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel
[2] https://code.google.com/p/address-sanitizer/wiki/FoundBugs
[3] https://code.google.com/p/thread-sanitizer/wiki/FoundBugs
[4] https://code.google.com/p/memory-sanitizer/wiki/FoundBugs
[5] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel#Trophies
Based on work by Andrey Konovalov.
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Acked-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Andrey Konovalov <adech.fo@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Konstantin Serebryany <kcc@google.com>
Cc: Dmitry Chernenkov <dmitryc@google.com>
Cc: Yuri Gribov <tetra2005@gmail.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-14 06:39:17 +08:00
|
|
|
source "lib/Kconfig.kasan"
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
endmenu # "Memory Debugging"
|
|
|
|
|
kernel: add kcov code coverage
kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.
kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is a function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard
interrupts and instrumentation of some inherently non-deterministic or
non-interesting parts of kernel is disabled (e.g. scheduler, locking).
Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.
This patch adds the necessary support on kernel side. The complementary
compiler support was added in gcc revision 231296.
We've used this support to build syzkaller system call fuzzer, which has
found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.
Why not gcov. Typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat. A
typical coverage can be just a dozen of basic blocks (e.g. an invalid
input). In such context gcov becomes prohibitively expensive as
reset/collect coverage steps depend on total number of basic
blocks/edges in program (in case of kernel it is about 2M). Cost of
kcov depends only on number of executed basic blocks/edges. On top of
that, kernel requires per-thread coverage because there are always
background threads and unrelated processes that also produce coverage.
With inlined gcov instrumentation per-thread coverage is not possible.
kcov exposes kernel PCs and control flow to user-space which is
insecure. But debugfs should not be mapped as user accessible.
Based on a patch by Quentin Casasnovas.
[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tavis Ormandy <taviso@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@google.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Drysdale <drysdale@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:27:30 +08:00
|
|
|
config ARCH_HAS_KCOV
|
|
|
|
bool
|
|
|
|
help
|
|
|
|
KCOV does not have any arch-specific code, but currently it is enabled
|
|
|
|
only for x86_64. KCOV requires testing on other archs, and most likely
|
|
|
|
disabling of instrumentation for some early boot code.
|
|
|
|
|
2018-05-28 17:22:04 +08:00
|
|
|
config CC_HAS_SANCOV_TRACE_PC
|
|
|
|
def_bool $(cc-option,-fsanitize-coverage=trace-pc)
|
|
|
|
|
kernel: add kcov code coverage
kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.
kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is a function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard
interrupts and instrumentation of some inherently non-deterministic or
non-interesting parts of kernel is disabled (e.g. scheduler, locking).
Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.
This patch adds the necessary support on kernel side. The complementary
compiler support was added in gcc revision 231296.
We've used this support to build syzkaller system call fuzzer, which has
found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.
Why not gcov. Typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat. A
typical coverage can be just a dozen of basic blocks (e.g. an invalid
input). In such context gcov becomes prohibitively expensive as
reset/collect coverage steps depend on total number of basic
blocks/edges in program (in case of kernel it is about 2M). Cost of
kcov depends only on number of executed basic blocks/edges. On top of
that, kernel requires per-thread coverage because there are always
background threads and unrelated processes that also produce coverage.
With inlined gcov instrumentation per-thread coverage is not possible.
kcov exposes kernel PCs and control flow to user-space which is
insecure. But debugfs should not be mapped as user accessible.
Based on a patch by Quentin Casasnovas.
[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tavis Ormandy <taviso@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@google.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Drysdale <drysdale@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:27:30 +08:00
|
|
|
config KCOV
|
|
|
|
bool "Code coverage for fuzzing"
|
|
|
|
depends on ARCH_HAS_KCOV
|
2018-05-28 17:22:04 +08:00
|
|
|
depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
|
kernel: add kcov code coverage
kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.
kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard
interrupts and instrumentation of some inherently non-deterministic or
non-interesting parts of kernel is disabled (e.g. scheduler, locking).
Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.
This patch adds the necessary support on kernel side. The complementary
compiler support was added in gcc revision 231296.
We've used this support to build syzkaller system call fuzzer, which has
found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.
Why not gcov. Typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat. A
typical coverage can be just a dozen of basic blocks (e.g. an invalid
input). In such context gcov becomes prohibitively expensive as
reset/collect coverage steps depend on total number of basic
blocks/edges in program (in case of kernel it is about 2M). Cost of
kcov depends only on number of executed basic blocks/edges. On top of
that, kernel requires per-thread coverage because there are always
background threads and unrelated processes that also produce coverage.
With inlined gcov instrumentation per-thread coverage is not possible.
kcov exposes kernel PCs and control flow to user-space which is
insecure. But debugfs should not be mapped as user accessible.
Based on a patch by Quentin Casasnovas.
[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tavis Ormandy <taviso@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@google.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Drysdale <drysdale@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:27:30 +08:00
|
|
|
select DEBUG_FS
|
2018-05-28 17:22:04 +08:00
|
|
|
select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
|
kernel: add kcov code coverage
kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.
kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard
interrupts and instrumentation of some inherently non-deterministic or
non-interesting parts of kernel is disabled (e.g. scheduler, locking).
Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.
This patch adds the necessary support on kernel side. The complementary
compiler support was added in gcc revision 231296.
We've used this support to build syzkaller system call fuzzer, which has
found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.
Why not gcov. Typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat. A
typical coverage can be just a dozen of basic blocks (e.g. an invalid
input). In such context gcov becomes prohibitively expensive as
reset/collect coverage steps depend on total number of basic
blocks/edges in program (in case of kernel it is about 2M). Cost of
kcov depends only on number of executed basic blocks/edges. On top of
that, kernel requires per-thread coverage because there are always
background threads and unrelated processes that also produce coverage.
With inlined gcov instrumentation per-thread coverage is not possible.
kcov exposes kernel PCs and control flow to user-space which is
insecure. But debugfs should not be mapped as user accessible.
Based on a patch by Quentin Casasnovas.
[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tavis Ormandy <taviso@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@google.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Drysdale <drysdale@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:27:30 +08:00
|
|
|
help
|
|
|
|
KCOV exposes kernel code coverage information in a form suitable
|
|
|
|
for coverage-guided fuzzing (randomized testing).
|
|
|
|
|
|
|
|
If RANDOMIZE_BASE is enabled, PC values will not be stable across
|
|
|
|
different machines and across reboots. If you need stable PC values,
|
|
|
|
disable RANDOMIZE_BASE.
|
|
|
|
|
2016-12-15 07:05:40 +08:00
|
|
|
For more details, see Documentation/dev-tools/kcov.rst.
|
kernel: add kcov code coverage
kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.
kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard
interrupts and instrumentation of some inherently non-deterministic or
non-interesting parts of kernel is disabled (e.g. scheduler, locking).
Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.
This patch adds the necessary support on kernel side. The complementary
compiler support was added in gcc revision 231296.
We've used this support to build syzkaller system call fuzzer, which has
found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.
Why not gcov. Typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat. A
typical coverage can be just a dozen of basic blocks (e.g. an invalid
input). In such context gcov becomes prohibitively expensive as
reset/collect coverage steps depend on total number of basic
blocks/edges in program (in case of kernel it is about 2M). Cost of
kcov depends only on number of executed basic blocks/edges. On top of
that, kernel requires per-thread coverage because there are always
background threads and unrelated processes that also produce coverage.
With inlined gcov instrumentation per-thread coverage is not possible.
kcov exposes kernel PCs and control flow to user-space which is
insecure. But debugfs should not be mapped as user accessible.
Based on a patch by Quentin Casasnovas.
[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tavis Ormandy <taviso@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@google.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Drysdale <drysdale@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:27:30 +08:00
|
|
|
|
2017-11-18 07:30:50 +08:00
|
|
|
config KCOV_ENABLE_COMPARISONS
|
|
|
|
bool "Enable comparison operands collection by KCOV"
|
|
|
|
depends on KCOV
|
2018-05-28 17:22:04 +08:00
|
|
|
depends on $(cc-option,-fsanitize-coverage=trace-cmp)
|
2017-11-18 07:30:50 +08:00
|
|
|
help
|
|
|
|
KCOV also exposes operands of every comparison in the instrumented
|
|
|
|
code along with operand sizes and PCs of the comparison instructions.
|
|
|
|
These operands can be used by fuzzing engines to improve the quality
|
|
|
|
of fuzzing coverage.
|
|
|
|
|
2016-08-03 05:07:30 +08:00
|
|
|
config KCOV_INSTRUMENT_ALL
|
|
|
|
bool "Instrument all code by default"
|
|
|
|
depends on KCOV
|
2018-05-28 17:22:04 +08:00
|
|
|
default y
|
2016-08-03 05:07:30 +08:00
|
|
|
help
|
|
|
|
If you are doing generic system call fuzzing (like e.g. syzkaller),
|
|
|
|
then you will want to instrument the whole kernel and you should
|
|
|
|
say y here. If you are doing more targeted fuzzing (like e.g.
|
|
|
|
filesystem fuzzing with AFL) then you will want to enable coverage
|
|
|
|
for more specific subsets of files, and should say n here.
|
|
|
|
|
2007-02-12 16:52:00 +08:00
|
|
|
config DEBUG_SHIRQ
|
|
|
|
bool "Debug shared IRQ handlers"
|
2013-08-30 15:39:53 +08:00
|
|
|
depends on DEBUG_KERNEL
|
2007-02-12 16:52:00 +08:00
|
|
|
help
|
|
|
|
Enable this to generate a spurious interrupt as soon as a shared
|
|
|
|
interrupt handler is registered, and just before one is deregistered.
|
|
|
|
Drivers ought to be able to handle interrupts coming in at those
|
|
|
|
points; some don't and need to be caught.
|
|
|
|
|
2013-07-02 04:04:50 +08:00
|
|
|
menu "Debug Lockups and Hangs"
|
|
|
|
|
2010-05-08 05:11:44 +08:00
|
|
|
config LOCKUP_DETECTOR
|
2017-07-13 05:35:46 +08:00
|
|
|
bool
|
|
|
|
|
|
|
|
config SOFTLOCKUP_DETECTOR
|
|
|
|
bool "Detect Soft Lockups"
|
2006-10-11 16:20:44 +08:00
|
|
|
depends on DEBUG_KERNEL && !S390
|
2017-07-13 05:35:46 +08:00
|
|
|
select LOCKUP_DETECTOR
|
2005-09-07 06:16:27 +08:00
|
|
|
help
|
2010-05-08 05:11:44 +08:00
|
|
|
Say Y here to enable the kernel to act as a watchdog to detect
|
2017-07-13 05:35:46 +08:00
|
|
|
soft lockups.
|
2010-05-08 05:11:44 +08:00
|
|
|
|
|
|
|
Softlockups are bugs that cause the kernel to loop in kernel
|
2012-02-10 06:42:21 +08:00
|
|
|
mode for more than 20 seconds, without giving other tasks a
|
2010-05-08 05:11:44 +08:00
|
|
|
chance to run. The current stack trace is displayed upon
|
|
|
|
detection and the system will stay locked up.
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2018-04-11 07:32:51 +08:00
|
|
|
config BOOTPARAM_SOFTLOCKUP_PANIC
|
|
|
|
bool "Panic (Reboot) On Soft Lockups"
|
|
|
|
depends on SOFTLOCKUP_DETECTOR
|
|
|
|
help
|
|
|
|
Say Y here to enable the kernel to panic on "soft lockups",
|
|
|
|
which are bugs that cause the kernel to loop in kernel
|
|
|
|
mode for more than 20 seconds (configurable using the watchdog_thresh
|
|
|
|
sysctl), without giving other tasks a chance to run.
|
|
|
|
|
|
|
|
The panic can be used in combination with panic_timeout,
|
|
|
|
to cause the system to reboot automatically after a
|
|
|
|
lockup has been detected. This feature is useful for
|
|
|
|
high-availability systems that have uptime guarantees and
|
|
|
|
where a lockup must be resolved ASAP.
|
|
|
|
|
|
|
|
Say N if unsure.
|
|
|
|
|
|
|
|
config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
|
|
|
|
int
|
|
|
|
depends on SOFTLOCKUP_DETECTOR
|
|
|
|
range 0 1
|
|
|
|
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
|
|
|
|
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
|
|
|
|
|
2017-07-13 05:35:46 +08:00
|
|
|
config HARDLOCKUP_DETECTOR_PERF
|
|
|
|
bool
|
|
|
|
select SOFTLOCKUP_DETECTOR
|
|
|
|
|
2017-08-15 15:50:13 +08:00
|
|
|
#
|
|
|
|
# Enables a timestamp based low pass filter to compensate for perf based
|
|
|
|
# hard lockup detection which runs too fast due to turbo modes.
|
|
|
|
#
|
|
|
|
config HARDLOCKUP_CHECK_TIMESTAMP
|
|
|
|
bool
|
|
|
|
|
2017-07-13 05:35:46 +08:00
|
|
|
#
|
|
|
|
# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
|
|
|
|
# lockup detector rather than the perf based detector.
|
|
|
|
#
|
|
|
|
config HARDLOCKUP_DETECTOR
|
|
|
|
bool "Detect Hard Lockups"
|
|
|
|
depends on DEBUG_KERNEL && !S390
|
|
|
|
depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
|
|
|
|
select LOCKUP_DETECTOR
|
|
|
|
select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
|
|
|
|
select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
|
|
|
|
help
|
|
|
|
Say Y here to enable the kernel to act as a watchdog to detect
|
|
|
|
hard lockups.
|
|
|
|
|
2010-05-08 05:11:44 +08:00
|
|
|
Hardlockups are bugs that cause the CPU to loop in kernel mode
|
2012-02-10 06:42:21 +08:00
|
|
|
for more than 10 seconds, without letting other interrupts have a
|
2010-05-08 05:11:44 +08:00
|
|
|
chance to run. The current stack trace is displayed upon detection
|
|
|
|
and the system will stay locked up.
|
2005-09-07 06:16:27 +08:00
|
|
|
|
2011-03-23 07:34:16 +08:00
|
|
|
config BOOTPARAM_HARDLOCKUP_PANIC
|
|
|
|
bool "Panic (Reboot) On Hard Lockups"
|
2012-10-05 08:13:17 +08:00
|
|
|
depends on HARDLOCKUP_DETECTOR
|
2011-03-23 07:34:16 +08:00
|
|
|
help
|
|
|
|
Say Y here to enable the kernel to panic on "hard lockups",
|
|
|
|
which are bugs that cause the kernel to loop in kernel
|
2012-02-10 06:42:21 +08:00
|
|
|
mode with interrupts disabled for more than 10 seconds (configurable
|
|
|
|
using the watchdog_thresh sysctl).
|
2011-03-23 07:34:16 +08:00
|
|
|
|
|
|
|
Say N if unsure.
|
|
|
|
|
|
|
|
config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
|
|
|
|
int
|
2012-10-05 08:13:17 +08:00
|
|
|
depends on HARDLOCKUP_DETECTOR
|
2011-03-23 07:34:16 +08:00
|
|
|
range 0 1
|
|
|
|
default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
|
|
|
|
default 1 if BOOTPARAM_HARDLOCKUP_PANIC
|
|
|
|
|
2009-01-16 03:08:40 +08:00
|
|
|
config DETECT_HUNG_TASK
|
|
|
|
bool "Detect Hung Tasks"
|
|
|
|
depends on DEBUG_KERNEL
|
2017-07-13 05:35:46 +08:00
|
|
|
default SOFTLOCKUP_DETECTOR
|
2009-01-16 03:08:40 +08:00
|
|
|
help
|
2013-07-02 04:04:43 +08:00
|
|
|
Say Y here to enable the kernel to detect "hung tasks",
|
|
|
|
which are bugs that cause the task to be stuck in
|
2016-09-23 04:55:13 +08:00
|
|
|
uninterruptible "D" state indefinitely.
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
When a hung task is detected, the kernel will print the
|
|
|
|
current stack trace (which you should report), but the
|
|
|
|
task will stay in uninterruptible state. If lockdep is
|
|
|
|
enabled then all held locks will also be reported. This
|
|
|
|
feature has negligible overhead.
|
2006-03-25 19:06:39 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config DEFAULT_HUNG_TASK_TIMEOUT
|
|
|
|
int "Default timeout for hung task detection (in seconds)"
|
|
|
|
depends on DETECT_HUNG_TASK
|
|
|
|
default 120
|
2007-07-16 14:38:14 +08:00
|
|
|
help
|
2013-07-02 04:04:43 +08:00
|
|
|
This option controls the default timeout (in seconds) used
|
|
|
|
to determine when a task has become non-responsive and should
|
|
|
|
be considered hung.
|
2007-07-16 14:38:14 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
It can be adjusted at runtime via the kernel.hung_task_timeout_secs
|
|
|
|
sysctl or by writing a value to
|
|
|
|
/proc/sys/kernel/hung_task_timeout_secs.
|
2008-02-08 09:47:41 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
A timeout of 0 disables the check. The default is two minutes.
|
|
|
|
Keeping the default should be fine in most cases.
|
2012-10-09 07:28:11 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config BOOTPARAM_HUNG_TASK_PANIC
|
|
|
|
bool "Panic (Reboot) On Hung Tasks"
|
|
|
|
depends on DETECT_HUNG_TASK
|
2009-06-11 20:24:13 +08:00
|
|
|
help
|
2013-07-02 04:04:43 +08:00
|
|
|
Say Y here to enable the kernel to panic on "hung tasks",
|
|
|
|
which are bugs that cause the kernel to leave a task stuck
|
|
|
|
in uninterruptible "D" state.
|
2009-06-11 20:24:13 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
The panic can be used in combination with panic_timeout,
|
|
|
|
to cause the system to reboot automatically after a
|
|
|
|
hung task has been detected. This feature is useful for
|
|
|
|
high-availability systems that have uptime guarantees and
|
|
|
|
where a hung task must be resolved ASAP.
|
2009-06-23 21:40:27 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
Say N if unsure.
|
2009-06-23 21:40:27 +08:00
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config BOOTPARAM_HUNG_TASK_PANIC_VALUE
|
|
|
|
int
|
|
|
|
depends on DETECT_HUNG_TASK
|
|
|
|
range 0 1
|
|
|
|
default 0 if !BOOTPARAM_HUNG_TASK_PANIC
|
|
|
|
default 1 if BOOTPARAM_HUNG_TASK_PANIC
|
2009-06-11 20:24:13 +08:00
|
|
|
|
workqueue: implement lockup detector
Workqueue stalls can happen from a variety of usage bugs such as
missing WQ_MEM_RECLAIM flag or concurrency managed work item
indefinitely staying RUNNING. These stalls can be extremely difficult
to hunt down because the usual warning mechanisms can't detect
workqueue stalls and the internal state is pretty opaque.
To alleviate the situation, this patch implements workqueue lockup
detector. It periodically monitors all worker_pools and,
if any pool failed to make forward progress longer than the threshold
duration, triggers warning and dumps workqueue state as follows.
BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 31s!
Showing busy workqueues and worker pools:
workqueue events: flags=0x0
pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=17/256
pending: monkey_wrench_fn, e1000_watchdog, cache_reap, vmstat_shepherd, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, cgroup_release_agent
workqueue events_power_efficient: flags=0x80
pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256
pending: check_lifetime, neigh_periodic_work
workqueue cgroup_pidlist_destroy: flags=0x0
pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/1
pending: cgroup_pidlist_destroy_work_fn
...
The detection mechanism is controlled through kernel parameter
workqueue.watchdog_thresh and can be updated at runtime through the
sysfs module parameter file.
v2: Decoupled from softlockup control knobs.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Don Zickus <dzickus@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Chris Mason <clm@fb.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
2015-12-09 00:28:04 +08:00
|
|
|
config WQ_WATCHDOG
|
|
|
|
bool "Detect Workqueue Stalls"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Say Y here to enable stall detection on workqueues. If a
|
|
|
|
worker pool doesn't make forward progress on a pending work
|
|
|
|
item for over a given amount of time, 30s by default, a
|
|
|
|
warning message is printed along with dump of workqueue
|
|
|
|
state. This can be configured through kernel parameter
|
|
|
|
"workqueue.watchdog_thresh" and its sysfs counterpart.
|
|
|
|
|
2013-07-02 04:04:50 +08:00
|
|
|
endmenu # "Debug lockups and hangs"
|
|
|
|
|
|
|
|
config PANIC_ON_OOPS
|
|
|
|
bool "Panic on Oops"
|
2009-06-25 17:16:11 +08:00
|
|
|
help
|
2013-07-02 04:04:50 +08:00
|
|
|
Say Y here to enable the kernel to panic when it oopses. This
|
|
|
|
has the same effect as setting oops=panic on the kernel command
|
|
|
|
line.
|
2009-06-25 17:16:11 +08:00
|
|
|
|
2013-07-02 04:04:50 +08:00
|
|
|
This feature is useful to ensure that the kernel does not do
|
|
|
|
anything erroneous after an oops which could result in data
|
|
|
|
corruption or other issues.
|
|
|
|
|
|
|
|
Say N if unsure.
|
|
|
|
|
|
|
|
config PANIC_ON_OOPS_VALUE
|
|
|
|
int
|
|
|
|
range 0 1
|
|
|
|
default 0 if !PANIC_ON_OOPS
|
|
|
|
default 1 if PANIC_ON_OOPS
|
|
|
|
|
2013-11-26 07:23:04 +08:00
|
|
|
config PANIC_TIMEOUT
|
|
|
|
int "panic timeout"
|
|
|
|
default 0
|
|
|
|
help
|
|
|
|
Set the timeout value (in seconds) until a reboot occurs when the
|
|
|
|
kernel panics. If n = 0, then we wait forever. A timeout
|
|
|
|
value n > 0 will wait n seconds before rebooting, while a timeout
|
|
|
|
value n < 0 will reboot immediately.
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config SCHED_DEBUG
|
|
|
|
bool "Collect scheduler debugging info"
|
|
|
|
depends on DEBUG_KERNEL && PROC_FS
|
|
|
|
default y
|
2009-06-11 20:24:14 +08:00
|
|
|
help
|
2013-07-02 04:04:43 +08:00
|
|
|
If you say Y here, the /proc/sched_debug file will be provided
|
|
|
|
that can help debug the scheduler. The runtime overhead of this
|
|
|
|
option is minimal.
|
2009-06-11 20:24:14 +08:00
|
|
|
|
2015-06-26 02:23:37 +08:00
|
|
|
config SCHED_INFO
|
|
|
|
bool
|
|
|
|
default n
|
|
|
|
|
2013-07-02 04:04:43 +08:00
|
|
|
config SCHEDSTATS
|
|
|
|
bool "Collect scheduler statistics"
|
|
|
|
depends on DEBUG_KERNEL && PROC_FS
|
2015-06-26 02:23:37 +08:00
|
|
|
select SCHED_INFO
|
2013-07-02 04:04:43 +08:00
|
|
|
help
|
|
|
|
If you say Y here, additional code will be inserted into the
|
|
|
|
scheduler and related routines to collect statistics about
|
|
|
|
scheduler behavior and provide them in /proc/schedstat. These
|
|
|
|
stats may be useful for both tuning and debugging the scheduler.
|
|
|
|
If you aren't debugging the scheduler or trying to tune a specific
|
|
|
|
application, you can say N to avoid the very slight overhead
|
|
|
|
this adds.
|
2009-06-11 20:24:14 +08:00
|
|
|
|
2014-09-12 21:16:19 +08:00
|
|
|
config SCHED_STACK_END_CHECK
|
|
|
|
bool "Detect stack corruption on calls to schedule()"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
This option checks for a stack overrun on calls to schedule().
|
|
|
|
If the stack end location is found to be overwritten, always panic as
|
|
|
|
the content of the corrupted region can no longer be trusted.
|
|
|
|
This is to ensure no erroneous behaviour occurs which could result in
|
|
|
|
data corruption or a sporadic crash at a later stage once the region
|
|
|
|
is examined. The runtime overhead introduced is minimal.
|
|
|
|
|
2015-03-12 12:16:32 +08:00
|
|
|
config DEBUG_TIMEKEEPING
|
|
|
|
bool "Enable extra timekeeping sanity checking"
|
|
|
|
help
|
|
|
|
This option will enable additional timekeeping sanity checks
|
|
|
|
which may be helpful when diagnosing issues where timekeeping
|
|
|
|
problems are suspected.
|
|
|
|
|
|
|
|
This may include checks in the timekeeping hotpaths, so this
|
|
|
|
option may have a (very small) performance impact to some
|
|
|
|
workloads.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
config DEBUG_PREEMPT
|
|
|
|
bool "Debug preemptible kernel"
|
2009-10-16 15:21:39 +08:00
|
|
|
depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
|
2005-04-17 06:20:36 +08:00
|
|
|
default y
|
|
|
|
help
|
|
|
|
If you say Y here then the kernel will use a debug variant of the
|
|
|
|
commonly used smp_processor_id() function and will print warnings
|
|
|
|
if kernel code uses it in a preemption-unsafe way. Also, the kernel
|
|
|
|
will detect preemption count underflows.
|
|
|
|
|
2013-07-02 04:04:47 +08:00
|
|
|
menu "Lock Debugging (spinlocks, mutexes, etc...)"
|
|
|
|
|
2018-03-31 05:27:59 +08:00
|
|
|
config LOCK_DEBUGGING_SUPPORT
|
|
|
|
bool
|
|
|
|
depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
|
|
|
|
default y
|
|
|
|
|
2018-03-31 05:28:00 +08:00
|
|
|
config PROVE_LOCKING
|
|
|
|
bool "Lock debugging: prove locking correctness"
|
|
|
|
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
|
|
|
|
select LOCKDEP
|
|
|
|
select DEBUG_SPINLOCK
|
|
|
|
select DEBUG_MUTEXES
|
|
|
|
select DEBUG_RT_MUTEXES if RT_MUTEXES
|
|
|
|
select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
|
|
|
|
select DEBUG_WW_MUTEX_SLOWPATH
|
|
|
|
select DEBUG_LOCK_ALLOC
|
|
|
|
select TRACE_IRQFLAGS
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
This feature enables the kernel to prove that all locking
|
|
|
|
that occurs in the kernel runtime is mathematically
|
|
|
|
correct: that under no circumstance could an arbitrary (and
|
|
|
|
not yet triggered) combination of observed locking
|
|
|
|
sequences (on an arbitrary number of CPUs, running an
|
|
|
|
arbitrary number of tasks and interrupt contexts) cause a
|
|
|
|
deadlock.
|
|
|
|
|
|
|
|
In short, this feature enables the kernel to report locking
|
|
|
|
related deadlocks before they actually occur.
|
|
|
|
|
|
|
|
The proof does not depend on how hard and complex a
|
|
|
|
deadlock scenario would be to trigger: how many
|
|
|
|
participant CPUs, tasks and irq-contexts would be needed
|
|
|
|
for it to trigger. The proof also does not depend on
|
|
|
|
timing: if a race and a resulting deadlock is possible
|
|
|
|
theoretically (no matter how unlikely the race scenario
|
|
|
|
is), it will be proven so and will immediately be
|
|
|
|
reported by the kernel (once the event is observed that
|
|
|
|
makes the deadlock theoretically possible).
|
|
|
|
|
|
|
|
If a deadlock is impossible (i.e. the locking rules, as
|
|
|
|
observed by the kernel, are mathematically correct), the
|
|
|
|
kernel reports nothing.
|
|
|
|
|
|
|
|
NOTE: this feature can also be enabled for rwlocks, mutexes
|
|
|
|
and rwsems - in which case all dependencies between these
|
|
|
|
different locking variants are observed and mapped too, and
|
|
|
|
the proof of observed correctness is also maintained for an
|
|
|
|
arbitrary combination of these separate locking variants.
|
|
|
|
|
|
|
|
For more details, see Documentation/locking/lockdep-design.txt.
|
|
|
|
|
|
|
|
config LOCK_STAT
|
|
|
|
bool "Lock usage statistics"
|
|
|
|
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
|
|
|
|
select LOCKDEP
|
|
|
|
select DEBUG_SPINLOCK
|
|
|
|
select DEBUG_MUTEXES
|
|
|
|
select DEBUG_RT_MUTEXES if RT_MUTEXES
|
|
|
|
select DEBUG_LOCK_ALLOC
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
This feature enables tracking lock contention points.
|
|
|
|
|
|
|
|
For more details, see Documentation/locking/lockstat.txt.
|
|
|
|
|
|
|
|
This also enables lock events required by "perf lock",
|
|
|
|
subcommand of perf.
|
|
|
|
If you want to use "perf lock", you also need to turn on
|
|
|
|
CONFIG_EVENT_TRACING.
|
|
|
|
|
|
|
|
CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
|
|
|
|
(CONFIG_LOCKDEP defines "acquire" and "release" events.)
|
|
|
|
|
2006-06-27 17:54:55 +08:00
|
|
|
config DEBUG_RT_MUTEXES
|
|
|
|
bool "RT Mutex debugging, deadlock detection"
|
|
|
|
depends on DEBUG_KERNEL && RT_MUTEXES
|
|
|
|
help
|
|
|
|
This allows rt mutex semantics violations and rt mutex related
|
|
|
|
deadlocks (lockups) to be detected and reported automatically.
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
config DEBUG_SPINLOCK
|
2006-07-03 15:24:55 +08:00
|
|
|
bool "Spinlock and rw-lock debugging: basic checks"
|
2005-04-17 06:20:36 +08:00
|
|
|
depends on DEBUG_KERNEL
|
2012-03-22 17:55:08 +08:00
|
|
|
select UNINLINE_SPIN_UNLOCK
|
2005-04-17 06:20:36 +08:00
|
|
|
help
|
|
|
|
Say Y here and build SMP to catch missing spinlock initialization
|
|
|
|
and certain other kinds of spinlock errors commonly made. This is
|
|
|
|
best used in conjunction with the NMI watchdog so that spinlock
|
|
|
|
deadlocks are also debuggable.
|
|
|
|
|
2006-07-03 15:24:55 +08:00
|
|
|
config DEBUG_MUTEXES
|
|
|
|
bool "Mutex debugging: basic checks"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
This feature allows mutex semantics violations to be detected and
|
|
|
|
reported.
|
|
|
|
|
2013-06-20 19:31:17 +08:00
|
|
|
config DEBUG_WW_MUTEX_SLOWPATH
|
|
|
|
bool "Wait/wound mutex debugging: Slowpath testing"
|
2018-03-31 05:27:59 +08:00
|
|
|
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
|
2013-06-20 19:31:17 +08:00
|
|
|
select DEBUG_LOCK_ALLOC
|
|
|
|
select DEBUG_SPINLOCK
|
|
|
|
select DEBUG_MUTEXES
|
|
|
|
help
|
|
|
|
This feature enables slowpath testing for w/w mutex users by
|
|
|
|
injecting additional -EDEADLK wound/backoff cases. Together with
|
|
|
|
the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
|
|
|
|
will test all possible w/w mutex interface abuse with the
|
|
|
|
exception of simply not acquiring all the required locks.
|
2014-08-27 23:19:26 +08:00
|
|
|
Note that this feature can introduce significant overhead, so
|
|
|
|
it really should not be enabled in a production or distro kernel,
|
|
|
|
even a debug kernel. If you are a driver writer, enable it. If
|
|
|
|
you are a distro, do not.
|
2013-06-20 19:31:17 +08:00
|
|
|
|
2018-03-31 05:27:58 +08:00
|
|
|
config DEBUG_RWSEMS
|
|
|
|
bool "RW Semaphore debugging: basic checks"
|
|
|
|
depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
|
|
|
|
help
|
|
|
|
This debugging feature allows mismatched rw semaphore locks and unlocks
|
|
|
|
to be detected and reported.
|
|
|
|
|
2006-07-03 15:24:55 +08:00
|
|
|
config DEBUG_LOCK_ALLOC
|
|
|
|
bool "Lock debugging: detect incorrect freeing of live locks"
|
2018-03-31 05:27:59 +08:00
|
|
|
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
|
2006-07-03 15:24:55 +08:00
|
|
|
select DEBUG_SPINLOCK
|
|
|
|
select DEBUG_MUTEXES
|
2016-09-19 18:15:37 +08:00
|
|
|
select DEBUG_RT_MUTEXES if RT_MUTEXES
|
2006-07-03 15:24:55 +08:00
|
|
|
select LOCKDEP
|
|
|
|
help
|
|
|
|
This feature will check whether any held lock (spinlock, rwlock,
|
|
|
|
mutex or rwsem) is incorrectly freed by the kernel, via any of the
|
|
|
|
memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
|
|
|
|
vfree(), etc.), whether a live lock is incorrectly reinitialized via
|
|
|
|
spin_lock_init()/mutex_init()/etc., or whether there is any lock
|
|
|
|
held during task exit.
|
|
|
|
|
|
|
|
config LOCKDEP
|
|
|
|
bool
|
2018-03-31 05:27:59 +08:00
|
|
|
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
|
2006-07-03 15:24:55 +08:00
|
|
|
select STACKTRACE
|
arch: remove obsolete architecture ports
This removes the entire architecture code for blackfin, cris, frv, m32r,
metag, mn10300, score, and tile, including the associated device drivers.
I have been working with the (former) maintainers for each one to ensure
that my interpretation was right and the code is definitely unused in
mainline kernels. Many had fond memories of working on the respective
ports to start with and getting them included in upstream, but also saw
no point in keeping the port alive without any users.
In the end, it seems that while the eight architectures are extremely
different, they all suffered the same fate: There was one company
in charge of an SoC line, a CPU microarchitecture and a software
ecosystem, which was more costly than licensing newer off-the-shelf
CPU cores from a third party (typically ARM, MIPS, or RISC-V). It seems
that all the SoC product lines are still around, but have not used the
custom CPU architectures for several years at this point. In contrast,
CPU instruction sets that remain popular and have actively maintained
kernel ports tend to all be used across multiple licensees.
The removal came out of a discussion that is now documented at
https://lwn.net/Articles/748074/. Unlike the original plans, I'm not
marking any ports as deprecated but remove them all at once after I made
sure that they are all unused. Some architectures (notably tile, mn10300,
and blackfin) are still being shipped in products with old kernels,
but those products will never be updated to newer kernel releases.
After this series, we still have a few architectures without mainline
gcc support:
- unicore32 and hexagon both have very outdated gcc releases, but the
maintainers promised to work on providing something newer. At least
in case of hexagon, this will only be llvm, not gcc.
- openrisc, risc-v and nds32 are still in the process of finishing their
support or getting it added to mainline gcc in the first place.
They all have patched gcc-7.3 ports that work to some degree, but
complete upstream support won't happen before gcc-8.1. Csky posted
their first kernel patch set last week, their situation will be similar.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQIcBAABAgAGBQJawdL2AAoJEGCrR//JCVInuH0P/RJAZh1nTD+TR34ZhJq2TBoo
PgygwDU7Z2+tQVU+EZ453Gywz9/NMRFk1RWAZqrLix4ZtyIMvC6A1qfT2yH1Y7Fb
Qh6tccQeLe4ezq5u4S/46R/fQXu3Txr92yVwzJJUuPyU0arF9rv5MmI8e6p7L1en
yb74kSEaCe+/eMlsEj1Cc1dgthDNXGKIURHkRsILoweysCpesjiTg4qDcL+yTibV
FP2wjVbniKESMKS6qL71tiT5sexvLsLwMNcGiHPj94qCIQuI7DLhLdBVsL5Su6gI
sbtgv0dsq4auRYAbQdMaH1hFvu6WptsuttIbOMnz2Yegi2z28H8uVXkbk2WVLbqG
ZESUwutGh8MzOL2RJ4jyyQq5sfo++CRGlfKjr6ImZRv03dv0pe/W85062cK5cKNs
cgDDJjGRorOXW7dyU6jG2gRqODOQBObIv3w5efdq5OgzOWlbI4EC+Y5u1Z0JF/76
pSwtGXA6YhwC+9LLAlnVTHG+yOwuLmAICgoKcTbzTVDKA2YQZG/cYuQfI5S1wD8e
X6urPx3Md2GCwLXQ9mzKBzKZUpu/Tuhx0NvwF4qVxy6x1PELjn68zuP7abDHr46r
57/09ooVN+iXXnEGMtQVS/OPvYHSa2NgTSZz6Y86lCRbZmUOOlK31RDNlMvYNA+s
3iIVHovno/JuJnTOE8LY
=fQ8z
-----END PGP SIGNATURE-----
Merge tag 'arch-removal' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic
Pull removal of obsolete architecture ports from Arnd Bergmann:
"This removes the entire architecture code for blackfin, cris, frv,
m32r, metag, mn10300, score, and tile, including the associated device
drivers.
I have been working with the (former) maintainers for each one to
ensure that my interpretation was right and the code is definitely
unused in mainline kernels. Many had fond memories of working on the
respective ports to start with and getting them included in upstream,
but also saw no point in keeping the port alive without any users.
In the end, it seems that while the eight architectures are extremely
different, they all suffered the same fate: There was one company in
charge of an SoC line, a CPU microarchitecture and a software
ecosystem, which was more costly than licensing newer off-the-shelf
CPU cores from a third party (typically ARM, MIPS, or RISC-V). It
seems that all the SoC product lines are still around, but have not
used the custom CPU architectures for several years at this point. In
contrast, CPU instruction sets that remain popular and have actively
maintained kernel ports tend to all be used across multiple licensees.
[ See the new nds32 port merged in the previous commit for the next
generation of "one company in charge of an SoC line, a CPU
microarchitecture and a software ecosystem" - Linus ]
The removal came out of a discussion that is now documented at
https://lwn.net/Articles/748074/. Unlike the original plans, I'm not
marking any ports as deprecated but remove them all at once after I
made sure that they are all unused. Some architectures (notably tile,
mn10300, and blackfin) are still being shipped in products with old
kernels, but those products will never be updated to newer kernel
releases.
After this series, we still have a few architectures without mainline
gcc support:
- unicore32 and hexagon both have very outdated gcc releases, but the
maintainers promised to work on providing something newer. At least
in case of hexagon, this will only be llvm, not gcc.
- openrisc, risc-v and nds32 are still in the process of finishing
their support or getting it added to mainline gcc in the first
place. They all have patched gcc-7.3 ports that work to some
degree, but complete upstream support won't happen before gcc-8.1.
Csky posted their first kernel patch set last week, their situation
will be similar
[ Palmer Dabbelt points out that RISC-V support is in mainline gcc
since gcc-7, although gcc-7.3.0 is the recommended minimum - Linus ]"
This really says it all:
2498 files changed, 95 insertions(+), 467668 deletions(-)
* tag 'arch-removal' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic: (74 commits)
MAINTAINERS: UNICORE32: Change email account
staging: iio: remove iio-trig-bfin-timer driver
tty: hvc: remove tile driver
tty: remove bfin_jtag_comm and hvc_bfin_jtag drivers
serial: remove tile uart driver
serial: remove m32r_sio driver
serial: remove blackfin drivers
serial: remove cris/etrax uart drivers
usb: Remove Blackfin references in USB support
usb: isp1362: remove blackfin arch glue
usb: musb: remove blackfin port
usb: host: remove tilegx platform glue
pwm: remove pwm-bfin driver
i2c: remove bfin-twi driver
spi: remove blackfin related host drivers
watchdog: remove bfin_wdt driver
can: remove bfin_can driver
mmc: remove bfin_sdh driver
input: misc: remove blackfin rotary driver
input: keyboard: remove bf54x driver
...
2018-04-03 11:20:12 +08:00
|
|
|
select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
|
2006-07-03 15:24:55 +08:00
|
|
|
select KALLSYMS
|
|
|
|
select KALLSYMS_ALL
|
|
|
|
|
2017-04-10 23:50:52 +08:00
|
|
|
config LOCKDEP_SMALL
|
|
|
|
bool
|
|
|
|
|
2006-07-03 15:24:55 +08:00
|
|
|
config DEBUG_LOCKDEP
|
|
|
|
bool "Lock dependency engine debugging"
|
2006-07-14 15:24:32 +08:00
|
|
|
depends on DEBUG_KERNEL && LOCKDEP
|
2006-07-03 15:24:55 +08:00
|
|
|
help
|
|
|
|
If you say Y here, the lock dependency engine will do
|
|
|
|
additional runtime checks to debug itself, at the price
|
|
|
|
of more runtime overhead.
|
|
|
|
|
2011-06-09 01:31:56 +08:00
|
|
|
config DEBUG_ATOMIC_SLEEP
|
|
|
|
bool "Sleep inside atomic section checking"
|
2011-06-08 07:51:02 +08:00
|
|
|
select PREEMPT_COUNT
|
2005-04-17 06:20:36 +08:00
|
|
|
depends on DEBUG_KERNEL
|
2018-07-31 19:39:32 +08:00
|
|
|
depends on !ARCH_NO_PREEMPT
|
2005-04-17 06:20:36 +08:00
|
|
|
help
|
|
|
|
If you say Y here, various routines which may sleep will become very
|
2011-06-09 01:31:56 +08:00
|
|
|
noisy if they are called inside atomic sections: when a spinlock is
|
|
|
|
held, inside an rcu read side critical section, inside preempt disabled
|
|
|
|
sections, inside an interrupt, etc...
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-07-03 15:24:48 +08:00
|
|
|
config DEBUG_LOCKING_API_SELFTESTS
|
|
|
|
bool "Locking API boot-time self-tests"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Say Y here if you want the kernel to run a short self-test during
|
|
|
|
bootup. The self-test checks whether common types of locking bugs
|
|
|
|
are detected by debugging mechanisms or not. (if you disable
|
|
|
|
lock debugging then those bugs won't be detected of course.)
|
|
|
|
The following locking APIs are covered: spinlocks, rwlocks,
|
|
|
|
mutexes and rwsems.
|
|
|
|
|
2014-02-05 07:51:41 +08:00
|
|
|
config LOCK_TORTURE_TEST
|
|
|
|
tristate "torture tests for locking"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
select TORTURE_TEST
|
|
|
|
help
|
|
|
|
This option provides a kernel module that runs torture tests
|
|
|
|
on kernel locking primitives. The kernel module may be built
|
|
|
|
after the fact on the running kernel to be tested, if desired.
|
|
|
|
|
|
|
|
Say Y here if you want kernel locking-primitive torture tests
|
|
|
|
to be built into the kernel.
|
|
|
|
Say M if you want these torture tests to build as a module.
|
|
|
|
Say N if you are unsure.
|
|
|
|
|
2016-12-01 19:47:06 +08:00
|
|
|
config WW_MUTEX_SELFTEST
|
|
|
|
tristate "Wait/wound mutex selftests"
|
|
|
|
help
|
|
|
|
This option provides a kernel module that runs tests on the
|
|
|
|
struct ww_mutex locking API.
|
|
|
|
|
|
|
|
It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction
|
|
|
|
with this test harness.
|
|
|
|
|
|
|
|
Say M if you want these self tests to build as a module.
|
|
|
|
Say N if you are unsure.
|
|
|
|
|
2013-07-02 04:04:47 +08:00
|
|
|
endmenu # lock debugging
|
2006-07-03 15:24:38 +08:00
|
|
|
|
2013-07-02 04:04:47 +08:00
|
|
|
config TRACE_IRQFLAGS
|
|
|
|
bool
|
2011-05-25 08:13:36 +08:00
|
|
|
help
|
2013-07-02 04:04:47 +08:00
|
|
|
Enables hooks to interrupt enabling and disabling for
|
|
|
|
either tracing or lock debugging.
|
2011-05-25 08:13:36 +08:00
|
|
|
|
2006-07-03 15:24:38 +08:00
|
|
|
config STACKTRACE
|
2014-08-30 06:18:35 +08:00
|
|
|
bool "Stack backtrace support"
|
2006-07-03 15:24:38 +08:00
|
|
|
depends on STACKTRACE_SUPPORT
|
2014-08-30 06:18:35 +08:00
|
|
|
help
|
|
|
|
This option causes the kernel to create a /proc/pid/stack for
|
|
|
|
every process, showing its current stack trace.
|
|
|
|
It is also used by various kernel debugging features that require
|
|
|
|
stack trace generation.
|
2011-05-25 08:13:36 +08:00
|
|
|
|
2017-06-08 16:16:59 +08:00
|
|
|
config WARN_ALL_UNSEEDED_RANDOM
|
|
|
|
bool "Warn for all uses of unseeded randomness"
|
|
|
|
default n
|
random: warn when kernel uses unseeded randomness
This enables an important dmesg notification about when drivers have
used the crng without it being seeded first. Prior, these errors would
occur silently, and so there hasn't been a great way of diagnosing these
types of bugs for obscure setups. By adding this as a config option, we
can leave it on by default, so that we learn where these issues happen,
in the field, while still allowing some people to turn it off, if they
really know what they're doing and do not want the log entries.
However, we don't leave it _completely_ by default. An earlier version
of this patch simply had `default y`. I'd really love that, but it turns
out, this problem with unseeded randomness being used is really quite
present and is going to take a long time to fix. Thus, as a compromise
between log-messages-for-all and nobody-knows, this is `default y`,
except it is also `depends on DEBUG_KERNEL`. This will ensure that the
curious see the messages while others don't have to.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2017-06-08 11:06:55 +08:00
|
|
|
help
|
|
|
|
Some parts of the kernel contain bugs relating to their use of
|
|
|
|
cryptographically secure random numbers before it's actually possible
|
|
|
|
to generate those numbers securely. This setting ensures that these
|
|
|
|
flaws don't go unnoticed, by enabling a message, should this ever
|
|
|
|
occur. This will allow people with obscure setups to know when things
|
|
|
|
are going wrong, so that they might contact developers about fixing
|
|
|
|
it.
|
|
|
|
|
2017-06-08 16:16:59 +08:00
|
|
|
Unfortunately, on some models of some architectures getting
|
|
|
|
a fully seeded CRNG is extremely difficult, and so this can
|
|
|
|
result in dmesg getting spammed for a surprisingly long
|
|
|
|
time. This is really bad from a security perspective, and
|
|
|
|
so architecture maintainers really need to do what they can
|
|
|
|
to get the CRNG seeded sooner after the system is booted.
|
|
|
|
However, since users cannot do anything actionable to
|
|
|
|
address this, by default the kernel will issue only a single
|
|
|
|
warning for the first use of unseeded randomness.
|
|
|
|
|
|
|
|
Say Y here if you want to receive warnings for all uses of
|
|
|
|
unseeded randomness. This will be of use primarily for
|
|
|
|
those developers interested in improving the security of
|
|
|
|
Linux kernels running on their architecture (or
|
|
|
|
subarchitecture).
|
random: warn when kernel uses unseeded randomness
This enables an important dmesg notification about when drivers have
used the crng without it being seeded first. Prior, these errors would
occur silently, and so there hasn't been a great way of diagnosing these
types of bugs for obscure setups. By adding this as a config option, we
can leave it on by default, so that we learn where these issues happen,
in the field, while still allowing some people to turn it off, if they
really know what they're doing and do not want the log entries.
However, we don't leave it _completely_ by default. An earlier version
of this patch simply had `default y`. I'd really love that, but it turns
out, this problem with unseeded randomness being used is really quite
present and is going to take a long time to fix. Thus, as a compromise
between log-messages-for-all and nobody-knows, this is `default y`,
except it is also `depends on DEBUG_KERNEL`. This will ensure that the
curious see the messages while others don't have to.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2017-06-08 11:06:55 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
config DEBUG_KOBJECT
|
|
|
|
bool "kobject debugging"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
If you say Y here, some extra kobject debugging messages will be sent
|
|
|
|
to the syslog.
|
|
|
|
|
2013-06-27 22:06:14 +08:00
|
|
|
config DEBUG_KOBJECT_RELEASE
|
|
|
|
bool "kobject release debugging"
|
2013-10-29 23:33:36 +08:00
|
|
|
depends on DEBUG_OBJECTS_TIMERS
|
2013-06-27 22:06:14 +08:00
|
|
|
help
|
|
|
|
kobjects are reference counted objects. This means that their
|
|
|
|
last reference count put is not predictable, and the kobject can
|
|
|
|
live on past the point at which a driver decides to drop its
|
|
|
|
initial reference to the kobject gained on allocation. An
|
|
|
|
example of this would be a struct device which has just been
|
|
|
|
unregistered.
|
|
|
|
|
|
|
|
However, some buggy drivers assume that after such an operation,
|
|
|
|
the memory backing the kobject can be immediately freed. This
|
|
|
|
goes completely against the principles of a refcounted object.
|
|
|
|
|
|
|
|
If you say Y here, the kernel will delay the release of kobjects
|
|
|
|
on the last reference count to improve the visibility of this
|
|
|
|
kind of kobject release bug.
|
|
|
|
|
2012-10-09 07:28:13 +08:00
|
|
|
config HAVE_DEBUG_BUGVERBOSE
|
|
|
|
bool
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
config DEBUG_BUGVERBOSE
|
2011-01-21 06:44:16 +08:00
|
|
|
bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT
|
2012-10-09 07:28:13 +08:00
|
|
|
depends on BUG && (GENERIC_BUG || HAVE_DEBUG_BUGVERBOSE)
|
2009-12-15 10:00:25 +08:00
|
|
|
default y
|
2005-04-17 06:20:36 +08:00
|
|
|
help
|
|
|
|
Say Y here to make BUG() panics output the file name and line number
|
|
|
|
of the BUG call as well as the EIP and oops trace. This aids
|
|
|
|
debugging but costs about 70-100K of memory.
|
|
|
|
|
2006-09-29 16:59:00 +08:00
|
|
|
config DEBUG_LIST
|
|
|
|
bool "Debug linked list manipulation"
|
2016-08-26 23:42:00 +08:00
|
|
|
depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
|
2006-09-29 16:59:00 +08:00
|
|
|
help
|
|
|
|
Enable this to turn on extended checks in the linked-list
|
|
|
|
walking routines.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-06-05 07:11:54 +08:00
|
|
|
config DEBUG_PI_LIST
|
|
|
|
bool "Debug priority linked list manipulation"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this to turn on extended checks in the priority-ordered
|
|
|
|
linked-list (plist) walking routines. This checks the entire
|
|
|
|
list multiple times during each manipulation.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2007-10-23 02:01:06 +08:00
|
|
|
config DEBUG_SG
|
|
|
|
bool "Debug SG table operations"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this to turn on checks on scatter-gather tables. This can
|
|
|
|
help find problems with drivers that do not properly initialize
|
|
|
|
their sg tables.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2008-08-16 06:29:38 +08:00
|
|
|
config DEBUG_NOTIFIERS
|
|
|
|
bool "Debug notifier call chains"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this to turn on sanity checking for notifier call chains.
|
|
|
|
This is most useful for kernel developers to make sure that
|
|
|
|
modules properly unregister themselves from notifier chains.
|
|
|
|
This is a relatively cheap check but if you care about maximum
|
|
|
|
performance, say N.
|
|
|
|
|
2009-09-02 16:13:40 +08:00
|
|
|
config DEBUG_CREDENTIALS
|
|
|
|
bool "Debug credential management"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this to turn on some debug checking for credential
|
|
|
|
management. The additional code keeps track of the number of
|
|
|
|
pointers from task_structs to any given cred struct, and checks to
|
|
|
|
see that this number never exceeds the usage count of the cred
|
|
|
|
struct.
|
|
|
|
|
|
|
|
Furthermore, if SELinux is enabled, this also checks that the
|
|
|
|
security pointer in the cred struct is never seen to be invalid.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2017-05-18 00:19:44 +08:00
|
|
|
source "kernel/rcu/Kconfig.debug"
|
2013-01-08 00:19:23 +08:00
|
|
|
|
2016-02-10 06:59:38 +08:00
|
|
|
config DEBUG_WQ_FORCE_RR_CPU
|
|
|
|
bool "Force round-robin CPU selection for unbound work items"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
Workqueue used to implicitly guarantee that work items queued
|
|
|
|
without explicit CPU specified are put on the local CPU. This
|
|
|
|
guarantee is no longer true and while local CPU is still
|
|
|
|
preferred work items may be put on foreign CPUs. Kernel
|
|
|
|
parameter "workqueue.debug_force_rr_cpu" is added to force
|
|
|
|
round-robin CPU selection to flush out usages which depend on the
|
|
|
|
now broken guarantee. This config option enables the debug
|
|
|
|
feature by default. When enabled, memory and cache locality will
|
|
|
|
be impacted.
|
|
|
|
|
2008-08-25 18:47:25 +08:00
|
|
|
config DEBUG_BLOCK_EXT_DEVT
|
|
|
|
bool "Force extended block device numbers and spread them"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
depends on BLOCK
|
2008-08-29 15:06:29 +08:00
|
|
|
default n
|
2008-08-25 18:47:25 +08:00
|
|
|
help
|
2008-10-13 16:46:01 +08:00
|
|
|
BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
|
|
|
|
SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
|
|
|
|
YOU ARE DOING. Distros, please enable this and fix whatever
|
|
|
|
is broken.
|
|
|
|
|
2008-08-25 18:47:25 +08:00
|
|
|
Conventionally, block device numbers are allocated from
|
|
|
|
predetermined contiguous area. However, extended block area
|
|
|
|
may introduce non-contiguous block device numbers. This
|
|
|
|
option forces most block device numbers to be allocated from
|
|
|
|
the extended space and spreads them to discover kernel or
|
|
|
|
userland code paths which assume predetermined contiguous
|
|
|
|
device number allocation.
|
|
|
|
|
2008-09-01 19:44:35 +08:00
|
|
|
Note that turning on this debug option shuffles all the
|
|
|
|
device numbers for all IDE and SCSI devices including libata
|
|
|
|
ones, so root partition specified using device number
|
|
|
|
directly (via rdev or root=MAJ:MIN) won't work anymore.
|
|
|
|
Textual device names (root=/dev/sdXn) will continue to work.
|
|
|
|
|
2008-08-25 18:47:25 +08:00
|
|
|
Say N if you are unsure.
|
|
|
|
|
2016-02-27 02:43:32 +08:00
|
|
|
config CPU_HOTPLUG_STATE_CONTROL
|
|
|
|
bool "Enable CPU hotplug state control"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
depends on HOTPLUG_CPU
|
|
|
|
default n
|
|
|
|
help
|
|
|
|
Allows to write steps between "offline" and "online" to the CPUs
|
|
|
|
sysfs target file so states can be stepped granular. This is a debug
|
|
|
|
option for now as the hotplug machinery cannot be stopped and
|
|
|
|
restarted at arbitrary points yet.
|
|
|
|
|
|
|
|
Say N if you are unsure.
|
|
|
|
|
2012-07-31 05:43:02 +08:00
|
|
|
config NOTIFIER_ERROR_INJECTION
|
|
|
|
tristate "Notifier error injection"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
select DEBUG_FS
|
|
|
|
help
|
2012-11-30 15:44:39 +08:00
|
|
|
This option provides the ability to inject artificial errors to
|
2012-07-31 05:43:02 +08:00
|
|
|
specified notifier chain callbacks. It is useful to test the error
|
|
|
|
handling of notifier call chain failures.
|
|
|
|
|
|
|
|
Say N if unsure.
|
|
|
|
|
2012-07-31 05:43:07 +08:00
|
|
|
config PM_NOTIFIER_ERROR_INJECT
|
|
|
|
tristate "PM notifier error injection module"
|
|
|
|
depends on PM && NOTIFIER_ERROR_INJECTION
|
|
|
|
default m if PM_DEBUG
|
|
|
|
help
|
2012-11-30 15:44:39 +08:00
|
|
|
This option provides the ability to inject artificial errors to
|
2012-07-31 05:43:07 +08:00
|
|
|
PM notifier chain callbacks. It is controlled through debugfs
|
|
|
|
interface /sys/kernel/debug/notifier-error-inject/pm
|
|
|
|
|
|
|
|
If the notifier call chain should be failed with some events
|
|
|
|
notified, write the error code to "actions/<notifier event>/error".
|
|
|
|
|
|
|
|
Example: Inject PM suspend error (-12 = -ENOMEM)
|
|
|
|
|
|
|
|
# cd /sys/kernel/debug/notifier-error-inject/pm/
|
|
|
|
# echo -12 > actions/PM_SUSPEND_PREPARE/error
|
|
|
|
# echo mem > /sys/power/state
|
|
|
|
bash: echo: write error: Cannot allocate memory
|
|
|
|
|
|
|
|
To compile this code as a module, choose M here: the module will
|
|
|
|
be called pm-notifier-error-inject.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2012-12-14 07:32:52 +08:00
|
|
|
config OF_RECONFIG_NOTIFIER_ERROR_INJECT
|
|
|
|
tristate "OF reconfig notifier error injection module"
|
|
|
|
depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
|
2012-07-31 05:43:13 +08:00
|
|
|
help
|
2012-11-30 15:44:39 +08:00
|
|
|
This option provides the ability to inject artificial errors to
|
2012-12-14 07:32:52 +08:00
|
|
|
OF reconfig notifier chain callbacks. It is controlled
|
2012-07-31 05:43:13 +08:00
|
|
|
through debugfs interface under
|
2012-12-14 07:32:52 +08:00
|
|
|
/sys/kernel/debug/notifier-error-inject/OF-reconfig/
|
2012-07-31 05:43:13 +08:00
|
|
|
|
|
|
|
If the notifier call chain should be failed with some events
|
|
|
|
notified, write the error code to "actions/<notifier event>/error".
|
|
|
|
|
|
|
|
To compile this code as a module, choose M here: the module will
|
2013-05-01 06:28:49 +08:00
|
|
|
be called of-reconfig-notifier-error-inject.
|
2012-07-31 05:43:13 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2015-11-28 20:45:28 +08:00
|
|
|
config NETDEV_NOTIFIER_ERROR_INJECT
|
|
|
|
tristate "Netdev notifier error injection module"
|
|
|
|
depends on NET && NOTIFIER_ERROR_INJECTION
|
|
|
|
help
|
|
|
|
This option provides the ability to inject artificial errors to
|
|
|
|
netdevice notifier chain callbacks. It is controlled through debugfs
|
|
|
|
interface /sys/kernel/debug/notifier-error-inject/netdev
|
|
|
|
|
|
|
|
If the notifier call chain should be failed with some events
|
|
|
|
notified, write the error code to "actions/<notifier event>/error".
|
|
|
|
|
|
|
|
Example: Inject netdevice mtu change error (-22 = -EINVAL)
|
|
|
|
|
|
|
|
# cd /sys/kernel/debug/notifier-error-inject/netdev
|
|
|
|
# echo -22 > actions/NETDEV_CHANGEMTU/error
|
|
|
|
# ip link set eth0 mtu 1024
|
|
|
|
RTNETLINK answers: Invalid argument
|
|
|
|
|
|
|
|
To compile this code as a module, choose M here: the module will
|
|
|
|
be called netdev-notifier-error-inject.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-06-15 06:27:48 +08:00
|
|
|
config FUNCTION_ERROR_INJECTION
|
|
|
|
def_bool y
|
|
|
|
depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
|
|
|
|
|
2006-12-08 18:39:43 +08:00
|
|
|
config FAULT_INJECTION
|
2006-12-08 18:39:49 +08:00
|
|
|
bool "Fault-injection framework"
|
|
|
|
depends on DEBUG_KERNEL
|
2006-12-08 18:39:48 +08:00
|
|
|
help
|
|
|
|
Provide fault-injection framework.
|
|
|
|
For more details, see Documentation/fault-injection/.
|
2006-12-08 18:39:43 +08:00
|
|
|
|
2006-12-08 18:39:44 +08:00
|
|
|
config FAILSLAB
|
2006-12-08 18:39:49 +08:00
|
|
|
bool "Fault-injection capability for kmalloc"
|
|
|
|
depends on FAULT_INJECTION
|
2008-12-23 18:37:01 +08:00
|
|
|
depends on SLAB || SLUB
|
2006-12-08 18:39:44 +08:00
|
|
|
help
|
2006-12-08 18:39:49 +08:00
|
|
|
Provide fault-injection capability for kmalloc.
|
2006-12-08 18:39:44 +08:00
|
|
|
|
2006-12-08 18:39:45 +08:00
|
|
|
config FAIL_PAGE_ALLOC
|
|
|
|
bool "Fault-injection capability for alloc_pages()"
|
2006-12-08 18:39:49 +08:00
|
|
|
depends on FAULT_INJECTION
|
2006-12-08 18:39:45 +08:00
|
|
|
help
|
2006-12-08 18:39:49 +08:00
|
|
|
Provide fault-injection capability for alloc_pages().
|
2006-12-08 18:39:45 +08:00
|
|
|
|
2006-12-08 18:39:46 +08:00
|
|
|
config FAIL_MAKE_REQUEST
|
2006-12-13 03:16:36 +08:00
|
|
|
bool "Fault-injection capability for disk IO"
|
2008-09-14 20:56:33 +08:00
|
|
|
depends on FAULT_INJECTION && BLOCK
|
2006-12-08 18:39:46 +08:00
|
|
|
help
|
2006-12-08 18:39:49 +08:00
|
|
|
Provide fault-injection capability for disk IO.
|
2006-12-08 18:39:46 +08:00
|
|
|
|
2008-09-14 20:56:33 +08:00
|
|
|
config FAIL_IO_TIMEOUT
|
2010-07-21 15:05:53 +08:00
|
|
|
bool "Fault-injection capability for faking disk interrupts"
|
2008-09-14 20:56:33 +08:00
|
|
|
depends on FAULT_INJECTION && BLOCK
|
|
|
|
help
|
|
|
|
Provide fault-injection capability on end IO handling. This
|
|
|
|
will make the block layer "forget" an interrupt as configured,
|
|
|
|
thus exercising the error handling.
|
|
|
|
|
|
|
|
Only works with drivers that use the generic timeout handling,
|
|
|
|
for others it won't do anything.
|
|
|
|
|
2015-06-30 14:26:02 +08:00
|
|
|
config FAIL_FUTEX
|
|
|
|
bool "Fault-injection capability for futexes"
|
|
|
|
select DEBUG_FS
|
|
|
|
depends on FAULT_INJECTION && FUTEX
|
|
|
|
help
|
|
|
|
Provide fault-injection capability for futexes.
|
|
|
|
|
2018-06-15 06:27:48 +08:00
|
|
|
config FAULT_INJECTION_DEBUG_FS
|
|
|
|
bool "Debugfs entries for fault-injection capabilities"
|
|
|
|
depends on FAULT_INJECTION && SYSFS && DEBUG_FS
|
|
|
|
help
|
|
|
|
Enable configuration of fault-injection capabilities via debugfs.
|
|
|
|
|
2018-01-13 01:56:03 +08:00
|
|
|
config FAIL_FUNCTION
|
|
|
|
bool "Fault-injection capability for functions"
|
|
|
|
depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
|
|
|
|
help
|
|
|
|
Provide function-based fault-injection capability.
|
|
|
|
This will allow you to override a specific function with a return
|
|
|
|
with given return value. As a result, function caller will see
|
|
|
|
an error value and have to handle it. This is useful to test the
|
|
|
|
error handling in various subsystems.
|
|
|
|
|
2018-06-15 06:27:48 +08:00
|
|
|
config FAIL_MMC_REQUEST
|
|
|
|
bool "Fault-injection capability for MMC IO"
|
|
|
|
depends on FAULT_INJECTION_DEBUG_FS && MMC
|
2006-12-08 18:39:43 +08:00
|
|
|
help
|
2018-06-15 06:27:48 +08:00
|
|
|
Provide fault-injection capability for MMC IO.
|
|
|
|
This will make the mmc core return data errors. This is
|
|
|
|
useful to test the error handling in the mmc block device
|
|
|
|
and to test how the mmc host driver handles retries from
|
|
|
|
the block device.
|
2007-02-21 05:57:56 +08:00
|
|
|
|
|
|
|
config FAULT_INJECTION_STACKTRACE_FILTER
|
|
|
|
bool "stacktrace filter for fault-injection capabilities"
|
|
|
|
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
|
2007-05-13 01:36:53 +08:00
|
|
|
depends on !X86_64
|
2007-02-21 05:57:56 +08:00
|
|
|
select STACKTRACE
|
2018-03-08 06:30:54 +08:00
|
|
|
select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
|
2007-02-21 05:57:56 +08:00
|
|
|
help
|
|
|
|
Provide stacktrace filter for fault-injection capabilities
|
2007-10-19 14:41:07 +08:00
|
|
|
|
2008-01-26 04:08:34 +08:00
|
|
|
config LATENCYTOP
|
|
|
|
bool "Latency measuring infrastructure"
|
2010-08-13 03:31:21 +08:00
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
depends on STACKTRACE_SUPPORT
|
|
|
|
depends on PROC_FS
|
2017-07-25 07:36:58 +08:00
|
|
|
select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
|
2008-01-26 04:08:34 +08:00
|
|
|
select KALLSYMS
|
|
|
|
select KALLSYMS_ALL
|
|
|
|
select STACKTRACE
|
|
|
|
select SCHEDSTATS
|
|
|
|
select SCHED_DEBUG
|
|
|
|
help
|
|
|
|
Enable this option if you want to use the LatencyTOP tool
|
|
|
|
to find out which userspace is blocking on what kernel operations.
|
|
|
|
|
2008-05-13 03:20:42 +08:00
|
|
|
source kernel/trace/Kconfig
|
|
|
|
|
2017-10-14 06:57:33 +08:00
|
|
|
config PROVIDE_OHCI1394_DMA_INIT
|
|
|
|
bool "Remote debugging over FireWire early on boot"
|
|
|
|
depends on PCI && X86
|
|
|
|
help
|
|
|
|
If you want to debug problems which hang or crash the kernel early
|
|
|
|
on boot and the crashing machine has a FireWire port, you can use
|
|
|
|
this feature to remotely access the memory of the crashed machine
|
|
|
|
over FireWire. This employs remote DMA as part of the OHCI1394
|
|
|
|
specification which is now the standard for FireWire controllers.
|
|
|
|
|
|
|
|
With remote DMA, you can monitor the printk buffer remotely using
|
|
|
|
firescope and access all memory below 4GB using fireproxy from gdb.
|
|
|
|
Even controlling a kernel debugger is possible using remote DMA.
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
|
|
|
If ohci1394_dma=early is used as boot parameter, it will initialize
|
|
|
|
all OHCI1394 controllers which are found in the PCI config space.
|
|
|
|
|
|
|
|
As all changes to the FireWire bus such as enabling and disabling
|
|
|
|
devices cause a bus reset and thereby disable remote DMA for all
|
|
|
|
devices, be sure to have the cable plugged and FireWire enabled on
|
|
|
|
the debugging host before booting the debug target for debugging.
|
|
|
|
|
|
|
|
This code (~1k) is freed after boot. By then, the firewire stack
|
|
|
|
in charge of the OHCI-1394 controllers should be used instead.
|
|
|
|
|
|
|
|
See Documentation/debugging-via-ohci1394.txt for more information.
|
|
|
|
|
|
|
|
config DMA_API_DEBUG
|
|
|
|
bool "Enable debugging of DMA-API usage"
|
2018-05-09 12:53:49 +08:00
|
|
|
select NEED_DMA_MAP_STATE
|
2017-10-14 06:57:33 +08:00
|
|
|
help
|
|
|
|
Enable this option to debug the use of the DMA API by device drivers.
|
|
|
|
With this option you will be able to detect common bugs in device
|
|
|
|
drivers like double-freeing of DMA mappings or freeing mappings that
|
|
|
|
were never allocated.
|
|
|
|
|
|
|
|
This also attempts to catch cases where a page owned by DMA is
|
|
|
|
accessed by the cpu in a way that could cause data corruption. For
|
|
|
|
example, this enables cow_user_page() to check that the source page is
|
|
|
|
not undergoing DMA.
|
|
|
|
|
|
|
|
This option causes a performance degradation. Use only if you want to
|
|
|
|
debug device drivers and dma interactions.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-05-21 19:35:13 +08:00
|
|
|
config DMA_API_DEBUG_SG
|
|
|
|
bool "Debug DMA scatter-gather usage"
|
|
|
|
default y
|
|
|
|
depends on DMA_API_DEBUG
|
|
|
|
help
|
|
|
|
Perform extra checking that callers of dma_map_sg() have respected the
|
|
|
|
appropriate segment length/boundary limits for the given device when
|
|
|
|
preparing DMA scatterlists.
|
|
|
|
|
|
|
|
This is particularly likely to have been overlooked in cases where the
|
|
|
|
dma_map_sg() API is used for general bulk mapping of pages rather than
|
|
|
|
preparing literal scatter-gather descriptors, where there is a risk of
|
|
|
|
unexpected behaviour from DMA API implementations if the scatterlist
|
|
|
|
is technically out-of-spec.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-02-07 07:38:38 +08:00
|
|
|
menuconfig RUNTIME_TESTING_MENU
|
|
|
|
bool "Runtime Testing"
|
2018-02-22 06:46:05 +08:00
|
|
|
def_bool y
|
2018-02-07 07:38:38 +08:00
|
|
|
|
|
|
|
if RUNTIME_TESTING_MENU
|
2013-07-02 04:04:44 +08:00
|
|
|
|
|
|
|
config LKDTM
|
|
|
|
tristate "Linux Kernel Dump Test Tool Module"
|
|
|
|
depends on DEBUG_FS
|
|
|
|
depends on BLOCK
|
|
|
|
help
|
|
|
|
This module enables testing of the different dumping mechanisms by
|
|
|
|
inducing system failures at predefined crash points.
|
|
|
|
If you don't need it: say N
|
|
|
|
Choose M here to compile this code as a module. The module will be
|
|
|
|
called lkdtm.
|
|
|
|
|
|
|
|
Documentation on how to use the module can be found in
|
|
|
|
Documentation/fault-injection/provoke-crashes.txt
|
|
|
|
|
|
|
|
config TEST_LIST_SORT
|
2017-05-09 06:55:26 +08:00
|
|
|
tristate "Linked list sorting test"
|
|
|
|
depends on DEBUG_KERNEL || m
|
2013-07-02 04:04:44 +08:00
|
|
|
help
|
|
|
|
Enable this to turn on 'list_sort()' function test. This test is
|
2017-05-09 06:55:26 +08:00
|
|
|
executed only once during system boot (so affects only boot time),
|
|
|
|
or at module load time.
|
2013-07-02 04:04:44 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2017-02-25 07:01:07 +08:00
|
|
|
config TEST_SORT
|
2017-05-09 06:55:23 +08:00
|
|
|
tristate "Array-based sort test"
|
|
|
|
depends on DEBUG_KERNEL || m
|
2017-02-25 07:01:07 +08:00
|
|
|
help
|
2017-05-09 06:55:23 +08:00
|
|
|
This option enables the self-test function of 'sort()' at boot,
|
|
|
|
or at module load time.
|
2017-02-25 07:01:07 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2013-07-02 04:04:44 +08:00
|
|
|
config KPROBES_SANITY_TEST
|
|
|
|
bool "Kprobes sanity tests"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
depends on KPROBES
|
|
|
|
help
|
|
|
|
This option provides for testing basic kprobes functionality on
|
2018-06-20 00:05:07 +08:00
|
|
|
boot. Samples of kprobe and kretprobe are inserted and
|
2013-07-02 04:04:44 +08:00
|
|
|
verified for functionality.
|
|
|
|
|
|
|
|
Say N if you are unsure.
|
|
|
|
|
|
|
|
config BACKTRACE_SELF_TEST
|
|
|
|
tristate "Self test for the backtrace code"
|
|
|
|
depends on DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
This option provides a kernel module that can be used to test
|
|
|
|
the kernel stack backtrace code. This option is not useful
|
|
|
|
for distributions or general kernels, but only for kernel
|
|
|
|
developers working on architecture code.
|
|
|
|
|
|
|
|
Note that if you want to also test saved backtraces, you will
|
|
|
|
have to enable STACKTRACE as well.
|
|
|
|
|
|
|
|
Say N if you are unsure.
|
|
|
|
|
2012-10-09 07:30:39 +08:00
|
|
|
config RBTREE_TEST
|
|
|
|
tristate "Red-Black tree test"
|
2013-09-12 05:25:19 +08:00
|
|
|
depends on DEBUG_KERNEL
|
2012-10-09 07:30:39 +08:00
|
|
|
help
|
|
|
|
A benchmark measuring the performance of the rbtree library.
|
|
|
|
Also includes rbtree invariant checks.
|
|
|
|
|
rbtree: add prio tree and interval tree tests
Patch 1 implements support for interval trees, on top of the augmented
rbtree API. It also adds synthetic tests to compare the performance of
interval trees vs prio trees. The short answer is that interval trees are
slightly faster (~25%) on insert/erase, and much faster (~2.4 - 3x)
on search. It is debatable how realistic the synthetic test is, and I have
not made such measurements yet, but my impression is that interval trees
would still come out faster.
Patch 2 uses a preprocessor template to make the interval tree generic,
and uses it as a replacement for the vma prio_tree.
Patch 3 takes the other prio_tree user, kmemleak, and converts it to use
a basic rbtree. We don't actually need the augmented rbtree support here
because the intervals are always non-overlapping.
Patch 4 removes the now-unused prio tree library.
Patch 5 proposes an additional optimization to rb_erase_augmented, now
providing it as an inline function so that the augmented callbacks can be
inlined in. This provides an additional 5-10% performance improvement
for the interval tree insert/erase benchmark. There is a maintenance cost
as it exposes augmented rbtree users to some of the rbtree library internals;
however I think this cost shouldn't be too high as I expect the augmented
rbtree will always have far fewer users than the base rbtree.
I should probably add a quick summary of why I think it makes sense to
replace prio trees with augmented rbtree based interval trees now. One of
the drivers is that we need augmented rbtrees for Rik's vma gap finding
code, and once you have them, it just makes sense to use them for interval
trees as well, as this is the simpler and more well known algorithm. prio
trees, in comparison, seem *too* clever: they impose an additional 'heap'
constraint on the tree, which they use to guarantee a faster worst-case
complexity of O(k+log N) for stabbing queries in a well-balanced prio
tree, vs O(k*log N) for interval trees (where k=number of matches,
N=number of intervals). Now this sounds great, but in practice prio trees
don't realize this theoretical benefit. First, the additional constraint
makes them harder to update, so that the kernel implementation has to
simplify things by balancing them like a radix tree, which is not always
ideal. Second, the fact that there are both index and heap properties
makes both tree manipulation and search more complex, which results in a
higher multiplicative time constant. As it turns out, the simple interval
tree algorithm ends up running faster than the more clever prio tree.
This patch:
Add two test modules:
- prio_tree_test measures the performance of lib/prio_tree.c, both for
insertion/removal and for stabbing searches
- interval_tree_test measures the performance of a library of equivalent
functionality, built using the augmented rbtree support.
In order to support the second test module, lib/interval_tree.c is
introduced. It is kept separate from the interval_tree_test main file
for two reasons: first we don't want to provide an unfair advantage
over prio_tree_test by having everything in a single compilation unit,
and second there is the possibility that the interval tree functionality
could get some non-test users in kernel over time.
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-09 07:31:23 +08:00
|
|
|
config INTERVAL_TREE_TEST
|
|
|
|
tristate "Interval tree test"
|
2017-07-11 06:51:43 +08:00
|
|
|
depends on DEBUG_KERNEL
|
2014-03-17 20:21:54 +08:00
|
|
|
select INTERVAL_TREE
|
rbtree: add prio tree and interval tree tests
Patch 1 implements support for interval trees, on top of the augmented
rbtree API. It also adds synthetic tests to compare the performance of
interval trees vs prio trees. The short answer is that interval trees are
slightly faster (~25%) on insert/erase, and much faster (~2.4 - 3x)
on search. It is debatable how realistic the synthetic test is, and I have
not made such measurements yet, but my impression is that interval trees
would still come out faster.
Patch 2 uses a preprocessor template to make the interval tree generic,
and uses it as a replacement for the vma prio_tree.
Patch 3 takes the other prio_tree user, kmemleak, and converts it to use
a basic rbtree. We don't actually need the augmented rbtree support here
because the intervals are always non-overlapping.
Patch 4 removes the now-unused prio tree library.
Patch 5 proposes an additional optimization to rb_erase_augmented, now
providing it as an inline function so that the augmented callbacks can be
inlined in. This provides an additional 5-10% performance improvement
for the interval tree insert/erase benchmark. There is a maintenance cost
as it exposes augmented rbtree users to some of the rbtree library internals;
however I think this cost shouldn't be too high as I expect the augmented
rbtree will always have far fewer users than the base rbtree.
I should probably add a quick summary of why I think it makes sense to
replace prio trees with augmented rbtree based interval trees now. One of
the drivers is that we need augmented rbtrees for Rik's vma gap finding
code, and once you have them, it just makes sense to use them for interval
trees as well, as this is the simpler and more well known algorithm. prio
trees, in comparison, seem *too* clever: they impose an additional 'heap'
constraint on the tree, which they use to guarantee a faster worst-case
complexity of O(k+log N) for stabbing queries in a well-balanced prio
tree, vs O(k*log N) for interval trees (where k=number of matches,
N=number of intervals). Now this sounds great, but in practice prio trees
don't realize this theoretical benefit. First, the additional constraint
makes them harder to update, so that the kernel implementation has to
simplify things by balancing them like a radix tree, which is not always
ideal. Second, the fact that there are both index and heap properties
makes both tree manipulation and search more complex, which results in a
higher multiplicative time constant. As it turns out, the simple interval
tree algorithm ends up running faster than the more clever prio tree.
This patch:
Add two test modules:
- prio_tree_test measures the performance of lib/prio_tree.c, both for
insertion/removal and for stabbing searches
- interval_tree_test measures the performance of a library of equivalent
functionality, built using the augmented rbtree support.
In order to support the second test module, lib/interval_tree.c is
introduced. It is kept separate from the interval_tree_test main file
for two reasons: first we don't want to provide an unfair advantage
over prio_tree_test by having everything in a single compilation unit,
and second there is the possibility that the interval tree functionality
could get some non-test users in kernel over time.
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-09 07:31:23 +08:00
|
|
|
help
|
|
|
|
A benchmark measuring the performance of the interval tree library
|
|
|
|
|
2013-11-13 07:08:34 +08:00
|
|
|
config PERCPU_TEST
|
|
|
|
tristate "Per cpu operations test"
|
|
|
|
depends on m && DEBUG_KERNEL
|
|
|
|
help
|
|
|
|
Enable this option to build test module which validates per-cpu
|
|
|
|
operations.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2013-07-02 04:04:44 +08:00
|
|
|
config ATOMIC64_SELFTEST
|
2017-02-25 07:00:55 +08:00
|
|
|
tristate "Perform an atomic64_t self-test"
|
2013-07-02 04:04:44 +08:00
|
|
|
help
|
2017-02-25 07:00:55 +08:00
|
|
|
Enable this option to test the atomic64_t functions at boot or
|
|
|
|
at module load time.
|
2013-07-02 04:04:44 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
|
|
|
config ASYNC_RAID6_TEST
|
|
|
|
tristate "Self test for hardware accelerated raid6 recovery"
|
|
|
|
depends on ASYNC_RAID6_RECOV
|
|
|
|
select ASYNC_MEMCPY
|
|
|
|
---help---
|
|
|
|
This is a one-shot self test that permutes through the
|
|
|
|
recovery of all the possible two disk failure scenarios for an
|
|
|
|
N-disk array. Recovery is performed with the asynchronous
|
|
|
|
raid6 recovery routines, and will optionally use an offload
|
|
|
|
engine if one is available.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2015-02-13 07:02:21 +08:00
|
|
|
config TEST_HEXDUMP
|
|
|
|
tristate "Test functions located in the hexdump module at runtime"
|
|
|
|
|
2013-07-02 04:04:44 +08:00
|
|
|
config TEST_STRING_HELPERS
|
|
|
|
tristate "Test functions located in the string_helpers module at runtime"
|
|
|
|
|
|
|
|
config TEST_KSTRTOX
|
|
|
|
tristate "Test kstrto*() family of functions at runtime"
|
|
|
|
|
2015-11-07 08:30:29 +08:00
|
|
|
config TEST_PRINTF
|
|
|
|
tristate "Test printf() family of functions at runtime"
|
|
|
|
|
2016-02-19 22:24:00 +08:00
|
|
|
config TEST_BITMAP
|
|
|
|
tristate "Test bitmap_*() family of functions at runtime"
|
|
|
|
help
|
|
|
|
Enable this option to test the bitmap functions at boot.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-06-20 14:58:30 +08:00
|
|
|
config TEST_BITFIELD
|
|
|
|
tristate "Test bitfield functions at runtime"
|
|
|
|
help
|
|
|
|
Enable this option to test the bitfield functions at boot.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2016-05-30 22:40:41 +08:00
|
|
|
config TEST_UUID
|
|
|
|
tristate "Test functions located in the uuid module at runtime"
|
|
|
|
|
2018-05-08 06:36:28 +08:00
|
|
|
config TEST_OVERFLOW
|
|
|
|
tristate "Test check_*_overflow() functions at runtime"
|
|
|
|
|
2014-08-02 17:47:44 +08:00
|
|
|
config TEST_RHASHTABLE
|
2015-01-29 22:40:25 +08:00
|
|
|
tristate "Perform selftest on resizable hash table"
|
2014-08-02 17:47:44 +08:00
|
|
|
help
|
|
|
|
Enable this option to test the rhashtable functions at boot.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2016-05-27 10:11:51 +08:00
|
|
|
config TEST_HASH
|
|
|
|
tristate "Perform selftest on hash functions"
|
|
|
|
help
|
siphash: add cryptographically secure PRF
SipHash is a 64-bit keyed hash function that is actually a
cryptographically secure PRF, like HMAC. Except SipHash is super fast,
and is meant to be used as a hashtable keyed lookup function, or as a
general PRF for short input use cases, such as sequence numbers or RNG
chaining.
For the first usage:
There are a variety of attacks known as "hashtable poisoning" in which an
attacker forms some data such that the hash of that data will be the
same, and then proceeds to fill up all entries of a hashbucket. This is
a realistic and well-known denial-of-service vector. Currently
hashtables use jhash, which is fast but not secure, and some kind of
rotating key scheme (or none at all, which isn't good). SipHash is meant
as a replacement for jhash in these cases.
There are a modicum of places in the kernel that are vulnerable to
hashtable poisoning attacks, either via userspace vectors or network
vectors, and there's not a reliable mechanism inside the kernel at the
moment to fix it. The first step toward fixing these issues is actually
getting a secure primitive into the kernel for developers to use. Then
we can, bit by bit, port things over to it as deemed appropriate.
While SipHash is extremely fast for a cryptographically secure function,
it is likely a bit slower than the insecure jhash, and so replacements
will be evaluated on a case-by-case basis based on whether or not the
difference in speed is negligible and whether or not the current jhash usage
poses a real security risk.
For the second usage:
A few places in the kernel are using MD5 or SHA1 for creating secure
sequence numbers, syn cookies, port numbers, or fast random numbers.
SipHash is a faster and more fitting, and more secure replacement for MD5
in those situations. Replacing MD5 and SHA1 with SipHash for these uses is
obvious and straight-forward, and so is submitted along with this patch
series. There shouldn't be much of a debate over its efficacy.
Dozens of languages are already using this internally for their hash
tables and PRFs. Some of the BSDs already use this in their kernels.
SipHash is a widely known high-speed solution to a widely known set of
problems, and it's time we catch-up.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-01-08 20:54:00 +08:00
|
|
|
Enable this option to test the kernel's integer (<linux/hash.h>),
|
|
|
|
string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
|
|
|
|
hash functions on boot (or module load).
|
2016-05-27 10:11:51 +08:00
|
|
|
|
|
|
|
This is intended to help people writing architecture-specific
|
|
|
|
optimized versions. If unsure, say N.
|
|
|
|
|
2018-06-19 04:59:29 +08:00
|
|
|
config TEST_IDA
|
|
|
|
tristate "Perform selftest on IDA functions"
|
|
|
|
|
2017-02-03 17:29:06 +08:00
|
|
|
config TEST_PARMAN
|
|
|
|
tristate "Perform selftest on priority array manager"
|
|
|
|
depends on PARMAN
|
|
|
|
help
|
|
|
|
Enable this option to test priority array manager on boot
|
|
|
|
(or module load).
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-10-14 06:51:38 +08:00
|
|
|
config TEST_LKM
|
test: add minimal module for verification testing
This is a pair of test modules I'd like to see in the tree. Instead of
putting these in lkdtm, where I've been adding various tests that trigger
crashes, these don't make sense there since they need to be either
distinctly separate, or their pass/fail state don't need to crash the
machine.
These live in lib/ for now, along with a few other in-kernel test modules,
and use the slightly more common "test_" naming convention, instead of
"test-". We should likely standardize on the former:
$ find . -name 'test_*.c' | grep -v /tools/ | wc -l
4
$ find . -name 'test-*.c' | grep -v /tools/ | wc -l
2
The first is entirely a no-op module, designed to allow simple testing of
the module loading and verification interface. It's useful to have a
module that has no other uses or dependencies so it can be reliably used
for just testing module loading and verification.
The second is a module that exercises the user memory access functions, in
an effort to make sure that we can quickly catch any regressions in
boundary checking (e.g. like what was recently fixed on ARM).
This patch (of 2):
When doing module loading verification tests (for example, with module
signing, or LSM hooks), it is very handy to have a module that can be
built on all systems under test, isn't auto-loaded at boot, and has no
device or similar dependencies. This creates the "test_module.ko" module
for that purpose, which only reports its load and unload to printk.
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-01-24 07:54:37 +08:00
|
|
|
tristate "Test module loading with 'hello world' module"
|
|
|
|
depends on m
|
|
|
|
help
|
|
|
|
This builds the "test_module" module that emits "Hello, world"
|
|
|
|
on printk when loaded. It is designed to be used for basic
|
|
|
|
evaluation of the module loading subsystem (for example when
|
|
|
|
validating module verification). It lacks any extra dependencies,
|
|
|
|
and will not normally be loaded by the system unless explicitly
|
|
|
|
requested by name.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-01-24 07:54:38 +08:00
|
|
|
config TEST_USER_COPY
|
|
|
|
tristate "Test user/kernel boundary protections"
|
|
|
|
depends on m
|
|
|
|
help
|
|
|
|
This builds the "test_user_copy" module that runs sanity checks
|
|
|
|
on the copy_to/from_user infrastructure, making sure basic
|
|
|
|
user/kernel boundary testing is working. If it fails to load,
|
|
|
|
a regression has been detected in the user/kernel memory boundary
|
|
|
|
protections.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-05-09 05:10:52 +08:00
|
|
|
config TEST_BPF
|
|
|
|
tristate "Test BPF filter functionality"
|
2014-05-14 00:58:44 +08:00
|
|
|
depends on m && NET
|
2014-05-09 05:10:52 +08:00
|
|
|
help
|
|
|
|
This builds the "test_bpf" module that runs various test vectors
|
|
|
|
against the BPF interpreter or BPF JIT compiler depending on the
|
|
|
|
current setting. This is in particular useful for BPF JIT compiler
|
|
|
|
development, but also to run regression tests against changes in
|
bpf: mini eBPF library, test stubs and verifier testsuite
1.
the library includes a trivial set of BPF syscall wrappers:
int bpf_create_map(int key_size, int value_size, int max_entries);
int bpf_update_elem(int fd, void *key, void *value);
int bpf_lookup_elem(int fd, void *key, void *value);
int bpf_delete_elem(int fd, void *key);
int bpf_get_next_key(int fd, void *key, void *next_key);
int bpf_prog_load(enum bpf_prog_type prog_type,
const struct sock_filter_int *insns, int insn_len,
const char *license);
bpf_prog_load() stores verifier log into global bpf_log_buf[] array
and BPF_*() macros to build instructions
2.
test stubs configure eBPF infra with 'unspec' map and program types.
These are fake types used by user space testsuite only.
3.
verifier tests valid and invalid programs and expects predefined
error log messages from kernel.
40 tests so far.
$ sudo ./test_verifier
#0 add+sub+mul OK
#1 unreachable OK
#2 unreachable2 OK
#3 out of range jump OK
#4 out of range jump2 OK
#5 test1 ld_imm64 OK
...
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:07 +08:00
|
|
|
the interpreter code. It also enables test stubs for eBPF maps and
|
|
|
|
verifier used by user space verifier testsuite.
|
2014-05-09 05:10:52 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-02-07 07:38:27 +08:00
|
|
|
config FIND_BIT_BENCHMARK
|
lib: test module for find_*_bit() functions
find_bit functions are widely used in the kernel, including hot paths.
This module tests performance of those functions in 2 typical scenarios:
randomly filled bitmap with relatively equal distribution of set and
cleared bits, and sparse bitmap which has 1 set bit for 500 cleared
bits.
On ThunderX machine:
Start testing find_bit() with random-filled bitmap
find_next_bit: 240043 cycles, 164062 iterations
find_next_zero_bit: 312848 cycles, 163619 iterations
find_last_bit: 193748 cycles, 164062 iterations
find_first_bit: 177720874 cycles, 164062 iterations
Start testing find_bit() with sparse bitmap
find_next_bit: 3633 cycles, 656 iterations
find_next_zero_bit: 620399 cycles, 327025 iterations
find_last_bit: 3038 cycles, 656 iterations
find_first_bit: 691407 cycles, 656 iterations
[arnd@arndb.de: use correct format string for find-bit tests]
Link: http://lkml.kernel.org/r/20171113135605.3166307-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20171109140714.13168-1-ynorov@caviumnetworks.com
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Clement Courbet <courbet@google.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-18 07:28:31 +08:00
|
|
|
tristate "Test find_bit functions"
|
|
|
|
help
|
|
|
|
This builds the "test_find_bit" module that measures find_*_bit()
|
|
|
|
functions performance.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-07-15 05:38:12 +08:00
|
|
|
config TEST_FIRMWARE
|
|
|
|
tristate "Test firmware loading via userspace interface"
|
|
|
|
depends on FW_LOADER
|
|
|
|
help
|
|
|
|
This builds the "test_firmware" module that creates a userspace
|
|
|
|
interface for testing firmware loading. This can be used to
|
|
|
|
control the triggering of firmware loading without needing an
|
|
|
|
actual firmware-using device. The contents can be rechecked by
|
|
|
|
userspace.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2017-07-13 05:33:43 +08:00
|
|
|
config TEST_SYSCTL
|
|
|
|
tristate "sysctl test driver"
|
|
|
|
depends on PROC_SYSCTL
|
|
|
|
help
|
|
|
|
This builds the "test_sysctl" module. This driver allows testing the
|
|
|
|
proc sysctl interfaces available to drivers safely without affecting
|
|
|
|
production knobs which might alter system functionality.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2014-06-17 05:58:32 +08:00
|
|
|
config TEST_UDELAY
|
|
|
|
tristate "udelay test driver"
|
|
|
|
help
|
|
|
|
This builds the "udelay_test" module that helps to make sure
|
|
|
|
that udelay() is working properly.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2015-08-03 17:42:57 +08:00
|
|
|
config TEST_STATIC_KEYS
|
|
|
|
tristate "Test static keys"
|
2015-07-30 11:59:44 +08:00
|
|
|
depends on m
|
|
|
|
help
|
2015-08-03 17:42:57 +08:00
|
|
|
Test the static key interfaces.
|
2015-07-30 11:59:44 +08:00
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
kmod: add test driver to stress test the module loader
This adds a new stress test driver for kmod: the kernel module loader.
The new stress test driver, test_kmod, is only enabled as a module right
now. It should be possible to load this as built-in and load tests
early (refer to the force_init_test module parameter), however since a
lot of test can get a system out of memory fast we leave this disabled
for now.
Using a system with 1024 MiB of RAM can *easily* get your kernel OOM
fast with this test driver.
The test_kmod driver exposes API knobs for us to fine tune simple
request_module() and get_fs_type() calls. Since these API calls only
allow each one parameter a test driver for these is rather simple.
Other factors that can help out test driver though are the number of
calls we issue and knowing current limitations of each. This exposes
configuration as much as possible through userspace to be able to build
tests directly from userspace.
Since it allows multiple misc devices it will eventually (once we add a
knob to let us create new devices at will) also be possible to perform
more tests in parallel, provided you have enough memory.
We only enable tests we know work as of right now.
Demo screenshots:
# tools/testing/selftests/kmod/kmod.sh
kmod_test_0001_driver: OK! - loading kmod test
kmod_test_0001_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0001_fs: OK! - loading kmod test
kmod_test_0001_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
kmod_test_0002_driver: OK! - loading kmod test
kmod_test_0002_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0002_fs: OK! - loading kmod test
kmod_test_0002_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
kmod_test_0003: OK! - loading kmod test
kmod_test_0003: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0004: OK! - loading kmod test
kmod_test_0004: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0005: OK! - loading kmod test
kmod_test_0005: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0006: OK! - loading kmod test
kmod_test_0006: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0005: OK! - loading kmod test
kmod_test_0005: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0006: OK! - loading kmod test
kmod_test_0006: OK! - Return value: 0 (SUCCESS), expected SUCCESS
XXX: add test result for 0007
Test completed
You can also request for specific tests:
# tools/testing/selftests/kmod/kmod.sh -t 0001
kmod_test_0001_driver: OK! - loading kmod test
kmod_test_0001_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0001_fs: OK! - loading kmod test
kmod_test_0001_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
Test completed
Lastly, the current available number of tests:
# tools/testing/selftests/kmod/kmod.sh --help
Usage: tools/testing/selftests/kmod/kmod.sh [ -t <4-number-digit> ]
Valid tests: 0001-0009
0001 - Simple test - 1 thread for empty string
0002 - Simple test - 1 thread for modules/filesystems that do not exist
0003 - Simple test - 1 thread for get_fs_type() only
0004 - Simple test - 2 threads for get_fs_type() only
0005 - multithreaded tests with default setup - request_module() only
0006 - multithreaded tests with default setup - get_fs_type() only
0007 - multithreaded tests with default setup test request_module() and get_fs_type()
0008 - multithreaded - push kmod_concurrent over max_modprobes for request_module()
0009 - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()
The following test cases currently fail, as such they are not currently
enabled by default:
# tools/testing/selftests/kmod/kmod.sh -t 0008
# tools/testing/selftests/kmod/kmod.sh -t 0009
To be sure to run them as intended please unload both of the modules:
o test_module
o xfs
And ensure they are not loaded on your system prior to testing them. If
you use these partitions for your rootfs you can change the default test
driver used for get_fs_type() by exporting it into your environment. For
example of other test defaults you can override refer to kmod.sh
allow_user_defaults().
Behind the scenes this is how we fine-tune a test case prior to
hitting a trigger to run it:
cat /sys/devices/virtual/misc/test_kmod0/config
echo -n "2" > /sys/devices/virtual/misc/test_kmod0/config_test_case
echo -n "ext4" > /sys/devices/virtual/misc/test_kmod0/config_test_fs
echo -n "80" > /sys/devices/virtual/misc/test_kmod0/config_num_threads
cat /sys/devices/virtual/misc/test_kmod0/config
echo -n "1" > /sys/devices/virtual/misc/test_kmod0/config_num_threads
Finally to trigger:
echo -n "1" > /sys/devices/virtual/misc/test_kmod0/trigger_config
The kmod.sh script uses the above constructs to build different test cases.
A bit of interpretation of the current failures follows, first two
premises:
a) When request_module() is used userspace figures out an optimized
version of module order for us. Once it finds the modules it needs, as
per depmod symbol dep map, it will finit_module() the respective
modules which are needed for the original request_module() request.
b) We have an optimization in place whereby if a kernel uses
request_module() on a module already loaded we never bother userspace
as the module already is loaded. This is all handled by kernel/kmod.c.
A few things to consider to help identify root causes of issues:
0) kmod 19 has a broken heuristic for modules being assumed to be
built-in to your kernel and will return 0 even though request_module()
failed. Upgrade to a newer version of kmod.
1) A get_fs_type() call for "xfs" will request_module() for "fs-xfs",
not for "xfs". The optimization in kernel described in b) fails to
catch if we have a lot of consecutive get_fs_type() calls. The reason
is the optimization in place does not look for aliases. This means two
consecutive get_fs_type() calls will bump kmod_concurrent, whereas
request_module() will not.
This is one explanation of why test case 0009 fails at least once for
get_fs_type().
2) If a module fails to load --- for whatever reason (kmod_concurrent
limit reached, file not yet present due to rootfs switch, out of
memory) we have a period of time during which module requests for the
same name either with request_module() or get_fs_type() will *also*
fail to load even if the file for the module is ready.
This explains why *multiple* NULLs are possible on test 0009.
3) finit_module() consumes quite a bit of memory.
4) Filesystems typically also have more dependent modules than other
modules; it's important to note, though, that even though a get_fs_type()
call does not incur additional kmod_concurrent bumps, since userspace
loads dependencies it finds it needs via finit_module_fd(), it *will*
take much more memory to load a module with a lot of dependencies.
Because of 3) and 4) we will easily run into out of memory failures with
certain tests. For instance test 0006 fails on qemu with 1024 MiB of RAM.
It panics a box after reaping all userspace processes and still not
having enough memory to reap.
[arnd@arndb.de: add dependencies for test module]
Link: http://lkml.kernel.org/r/20170630154834.3689272-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170628223155.26472-3-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-07-15 05:50:08 +08:00
|
|
|
config TEST_KMOD
|
|
|
|
tristate "kmod stress tester"
|
|
|
|
depends on m
|
|
|
|
depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
|
|
|
|
depends on NETDEVICES && NET_CORE && INET # for TUN
|
|
|
|
select TEST_LKM
|
|
|
|
select XFS_FS
|
|
|
|
select TUN
|
|
|
|
select BTRFS_FS
|
|
|
|
help
|
|
|
|
Test the kernel's module loading mechanism: kmod. kmod implements
|
|
|
|
support to load modules using the Linux kernel's usermode helper.
|
|
|
|
This test provides a series of tests against kmod.
|
|
|
|
|
|
|
|
Although technically you can either build test_kmod as a module or
|
|
|
|
into the kernel we disallow building it into the kernel since
|
|
|
|
it stress tests request_module() and this will very likely cause
|
|
|
|
some issues by taking over precious threads available from other
|
|
|
|
module load requests, ultimately this could be fatal.
|
|
|
|
|
|
|
|
To run tests run:
|
|
|
|
|
|
|
|
tools/testing/selftests/kmod/kmod.sh --help
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2017-09-09 07:15:31 +08:00
|
|
|
config TEST_DEBUG_VIRTUAL
|
|
|
|
tristate "Test CONFIG_DEBUG_VIRTUAL feature"
|
|
|
|
depends on DEBUG_VIRTUAL
|
|
|
|
help
|
|
|
|
Test the kernel's ability to detect incorrect calls to
|
|
|
|
virt_to_phys() done against the non-linear part of the
|
|
|
|
kernel's virtual address map.
|
|
|
|
|
|
|
|
If unsure, say N.
|
|
|
|
|
2018-02-07 07:38:38 +08:00
|
|
|
endif # RUNTIME_TESTING_MENU
|
2017-10-14 06:57:33 +08:00
|
|
|
|
|
|
|
config MEMTEST
|
|
|
|
bool "Memtest"
|
|
|
|
depends on HAVE_MEMBLOCK
|
|
|
|
---help---
|
|
|
|
This option adds a kernel parameter 'memtest', which allows memtest
|
|
|
|
to be set.
|
|
|
|
memtest=0, means disabled; -- default
|
|
|
|
memtest=1, means do 1 test pattern;
|
|
|
|
...
|
|
|
|
memtest=17, means do 17 test patterns.
|
|
|
|
If you are unsure how to answer this question, answer N.
|
|
|
|
|
|
|
|
config BUG_ON_DATA_CORRUPTION
|
|
|
|
bool "Trigger a BUG when data corruption is detected"
|
|
|
|
select DEBUG_LIST
|
|
|
|
help
|
|
|
|
Select this option if the kernel should BUG when it encounters
|
|
|
|
data corruption in kernel memory structures when they get checked
|
|
|
|
for validity.
|
|
|
|
|
|
|
|
If unsure, say N.
|
2017-09-09 07:15:31 +08:00
|
|
|
|
2007-10-19 14:41:07 +08:00
|
|
|
source "samples/Kconfig"
|
2008-04-18 02:05:37 +08:00
|
|
|
|
|
|
|
source "lib/Kconfig.kgdb"
|
2009-02-27 03:38:56 +08:00
|
|
|
|
2016-01-21 07:00:55 +08:00
|
|
|
source "lib/Kconfig.ubsan"
|
|
|
|
|
2015-11-20 10:19:29 +08:00
|
|
|
config ARCH_HAS_DEVMEM_IS_ALLOWED
|
|
|
|
bool
|
|
|
|
|
|
|
|
config STRICT_DEVMEM
|
|
|
|
bool "Filter access to /dev/mem"
|
2016-12-13 08:46:14 +08:00
|
|
|
depends on MMU && DEVMEM
|
2015-11-20 10:19:29 +08:00
|
|
|
depends on ARCH_HAS_DEVMEM_IS_ALLOWED
|
2018-03-08 06:30:54 +08:00
|
|
|
default y if PPC || X86 || ARM64
|
2015-11-20 10:19:29 +08:00
|
|
|
---help---
|
|
|
|
If this option is disabled, you allow userspace (root) access to all
|
|
|
|
of memory, including kernel and userspace memory. Accidental
|
|
|
|
access to this is obviously disastrous, but specific access can
|
|
|
|
be used by people debugging the kernel. Note that with PAT support
|
|
|
|
enabled, even in this case there are restrictions on /dev/mem
|
|
|
|
use due to the cache aliasing requirements.
|
|
|
|
|
2015-11-24 07:49:03 +08:00
|
|
|
If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
|
|
|
|
file only allows userspace access to PCI space and the BIOS code and
|
|
|
|
data regions. This is sufficient for dosemu and X and all common
|
|
|
|
users of /dev/mem.
|
|
|
|
|
|
|
|
If in doubt, say Y.
|
|
|
|
|
|
|
|
config IO_STRICT_DEVMEM
|
|
|
|
bool "Filter I/O access to /dev/mem"
|
|
|
|
depends on STRICT_DEVMEM
|
|
|
|
---help---
|
|
|
|
If this option is disabled, you allow userspace (root) access to all
|
|
|
|
io-memory regardless of whether a driver is actively using that
|
|
|
|
range. Accidental access to this is obviously disastrous, but
|
|
|
|
specific access can be used by people debugging kernel drivers.
|
|
|
|
|
2015-11-20 10:19:29 +08:00
|
|
|
If this option is switched on, the /dev/mem file only allows
|
2015-11-24 07:49:03 +08:00
|
|
|
userspace access to *idle* io-memory ranges (see /proc/iomem). This
|
|
|
|
may break traditional users of /dev/mem (dosemu, legacy X, etc...)
|
|
|
|
if the driver using a given range cannot be disabled.
|
2015-11-20 10:19:29 +08:00
|
|
|
|
|
|
|
If in doubt, say Y.
|
2018-07-31 19:39:31 +08:00
|
|
|
|
|
|
|
source "arch/$(SRCARCH)/Kconfig.debug"
|
|
|
|
|
|
|
|
endmenu # Kernel hacking
|