kernel: add panic_on_warn
There have been several times where I have had to rebuild a kernel to cause a panic when hitting a WARN() in the code in order to get a crash dump from a system. Sometimes this is easy to do, other times (such as in the case of a remote admin) it is not trivial to send new images to the user. A much easier method would be a switch to change the WARN() over to a panic. This makes debugging easier in that I can now test the actual image the WARN() was seen on and I do not have to engage in remote debugging. This patch adds a panic_on_warn kernel parameter and a /proc/sys/kernel/panic_on_warn sysctl; when either is set, panic() is called in the warn_slowpath_common() path. The function will still print out the location of the warning. An example of the panic_on_warn output: The first line below is from the WARN_ON() to output the WARN_ON()'s location. After that the panic() output is displayed. WARNING: CPU: 30 PID: 11698 at /home/prarit/dummy_module/dummy-module.c:25 init_dummy+0x1f/0x30 [dummy_module]() Kernel panic - not syncing: panic_on_warn set ... CPU: 30 PID: 11698 Comm: insmod Tainted: G W OE 3.17.0+ #57 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 0000000000000000 000000008e3f87df ffff88080f093c38 ffffffff81665190 0000000000000000 ffffffff818aea3d ffff88080f093cb8 ffffffff8165e2ec ffffffff00000008 ffff88080f093cc8 ffff88080f093c68 000000008e3f87df Call Trace: [<ffffffff81665190>] dump_stack+0x46/0x58 [<ffffffff8165e2ec>] panic+0xd0/0x204 [<ffffffffa038e05f>] ? init_dummy+0x1f/0x30 [dummy_module] [<ffffffff81076b90>] warn_slowpath_common+0xd0/0xd0 [<ffffffffa038e040>] ? dummy_greetings+0x40/0x40 [dummy_module] [<ffffffff81076c8a>] warn_slowpath_null+0x1a/0x20 [<ffffffffa038e05f>] init_dummy+0x1f/0x30 [dummy_module] [<ffffffff81002144>] do_one_initcall+0xd4/0x210 [<ffffffff811b52c2>] ? __vunmap+0xc2/0x110 [<ffffffff810f8889>] load_module+0x16a9/0x1b30 [<ffffffff810f3d30>] ? store_uevent+0x70/0x70 [<ffffffff810f49b9>] ? 
copy_module_from_fd.isra.44+0x129/0x180 [<ffffffff810f8ec6>] SyS_finit_module+0xa6/0xd0 [<ffffffff8166cf29>] system_call_fastpath+0x12/0x17 Successfully tested by me. hpa said: There is another very valid use for this: many operators would rather a machine shuts down than being potentially compromised either functionally or security-wise. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Acked-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Fabian Frederick <fabf@skynet.be> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f938612dd9
commit
9e3961a097
|
@ -471,6 +471,13 @@ format. Crash is available on Dave Anderson's site at the following URL:
|
||||||
|
|
||||||
http://people.redhat.com/~anderson/
|
http://people.redhat.com/~anderson/
|
||||||
|
|
||||||
|
Trigger Kdump on WARN()
|
||||||
|
=======================
|
||||||
|
|
||||||
|
The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This
|
||||||
|
will cause a kdump to occur at the panic() call. In cases where a user wants
|
||||||
|
to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
|
||||||
|
to achieve the same behaviour.
|
||||||
|
|
||||||
Contact
|
Contact
|
||||||
=======
|
=======
|
||||||
|
|
|
@ -2509,6 +2509,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
||||||
timeout < 0: reboot immediately
|
timeout < 0: reboot immediately
|
||||||
Format: <timeout>
|
Format: <timeout>
|
||||||
|
|
||||||
|
panic_on_warn panic() instead of WARN(). Useful to cause kdump
|
||||||
|
on a WARN().
|
||||||
|
|
||||||
crash_kexec_post_notifiers
|
crash_kexec_post_notifiers
|
||||||
Run kdump after running panic-notifiers and dumping
|
Run kdump after running panic-notifiers and dumping
|
||||||
kmsg. This is only for the users who doubt kdump always
|
kmsg. This is only for the users who doubt kdump always
|
||||||
|
|
|
@ -54,8 +54,9 @@ show up in /proc/sys/kernel:
|
||||||
- overflowuid
|
- overflowuid
|
||||||
- panic
|
- panic
|
||||||
- panic_on_oops
|
- panic_on_oops
|
||||||
- panic_on_unrecovered_nmi
|
|
||||||
- panic_on_stackoverflow
|
- panic_on_stackoverflow
|
||||||
|
- panic_on_unrecovered_nmi
|
||||||
|
- panic_on_warn
|
||||||
- pid_max
|
- pid_max
|
||||||
- powersave-nap [ PPC only ]
|
- powersave-nap [ PPC only ]
|
||||||
- printk
|
- printk
|
||||||
|
@ -527,19 +528,6 @@ the recommended setting is 60.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
panic_on_unrecovered_nmi:
|
|
||||||
|
|
||||||
The default Linux behaviour on an NMI of either memory or unknown is
|
|
||||||
to continue operation. For many environments such as scientific
|
|
||||||
computing it is preferable that the box is taken out and the error
|
|
||||||
dealt with than an uncorrected parity/ECC error get propagated.
|
|
||||||
|
|
||||||
A small number of systems do generate NMI's for bizarre random reasons
|
|
||||||
such as power management so the default is off. That sysctl works like
|
|
||||||
the existing panic controls already in that directory.
|
|
||||||
|
|
||||||
==============================================================
|
|
||||||
|
|
||||||
panic_on_oops:
|
panic_on_oops:
|
||||||
|
|
||||||
Controls the kernel's behaviour when an oops or BUG is encountered.
|
Controls the kernel's behaviour when an oops or BUG is encountered.
|
||||||
|
@ -563,6 +551,30 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
panic_on_unrecovered_nmi:
|
||||||
|
|
||||||
|
The default Linux behaviour on an NMI of either memory or unknown is
|
||||||
|
to continue operation. For many environments such as scientific
|
||||||
|
computing it is preferable that the box is taken out and the error
|
||||||
|
dealt with than an uncorrected parity/ECC error get propagated.
|
||||||
|
|
||||||
|
A small number of systems do generate NMI's for bizarre random reasons
|
||||||
|
such as power management so the default is off. That sysctl works like
|
||||||
|
the existing panic controls already in that directory.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
panic_on_warn:
|
||||||
|
|
||||||
|
Calls panic() in the WARN() path when set to 1. This is useful to avoid
|
||||||
|
a kernel rebuild when attempting to kdump at the location of a WARN().
|
||||||
|
|
||||||
|
0: only WARN(), default behaviour.
|
||||||
|
|
||||||
|
1: call panic() after printing out WARN() location.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
perf_cpu_time_max_percent:
|
perf_cpu_time_max_percent:
|
||||||
|
|
||||||
Hints to the kernel how much CPU time it should be allowed to
|
Hints to the kernel how much CPU time it should be allowed to
|
||||||
|
|
|
@ -427,6 +427,7 @@ extern int panic_timeout;
|
||||||
extern int panic_on_oops;
|
extern int panic_on_oops;
|
||||||
extern int panic_on_unrecovered_nmi;
|
extern int panic_on_unrecovered_nmi;
|
||||||
extern int panic_on_io_nmi;
|
extern int panic_on_io_nmi;
|
||||||
|
extern int panic_on_warn;
|
||||||
extern int sysctl_panic_on_stackoverflow;
|
extern int sysctl_panic_on_stackoverflow;
|
||||||
/*
|
/*
|
||||||
* Only to be used by arch init code. If the user over-wrote the default
|
* Only to be used by arch init code. If the user over-wrote the default
|
||||||
|
|
|
@ -153,6 +153,7 @@ enum
|
||||||
KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */
|
KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */
|
||||||
KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
|
KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
|
||||||
KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
|
KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
|
||||||
|
KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ static int pause_on_oops;
|
||||||
static int pause_on_oops_flag;
|
static int pause_on_oops_flag;
|
||||||
static DEFINE_SPINLOCK(pause_on_oops_lock);
|
static DEFINE_SPINLOCK(pause_on_oops_lock);
|
||||||
static bool crash_kexec_post_notifiers;
|
static bool crash_kexec_post_notifiers;
|
||||||
|
int panic_on_warn __read_mostly;
|
||||||
|
|
||||||
int panic_timeout = CONFIG_PANIC_TIMEOUT;
|
int panic_timeout = CONFIG_PANIC_TIMEOUT;
|
||||||
EXPORT_SYMBOL_GPL(panic_timeout);
|
EXPORT_SYMBOL_GPL(panic_timeout);
|
||||||
|
@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
|
||||||
if (args)
|
if (args)
|
||||||
vprintk(args->fmt, args->args);
|
vprintk(args->fmt, args->args);
|
||||||
|
|
||||||
|
if (panic_on_warn) {
|
||||||
|
/*
|
||||||
|
* This thread may hit another WARN() in the panic path.
|
||||||
|
* Resetting this prevents additional WARN() from panicking the
|
||||||
|
* system on this thread. Other threads are blocked by the
|
||||||
|
* panic_mutex in panic().
|
||||||
|
*/
|
||||||
|
panic_on_warn = 0;
|
||||||
|
panic("panic_on_warn set ...\n");
|
||||||
|
}
|
||||||
|
|
||||||
print_modules();
|
print_modules();
|
||||||
dump_stack();
|
dump_stack();
|
||||||
print_oops_end_marker();
|
print_oops_end_marker();
|
||||||
|
@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail);
|
||||||
|
|
||||||
core_param(panic, panic_timeout, int, 0644);
|
core_param(panic, panic_timeout, int, 0644);
|
||||||
core_param(pause_on_oops, pause_on_oops, int, 0644);
|
core_param(pause_on_oops, pause_on_oops, int, 0644);
|
||||||
|
core_param(panic_on_warn, panic_on_warn, int, 0644);
|
||||||
|
|
||||||
static int __init setup_crash_kexec_post_notifiers(char *s)
|
static int __init setup_crash_kexec_post_notifiers(char *s)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = {
|
||||||
.proc_handler = proc_dointvec,
|
.proc_handler = proc_dointvec,
|
||||||
},
|
},
|
||||||
#endif
|
#endif
|
||||||
|
{
|
||||||
|
.procname = "panic_on_warn",
|
||||||
|
.data = &panic_on_warn,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = &zero,
|
||||||
|
.extra2 = &one,
|
||||||
|
},
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = {
|
||||||
{ CTL_INT, KERN_COMPAT_LOG, "compat-log" },
|
{ CTL_INT, KERN_COMPAT_LOG, "compat-log" },
|
||||||
{ CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
|
{ CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
|
||||||
{ CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
|
{ CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
|
||||||
|
{ CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" },
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue