A set of watchdog/softlockup related improvements:
- Enforce that the watchdog timestamp is always valid on boot. The original implementation caused a watchdog disabled gap of one second in the boot process due to truncation of the underlying sched clock. The sched clock is divided by 1e9 to convert nanoseconds to seconds. So for the first second of the boot process the result is 0 which is at the same time the indicator to disable the watchdog. The trivial fix is to change the disabled indicator to ULONG_MAX. - Two cleanup patches removing unused and redundant code which got forgotten to be cleaned up in previous changes. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl4vbrQTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoTQHD/9ONyg9VQLjk6aH94H1Sjik/K7zvxoC aMGY2onZ6PddVrcTgJoMmWteQlQ2YScCSVnfVedmxTRU8laEHU/LQnMntTAbuHWj VUkK8X/AI5l+VY6p0Sr1iCyxcFezoC2VMqOKntuQl3080mK7R7/fQ+ZVmimiPihr 46qMikIfBN7w2od7Ger3dZRttbnRj5YsmLBenX/HtBY/HPdhoDx6lfW/5AbAgUH5 qnAmM0yPZ/VUSfo45z+exESUezxByIkGsrROBtPSRwql3Oqbyrza2UC48dRjsuIQ vO0coorlhqJGF72WW45DiLvg4Hew/vVyzcYrIiOSQPZpeTtPzL23zk/cqcqpKy6N pCuiSgimzbPgzqTHs6WQR/D0Dn76rruUqXqteuD5zirC9Kjf2TWeIMPTgPfy8irt 2RwT1+5Ao/SNkdm/Pxk0S/+Y99uRJSqeNTV3lroYGC7IFMAnG4P0S9uyFJ6ZFIMz nOvEOhUlFXWw/w7WPZv+ytx40sRkqFVIePSRtzq+cjlDEYCgLhuveE2A4/6IGPMP Ej6vsGh3lMyHieRhmymESG8uLU2P/L7hhPexUPJJu4QSxKbKQNfWx+0z7bm86Ic7 0uDSNZZl7UDYq6tioS1DBTq9ybly9vn1WDe5tHMJDllPe9TIEnqynvVLIg6MMGdm GjbTNysDPx85yw== =WMiM -----END PGP SIGNATURE----- Merge tag 'core-core-2020-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull watchdog updates from Thomas Gleixner: "A set of watchdog/softlockup related improvements: - Enforce that the watchdog timestamp is always valid on boot. The original implementation caused a watchdog disabled gap of one second in the boot process due to truncation of the underlying sched clock. The sched clock is divided by 1e9 to convert nanoseconds to seconds. 
So for the first second of the boot process the result is 0, which is also the value used to indicate that the watchdog is disabled. The trivial fix is to change the disabled indicator to ULONG_MAX. - Two cleanup patches removing unused and redundant code that previous changes forgot to clean up" * tag 'core-core-2020-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: watchdog/softlockup: Enforce that timestamp is valid on boot; watchdog/softlockup: Remove obsolete check of last reported task; watchdog: Remove soft_lockup_hrtimer_cnt and related code
This commit is contained in:
commit
b11c89a158
|
@ -161,6 +161,8 @@ static void lockup_detector_update_enable(void)
|
|||
|
||||
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
|
||||
|
||||
#define SOFTLOCKUP_RESET ULONG_MAX
|
||||
|
||||
/* Global variables, exported for sysctl */
|
||||
unsigned int __read_mostly softlockup_panic =
|
||||
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
|
||||
|
@ -173,8 +175,6 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
|
|||
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
|
||||
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
|
||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
|
||||
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
|
||||
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
|
||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
|
||||
static unsigned long soft_lockup_nmi_warn;
|
||||
|
||||
|
@ -274,7 +274,7 @@ notrace void touch_softlockup_watchdog_sched(void)
|
|||
* Preemption can be enabled. It doesn't matter which CPU's timestamp
|
||||
* gets zeroed here, so use the raw_ operation.
|
||||
*/
|
||||
raw_cpu_write(watchdog_touch_ts, 0);
|
||||
raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
|
||||
}
|
||||
|
||||
notrace void touch_softlockup_watchdog(void)
|
||||
|
@ -298,14 +298,14 @@ void touch_all_softlockup_watchdogs(void)
|
|||
* the softlockup check.
|
||||
*/
|
||||
for_each_cpu(cpu, &watchdog_allowed_mask)
|
||||
per_cpu(watchdog_touch_ts, cpu) = 0;
|
||||
per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
|
||||
wq_watchdog_touch(-1);
|
||||
}
|
||||
|
||||
void touch_softlockup_watchdog_sync(void)
|
||||
{
|
||||
__this_cpu_write(softlockup_touch_sync, true);
|
||||
__this_cpu_write(watchdog_touch_ts, 0);
|
||||
__this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
|
||||
}
|
||||
|
||||
static int is_softlockup(unsigned long touch_ts)
|
||||
|
@ -350,8 +350,6 @@ static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
|
|||
*/
|
||||
static int softlockup_fn(void *data)
|
||||
{
|
||||
__this_cpu_write(soft_lockup_hrtimer_cnt,
|
||||
__this_cpu_read(hrtimer_interrupts));
|
||||
__touch_watchdog();
|
||||
complete(this_cpu_ptr(&softlockup_completion));
|
||||
|
||||
|
@ -383,7 +381,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
/* .. and repeat */
|
||||
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
|
||||
|
||||
if (touch_ts == 0) {
|
||||
if (touch_ts == SOFTLOCKUP_RESET) {
|
||||
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
|
||||
/*
|
||||
* If the time stamp was touched atomically
|
||||
|
@ -416,22 +414,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
return HRTIMER_RESTART;
|
||||
|
||||
/* only warn once */
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true) {
|
||||
/*
|
||||
* When multiple processes are causing softlockups the
|
||||
* softlockup detector only warns on the first one
|
||||
* because the code relies on a full quiet cycle to
|
||||
* re-arm. The second process prevents the quiet cycle
|
||||
* and never gets reported. Use task pointers to detect
|
||||
* this.
|
||||
*/
|
||||
if (__this_cpu_read(softlockup_task_ptr_saved) !=
|
||||
current) {
|
||||
__this_cpu_write(soft_watchdog_warn, false);
|
||||
__touch_watchdog();
|
||||
}
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||
|
@ -447,7 +431,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||
smp_processor_id(), duration,
|
||||
current->comm, task_pid_nr(current));
|
||||
__this_cpu_write(softlockup_task_ptr_saved, current);
|
||||
print_modules();
|
||||
print_irqtrace_events(current);
|
||||
if (regs)
|
||||
|
|
Loading…
Reference in New Issue