From 54661581fbf5e33c390699de03fd06fad0a01c58 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:54 +0800 Subject: [PATCH 01/11] watchdog: make hardlockup detect code public commit 4ffed7d5435d12be6762e6fdef92fd2c67fc27df openeuler. In current code, the hardlockup detect code is contained by CONFIG_HARDLOCKUP_DETECTOR_PERF. This patch makes this code public so that other arch hardlockup detector can use it. Signed-off-by: huwentao --- include/linux/nmi.h | 15 ++++++++++--- kernel/Makefile | 2 +- kernel/watchdog_hld.c | 50 ++++++++++++++++++++++++++----------------- lib/Kconfig.debug | 2 +- 4 files changed, 44 insertions(+), 25 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e972d1ae1ee6..27504f47add0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,7 +7,7 @@ #include #include -#if defined(CONFIG_HAVE_NMI_WATCHDOG) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) && !defined(CONFIG_SDEI_WATCHDOG) #include #endif @@ -83,6 +83,7 @@ static inline void reset_hung_task_detector(void) { } #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); +extern void watchdog_hardlockup_check(struct pt_regs *regs); extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} @@ -94,8 +95,17 @@ static inline void hardlockup_detector_disable(void) {} # define NMI_WATCHDOG_SYSCTL_PERM 0444 #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +#if defined(CONFIG_HARDLOCKUP_DETECTOR) +#ifndef CONFIG_PPC extern void arch_touch_nmi_watchdog(void); +#endif +#else +# if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline void arch_touch_nmi_watchdog(void) {} +# endif +#endif + +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); @@ -110,7 +120,6 @@ static inline void hardlockup_detector_perf_enable(void) { } static inline void 
hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif diff --git a/kernel/Makefile b/kernel/Makefile index 0475121f66a7..3466c9842da2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..76ac86caa50f 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -22,14 +22,11 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -static DEFINE_PER_CPU(struct perf_event *, dead_event); -static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; -static atomic_t watchdog_cpus = ATOMIC_INIT(0); -notrace void arch_touch_nmi_watchdog(void) +#ifndef CONFIG_PPC +notrace void __weak arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have @@ -41,6 +38,7 @@ notrace void arch_touch_nmi_watchdog(void) raw_cpu_write(watchdog_nmi_touch, true); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +#endif #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP static DEFINE_PER_CPU(ktime_t, last_timestamp); @@ -98,22 +96,8 @@ static inline bool watchdog_check_timestamp(void) } #endif -static struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - 
.pinned = 1, - .disabled = 1, -}; - -/* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) +void watchdog_hardlockup_check(struct pt_regs *regs) { - /* Ensure the watchdog never gets throttled */ - event->hw.interrupts = 0; - if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; @@ -163,6 +147,31 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF +static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +static DEFINE_PER_CPU(struct perf_event *, dead_event); +static struct cpumask dead_events_mask; +static atomic_t watchdog_cpus = ATOMIC_INIT(0); + +static struct perf_event_attr wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +/* Callback function for perf event subsystem */ +static void watchdog_overflow_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + /* Ensure the watchdog never gets throttled */ + event->hw.interrupts = 0; + + watchdog_hardlockup_check(regs); +} + static int hardlockup_detector_event_create(void) { unsigned int cpu = smp_processor_id(); @@ -294,3 +303,4 @@ int __init hardlockup_detector_perf_init(void) } return ret; } +#endif /* CONFIG_HARDLOCKUP_DETECTOR_PERF */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8159abeab323..375fce55b1fa 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -868,7 +868,7 @@ config HARDLOCKUP_DETECTOR bool "Detect Hard Lockups" depends on DEBUG_KERNEL && !S390 depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH - select LOCKUP_DETECTOR + select SOFTLOCKUP_DETECTOR select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF help Say Y here to enable the kernel to act as a watchdog to 
detect From aced53f8a1b69493f703106567284a4d39f7142d Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:55 +0800 Subject: [PATCH 02/11] firmware: arm_sdei: add interrupt binding api commit 860744b94a10a159562fc491fd7f3ea1388965c1 openeuler. This patch adds an interrupt binding API function which returns the bound event number. Signed-off-by: huwentao --- drivers/firmware/arm_sdei.c | 10 ++++++++++ include/linux/arm_sdei.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index e497785cd99f..c6c330d90f39 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -192,6 +192,16 @@ int sdei_api_event_context(u32 query, u64 *result) } NOKPROBE_SYMBOL(sdei_api_event_context); +int sdei_api_event_interrupt_bind(int hwirq) +{ + u64 event_number; + + invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_BIND, hwirq, 0, 0, 0, 0, + &event_number); + + return (int)event_number; +} + static int sdei_api_event_get_info(u32 event, u32 info, u64 *result) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0, diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 3305ea7f9dc7..d8b4e8d2520f 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -42,6 +42,7 @@ int sdei_event_unregister(u32 event_num); int sdei_event_enable(u32 event_num); int sdei_event_disable(u32 event_num); +int sdei_api_event_interrupt_bind(int hwirq); /* GHES register/unregister helpers */ int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, From 00082d7172608722dfa8722ca53e10f0c5a07fe6 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:56 +0800 Subject: [PATCH 03/11] firmware: arm_sdei: make 'sdei_api_event_disable/enable' public commit cfaccce945988392d70ad42924e76f330c25ab9a openeuler. The NMI watchdog needs to enable the event on each core individually.
But the existing public API 'sdei_event_enable' enables events for all cores when the event type is private. Signed-off-by: huwentao --- drivers/firmware/arm_sdei.c | 4 ++-- include/linux/arm_sdei.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index c6c330d90f39..b697d4682254 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -391,7 +391,7 @@ static int sdei_platform_reset(void) return err; } -static int sdei_api_event_enable(u32 event_num) +int sdei_api_event_enable(u32 event_num) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ENABLE, event_num, 0, 0, 0, 0, NULL); @@ -441,7 +441,7 @@ int sdei_event_enable(u32 event_num) } EXPORT_SYMBOL(sdei_event_enable); -static int sdei_api_event_disable(u32 event_num) +int sdei_api_event_disable(u32 event_num) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_DISABLE, event_num, 0, 0, 0, 0, NULL); diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index d8b4e8d2520f..befebeba84f1 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -43,6 +43,8 @@ int sdei_event_unregister(u32 event_num); int sdei_event_enable(u32 event_num); int sdei_event_disable(u32 event_num); int sdei_api_event_interrupt_bind(int hwirq); +int sdei_api_event_disable(u32 event_num); +int sdei_api_event_enable(u32 event_num); /* GHES register/unregister helpers */ int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, From 545b1214b36588ef7a1f334225bdf2b0134147ae Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:57 +0800 Subject: [PATCH 04/11] lockup_detector: init lockup detector after all the init_calls commit bef7d8e1432400f3d78339ac269167e09c15dabd openeuler. We call 'sdei_init' as 'subsys_initcall_sync'. The lockup detector needs to be initialised after sdei_init. The side effect of this patch is that hard lockups occurring during the init_calls can no longer be detected.
Signed-off-by: huwentao --- init/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 9eff2e318781..22d7de2866d0 100644 --- a/init/main.c +++ b/init/main.c @@ -1196,7 +1196,6 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); do_pre_smp_initcalls(); - lockup_detector_init(); smp_init(); sched_init_smp(); @@ -1207,6 +1206,8 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); + lockup_detector_init(); + /* Open the /dev/console on the rootfs, this should never fail */ if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) pr_err("Warning: unable to open an initial console.\n"); From f0bfc2e73d8c648d49ceb4d4dd7b4f429edfb15e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:58 +0800 Subject: [PATCH 05/11] watchdog: add nmi_watchdog support for arm64 based on SDEI commit cc19c0b385e3bd423e20465b06eb232678ce5c16 openeuler. Add nmi_watchdog support for arm64 based on SDEI. 
Signed-off-by: huwentao --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/watchdog_sdei.c | 112 ++++++++++++++++++++++++++++++ lib/Kconfig.debug | 7 ++ 3 files changed, 120 insertions(+) create mode 100644 arch/arm64/kernel/watchdog_sdei.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4fa92ca3b72e..9763b9f576d8 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_SSBD) += ssbd.o +obj-$(CONFIG_SDEI_WATCHDOG) += watchdog_sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-y += vdso/ probes/ diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c new file mode 100644 index 000000000000..a2154aa6f27a --- /dev/null +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Detect hard lockups on a system + * + * Note: Most of this code is borrowed heavily from the perf hardlockup + * detector, so thanks to Don for the initial implementation. 
+ */ + +#define pr_fmt(fmt) "SDEI NMI watchdog: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* We use the secure physical timer as SDEI NMI watchdog timer */ +#define SDEI_NMI_WATCHDOG_HWIRQ 29 + +static int sdei_watchdog_event_num; +static bool disable_sdei_nmi_watchdog; +static bool sdei_watchdog_registered; + +int watchdog_nmi_enable(unsigned int cpu) +{ + int ret; + + if (!sdei_watchdog_registered) + return -EINVAL; + + ret = sdei_api_event_enable(sdei_watchdog_event_num); + if (ret) { + pr_err("Enable NMI Watchdog failed on cpu%d\n", + smp_processor_id()); + return ret; + } + + return 0; +} + +void watchdog_nmi_disable(unsigned int cpu) +{ + int ret; + + if (!sdei_watchdog_registered) + return; + + ret = sdei_api_event_disable(sdei_watchdog_event_num); + if (ret) + pr_err("Disable NMI Watchdog failed on cpu%d\n", + smp_processor_id()); +} + +static int sdei_watchdog_callback(u32 event, + struct pt_regs *regs, void *arg) +{ + watchdog_hardlockup_check(regs); + + return 0; +} + +static void sdei_nmi_watchdog_bind(void *data) +{ + int ret; + + ret = sdei_api_event_interrupt_bind(SDEI_NMI_WATCHDOG_HWIRQ); + if (ret < 0) + pr_err("SDEI bind failed on cpu%d, return %d\n", + smp_processor_id(), ret); +} + +static int __init disable_sdei_nmi_watchdog_setup(char *str) +{ + disable_sdei_nmi_watchdog = true; + return 1; +} +__setup("disable_sdei_nmi_watchdog", disable_sdei_nmi_watchdog_setup); + +int __init watchdog_nmi_probe(void) +{ + int ret; + + if (disable_sdei_nmi_watchdog) + return -EINVAL; + + if (!is_hyp_mode_available()) { + pr_err("Disable SDEI NMI Watchdog in VM\n"); + return -EINVAL; + } + + sdei_watchdog_event_num = sdei_api_event_interrupt_bind(SDEI_NMI_WATCHDOG_HWIRQ); + if (sdei_watchdog_event_num < 0) { + pr_err("Bind interrupt failed. 
Firmware may not support SDEI !\n"); + return sdei_watchdog_event_num; + } + + on_each_cpu(sdei_nmi_watchdog_bind, NULL, true); + + ret = sdei_event_register(sdei_watchdog_event_num, + sdei_watchdog_callback, NULL); + if (ret) { + pr_err("SDEI Watchdog register callback failed\n"); + return ret; + } + + sdei_watchdog_registered = true; + pr_info("SDEI Watchdog registered successfully\n"); + + return 0; +} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 375fce55b1fa..10efbc06392e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -853,6 +853,13 @@ config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR +config SDEI_WATCHDOG + bool "SDEI NMI Watchdog support" + depends on ARM_SDE_INTERFACE + select HAVE_HARDLOCKUP_DETECTOR_ARCH + select HARDLOCKUP_CHECK_TIMESTAMP + select HARDLOCKUP_DETECTOR + # # Enables a timestamp based low pass filter to compensate for perf based # hard lockup detection which runs too fast due to turbo modes. From fb960c041059841a7408541f36d4239ada7b4cc3 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:00:59 +0800 Subject: [PATCH 06/11] sdei_watchdog: refresh 'last_timestamp' when enabling nmi_watchdog commit 5bc048a102ef9c3748464cacce443a0f1d9bed5b openeuler. The trigger period of secure time is set by firmware. We need to check the time_stamp every time the secure time fires to make sure the hardlockup detection is not executed too soon. We need to refresh 'last_timestamp' to the current time when we enable the nmi_watchdog. Otherwise, false hardlockup may be detected when the secure timer fires the first time. 
Signed-off-by: huwentao --- arch/arm64/kernel/watchdog_sdei.c | 2 ++ include/linux/nmi.h | 1 + kernel/watchdog_hld.c | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index a2154aa6f27a..e36c4d398893 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -30,6 +30,8 @@ int watchdog_nmi_enable(unsigned int cpu) if (!sdei_watchdog_registered) return -EINVAL; + refresh_hld_last_timestamp(); + ret = sdei_api_event_enable(sdei_watchdog_event_num); if (ret) { pr_err("Enable NMI Watchdog failed on cpu%d\n", diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 27504f47add0..e2f09e60d0d9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -208,6 +208,7 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh); #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); +void refresh_hld_last_timestamp(void); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 76ac86caa50f..88f5c314a1df 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -89,6 +89,15 @@ static bool watchdog_check_timestamp(void) __this_cpu_write(last_timestamp, now); return true; } + +void refresh_hld_last_timestamp(void) +{ + ktime_t now; + + now = ktime_get_mono_fast_ns(); + __this_cpu_write(last_timestamp, now); + +} #else static inline bool watchdog_check_timestamp(void) { From 3d3ce61def8ee7a3fb7893e4df9bd3c5dccec6f1 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:01:00 +0800 Subject: [PATCH 07/11] sdei_watchdog: clear EOI of the secure timer before kdump commit 75ac7be96da43f12bad247de69137500e02fd37f openeuler. When we panic on a hard lockup, the secure timer interrupt remains active because firmware clears the EOI only after the dispatch has completed.
This causes the arm_arch_timer interrupt to fail to trigger in the second kernel. This patch adds a new SMC helper to clear the EOI of a given interrupt and uses it to clear the EOI of the secure timer before booting the second kernel. Signed-off-by: huwentao --- arch/arm64/kernel/machine_kexec.c | 10 ++++++++++ arch/arm64/kernel/watchdog_sdei.c | 6 ++++++ drivers/firmware/arm_sdei.c | 6 ++++++ include/linux/arm_sdei.h | 1 + include/linux/nmi.h | 6 ++++++ include/uapi/linux/arm_sdei.h | 1 + 6 files changed, 30 insertions(+) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index cc049ff5c6a5..81a70d76ea5a 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,15 @@ void machine_crash_shutdown(struct pt_regs *regs) /* shutdown non-crashing cpus */ crash_smp_send_stop(); + /* + * when we panic in hardlockup detected by sdei_watchdog, the secure + * timer interrupt remains activate here because firmware clear eoi + * after dispatch is completed. This will cause arm_arch_timer + * interrupt failed to trigger in the second kernel. So we clear eoi + * of the secure timer before booting the second kernel.
+ */ + sdei_watchdog_clear_eoi(); + /* for crashing cpu */ crash_save_cpu(regs, smp_processor_id()); machine_kexec_mask_interrupts(); diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index e36c4d398893..99ab9bdfdee6 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -80,6 +80,12 @@ static int __init disable_sdei_nmi_watchdog_setup(char *str) } __setup("disable_sdei_nmi_watchdog", disable_sdei_nmi_watchdog_setup); +void sdei_watchdog_clear_eoi(void) +{ + if (sdei_watchdog_registered) + sdei_api_clear_eoi(SDEI_NMI_WATCHDOG_HWIRQ); +} + int __init watchdog_nmi_probe(void) { int ret; diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index b697d4682254..f9cd75475f09 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -202,6 +202,12 @@ int sdei_api_event_interrupt_bind(int hwirq) return (int)event_number; } +int sdei_api_clear_eoi(int hwirq) +{ + return invoke_sdei_fn(SDEI_1_0_FN_SDEI_CLEAR_EOI, hwirq, 0, 0, 0, 0, + NULL); +} + static int sdei_api_event_get_info(u32 event, u32 info, u64 *result) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0, diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index befebeba84f1..de49a289c5f3 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -45,6 +45,7 @@ int sdei_event_disable(u32 event_num); int sdei_api_event_interrupt_bind(int hwirq); int sdei_api_event_disable(u32 event_num); int sdei_api_event_enable(u32 event_num); +int sdei_api_clear_eoi(int hwirq); /* GHES register/unregister helpers */ int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e2f09e60d0d9..43a8d0eb5e06 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -229,4 +229,10 @@ extern int proc_watchdog_cpumask(struct ctl_table *, int, #include #endif +#ifdef CONFIG_SDEI_WATCHDOG +void 
sdei_watchdog_clear_eoi(void); +#else +static inline void sdei_watchdog_clear_eoi(void) { } +#endif + #endif diff --git a/include/uapi/linux/arm_sdei.h b/include/uapi/linux/arm_sdei.h index af0630ba5437..1187b1b49c87 100644 --- a/include/uapi/linux/arm_sdei.h +++ b/include/uapi/linux/arm_sdei.h @@ -24,6 +24,7 @@ #define SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE SDEI_1_0_FN(0x0E) #define SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI_1_0_FN(0x11) #define SDEI_1_0_FN_SDEI_SHARED_RESET SDEI_1_0_FN(0x12) +#define SDEI_1_0_FN_SDEI_CLEAR_EOI SDEI_1_0_FN(0x18) #define SDEI_VERSION_MAJOR_SHIFT 48 #define SDEI_VERSION_MAJOR_MASK 0x7fff From 4108f89bf65fbef66e194931f21cec6c06e2ebe1 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:01:01 +0800 Subject: [PATCH 08/11] sdei_watchdog: set secure timer period base on 'watchdog_thresh' commit 13ddc12768ca98d36ec03bfa21a30b3ebc91673d openeuler. The period of the secure timer is set to 3s by BIOS. That means the secure timer interrupt will trigger every 3 seconds. To further decrease the NMI watchdog's effect on performance, this patch sets the period of the secure timer based on 'watchdog_thresh'. This variable is initialized to 10s.
We can also set the period at runtime by modifying '/proc/sys/kernel/watchdog_thresh' Signed-off-by: huwentao --- arch/arm64/kernel/watchdog_sdei.c | 15 +++++++++++++++ drivers/firmware/arm_sdei.c | 6 ++++++ include/linux/arm_sdei.h | 1 + include/uapi/linux/arm_sdei.h | 1 + lib/Kconfig.debug | 1 - 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 99ab9bdfdee6..c342f86560d5 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -30,7 +30,11 @@ int watchdog_nmi_enable(unsigned int cpu) if (!sdei_watchdog_registered) return -EINVAL; +#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP refresh_hld_last_timestamp(); +#endif + + sdei_api_set_secure_timer_period(watchdog_thresh); ret = sdei_api_event_enable(sdei_watchdog_event_num); if (ret) { @@ -104,6 +108,17 @@ int __init watchdog_nmi_probe(void) return sdei_watchdog_event_num; } + /* + * After we introduced 'sdei_api_set_secure_timer_period', we disselect + * 'CONFIG_HARDLOCKUP_CHECK_TIMESTAMP'. So we need to make sure that + * firmware can set the period of the secure timer and the timer + * interrupt doesn't trigger too soon. 
+ */ + if (sdei_api_set_secure_timer_period(watchdog_thresh)) { + pr_err("Firmware doesn't support setting the secure timer period, please update your BIOS !\n"); + return -EINVAL; + } + on_each_cpu(sdei_nmi_watchdog_bind, NULL, true); ret = sdei_event_register(sdei_watchdog_event_num, diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index f9cd75475f09..fa5a3ee0e038 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -208,6 +208,12 @@ int sdei_api_clear_eoi(int hwirq) NULL); } +int sdei_api_set_secure_timer_period(int sec) +{ + return invoke_sdei_fn(SDEI_1_0_FN_SET_SECURE_TIMER_PERIOD, sec, 0, 0, 0, + 0, NULL); +} + static int sdei_api_event_get_info(u32 event, u32 info, u64 *result) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0, diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index de49a289c5f3..caa6fba8120d 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,6 +46,7 @@ int sdei_api_event_interrupt_bind(int hwirq); int sdei_api_event_disable(u32 event_num); int sdei_api_event_enable(u32 event_num); int sdei_api_clear_eoi(int hwirq); +int sdei_api_set_secure_timer_period(int sec); /* GHES register/unregister helpers */ int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, diff --git a/include/uapi/linux/arm_sdei.h b/include/uapi/linux/arm_sdei.h index 1187b1b49c87..a5375679dd50 100644 --- a/include/uapi/linux/arm_sdei.h +++ b/include/uapi/linux/arm_sdei.h @@ -25,6 +25,7 @@ #define SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI_1_0_FN(0x11) #define SDEI_1_0_FN_SDEI_SHARED_RESET SDEI_1_0_FN(0x12) #define SDEI_1_0_FN_SDEI_CLEAR_EOI SDEI_1_0_FN(0x18) +#define SDEI_1_0_FN_SET_SECURE_TIMER_PERIOD SDEI_1_0_FN(0x19) #define SDEI_VERSION_MAJOR_SHIFT 48 #define SDEI_VERSION_MAJOR_MASK 0x7fff diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 10efbc06392e..47e3ba8fedcc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -857,7 +857,6 @@ 
config SDEI_WATCHDOG bool "SDEI NMI Watchdog support" depends on ARM_SDE_INTERFACE select HAVE_HARDLOCKUP_DETECTOR_ARCH - select HARDLOCKUP_CHECK_TIMESTAMP select HARDLOCKUP_DETECTOR # From 1656fd96c3714e9c4963ccd9f1cc25b60d6fdb88 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:01:02 +0800 Subject: [PATCH 09/11] kprobes/arm64: Blacklist sdei watchdog callback functions commit bdda54cc39843589ee91a0176ca9a94adf307763 openeuler. Functions called in sdei_handler are not allowed to be kprobed, so marked them as NOKPROBE_SYMBOL. There are so many functions in 'watchdog_check_timestamp()'. Luckily, we don't need 'CONFIG_HARDLOCKUP_CHECK_TIMESTAMP' now. So just make CONFIG_SDEI_WATCHDOG depends on !CONFIG_HARDLOCKUP_CHECK_TIMESTAMP in case someone add 'CONFIG_HARDLOCKUP_CHECK_TIMESTAMP' in the future. Signed-off-by: huwentao --- arch/arm64/kernel/watchdog_sdei.c | 2 ++ kernel/watchdog.c | 2 ++ kernel/watchdog_hld.c | 2 ++ lib/Kconfig.debug | 2 +- 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index c342f86560d5..6352b589e02a 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* We use the secure physical timer as SDEI NMI watchdog timer */ @@ -66,6 +67,7 @@ static int sdei_watchdog_callback(u32 event, return 0; } +NOKPROBE_SYMBOL(sdei_watchdog_callback); static void sdei_nmi_watchdog_bind(void *data) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a3d0e928305c..273bb4c5646c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -333,6 +334,7 @@ bool is_hardlockup(void) __this_cpu_write(hrtimer_interrupts_saved, hrint); return false; } +NOKPROBE_SYMBOL(is_hardlockup); static void watchdog_interrupt_count(void) { diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c 
index 88f5c314a1df..a3f35067b4d0 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -155,6 +156,7 @@ void watchdog_hardlockup_check(struct pt_regs *regs) __this_cpu_write(hard_watchdog_warn, false); return; } +NOKPROBE_SYMBOL(watchdog_hardlockup_check); #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 47e3ba8fedcc..74a7a2077c51 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -855,7 +855,7 @@ config HARDLOCKUP_DETECTOR_PERF config SDEI_WATCHDOG bool "SDEI NMI Watchdog support" - depends on ARM_SDE_INTERFACE + depends on ARM_SDE_INTERFACE && !HARDLOCKUP_CHECK_TIMESTAMP select HAVE_HARDLOCKUP_DETECTOR_ARCH select HARDLOCKUP_DETECTOR From e3a14898a3fcd25d567015052ef486bb3cc1639e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 20 Feb 2021 11:01:03 +0800 Subject: [PATCH 10/11] sdei_watchdog: avoid possible false hardlockup commit 0fa83fd0f8f7267be1e31c824cedb9d112504785 openeuler. Firmware may not trigger the SDEI event at the required frequency. The SDEI event may be triggered too soon, which causes a false hardlockup report in the kernel. Check the timestamp in sdei_watchdog_callback() and skip the hardlockup check if it is invoked too soon.
Signed-off-by: huwentao --- arch/arm64/kernel/watchdog_sdei.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 6352b589e02a..a499a14b23c1 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -23,6 +23,7 @@ static int sdei_watchdog_event_num; static bool disable_sdei_nmi_watchdog; static bool sdei_watchdog_registered; +static DEFINE_PER_CPU(ktime_t, last_check_time); int watchdog_nmi_enable(unsigned int cpu) { @@ -35,6 +36,7 @@ int watchdog_nmi_enable(unsigned int cpu) refresh_hld_last_timestamp(); #endif + __this_cpu_write(last_check_time, ktime_get_mono_fast_ns()); sdei_api_set_secure_timer_period(watchdog_thresh); ret = sdei_api_event_enable(sdei_watchdog_event_num); @@ -63,6 +65,23 @@ void watchdog_nmi_disable(unsigned int cpu) static int sdei_watchdog_callback(u32 event, struct pt_regs *regs, void *arg) { + ktime_t delta, now = ktime_get_mono_fast_ns(); + + delta = now - __this_cpu_read(last_check_time); + __this_cpu_write(last_check_time, now); + + /* + * Set delta to 4/5 of the actual watchdog threshold period so the + * hrtimer is guaranteed to fire at least once within the real + * watchdog threshold. + */ + if (delta < watchdog_thresh * (u64)NSEC_PER_SEC * 4 / 5) { + pr_err(FW_BUG "SDEI Watchdog event triggered too soon, " + "time to last check:%lld ns\n", delta); + WARN_ON(1); + return 0; + } + watchdog_hardlockup_check(regs); return 0; From e26e124849cc2f4e4a4538936885ae6127bd67a8 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 29 Mar 2021 15:45:24 +0800 Subject: [PATCH 11/11] sdei_watchdog: Fix compile error when PPC_WATCHDOG is disable on PowerPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0252aa08aafb4a40ea2d821f58e88e99a644b097 openeuler. 
When I compile the kernel with CONFIG_PPC_WATCHDOG disabled on PowerPC, I get the following compile error: In file included from kernel/hung_task.c:11:0: ./include/linux/nmi.h: In function ‘touch_nmi_watchdog’: ./include/linux/nmi.h:143:2: error: implicit declaration of function ‘arch_touch_nmi_watchdog’; did you mean ‘touch_nmi_watchdog’? [-Werror=implicit-function-declaration] arch_touch_nmi_watchdog(); ^~~~~~~~~~~~~~~~~~~~~~~ touch_nmi_watchdog This is because CONFIG_HARDLOCKUP_DETECTOR_PERF is still enabled in my configuration. Fix it by excluding the generic arch_touch_nmi_watchdog() only when CONFIG_PPC_WATCHDOG is enabled. Signed-off-by: huwentao --- include/linux/nmi.h | 2 +- kernel/watchdog_hld.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 43a8d0eb5e06..020768634b29 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -96,7 +96,7 @@ static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR) -#ifndef CONFIG_PPC +#ifndef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); #endif #else diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index a3f35067b4d0..ce26950a8140 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -26,8 +26,8 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static unsigned long hardlockup_allcpu_dumped; -#ifndef CONFIG_PPC -notrace void __weak arch_touch_nmi_watchdog(void) +#ifndef CONFIG_PPC_WATCHDOG +notrace void arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have