From 65d798f0f9339ae2c4ebe9480e3260b33382a584 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 12 Apr 2013 16:19:10 -0700 Subject: [PATCH] rcu: Kick adaptive-ticks CPUs that are holding up RCU grace periods Adaptive-ticks CPUs inform RCU when they enter kernel mode, but they do not necessarily turn the scheduler-clock tick back on. This state of affairs could result in RCU waiting on an adaptive-ticks CPU running for an extended period in kernel mode. Such a CPU will never run the RCU state machine, and could therefore indefinitely extend the RCU state machine, sooner or later resulting in an OOM condition. This patch, inspired by an earlier patch by Frederic Weisbecker, therefore causes RCU's force-quiescent-state processing to check for this condition and to send an IPI to CPUs that remain in that state for too long. "Too long" currently means about three jiffies by default, which is quite some time for a CPU to remain in the kernel without blocking. The rcu_tree.jiffies_till_first_fqs and rcutree.jiffies_till_next_fqs sysfs variables may be used to tune "too long" if needed. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/rcutree.c | 10 ++++++++++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5b8ad827fd86..f5ab50235cba 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -794,6 +794,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) rdp->offline_fqs++; return 1; } + + /* + * There is a possibility that a CPU in adaptive-ticks state + * might run in the kernel with the scheduling-clock tick disabled + * for an extended time period. Invoke rcu_kick_nohz_cpu() to + * force the CPU to restart the scheduling-clock tick in this + * CPU is in this state. + */ + rcu_kick_nohz_cpu(rdp->cpu); + return 0; } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c896b5045d9d..f993c0ac47db 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -539,6 +539,7 @@ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); static void init_nocb_callback_list(struct rcu_data *rdp); static void __init rcu_init_nocb(void); +static void rcu_kick_nohz_cpu(int cpu); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c1cc7e17ff9d..a5745e9b5d5a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -28,6 +28,7 @@ #include #include #include +#include #define RCU_KTHREAD_PRIO 1 @@ -2502,3 +2503,20 @@ static void __init rcu_init_nocb(void) } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + +/* + * An adaptive-ticks CPU can potentially execute in kernel mode for an + * arbitrarily long period of time with the scheduling-clock tick turned + * off. RCU will be paying attention to this CPU because it is in the + * kernel, but the CPU cannot be guaranteed to be executing the RCU state + * machine because the scheduling-clock tick has been disabled. Therefore, + * if an adaptive-ticks CPU is failing to respond to the current grace + * period and has not be idle from an RCU perspective, kick it. + */ +static void rcu_kick_nohz_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_cpu(cpu)) + smp_send_reschedule(cpu); +#endif /* #ifdef CONFIG_NO_HZ_FULL */ +}