diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 9888a0ad2d4e..b1711c48a7ec 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -365,6 +365,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } + rcu_prepare_for_idle(smp_processor_id()); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic_inc(); /* See above. */ atomic_inc(&rdtp->dynticks); @@ -1085,6 +1086,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) * callbacks are waiting on the grace period that just now * completed. */ + rcu_schedule_wake_gp_end(); if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ @@ -1670,6 +1672,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); + rcu_wake_cpus_for_gp_end(); trace_rcu_utilization("End RCU core"); } @@ -1923,7 +1926,7 @@ static int rcu_pending(int cpu) * by the current CPU, even if none need be done immediately, returning * 1 if so. */ -static int rcu_needs_cpu_quick_check(int cpu) +static int rcu_cpu_has_callbacks(int cpu) { /* RCU callbacks either ready or pending? */ return per_cpu(rcu_sched_data, cpu).nxtlist || diff --git a/kernel/rcutree.h b/kernel/rcutree.h index fd2f87db2ab1..ea32405177c9 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,6 +88,7 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ + int wake_gp_end; /* A GP ended, need to wake up CPUs. */ }; /* RCU's kthread states for tracing. */ @@ -467,5 +468,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg); #endif /* #ifdef CONFIG_RCU_BOOST */ static void rcu_cpu_kthread_setrt(int cpu, int to_rt); static void __cpuinit rcu_prepare_kthreads(int cpu); +static void rcu_prepare_for_idle(int cpu); +static void rcu_wake_cpus_for_gp_end(void); +static void rcu_schedule_wake_gp_end(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7a7961feeecf..b70ca8cc52e1 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1953,7 +1953,31 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited); */ int rcu_needs_cpu(int cpu) { - return rcu_needs_cpu_quick_check(cpu); + return rcu_cpu_has_callbacks(cpu); +} + +/* + * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, + * is nothing. + */ +static void rcu_prepare_for_idle(int cpu) +{ +} + +/* + * CPUs are never putting themselves to sleep with callbacks pending, + * so there is no need to awaken them. + */ +static void rcu_wake_cpus_for_gp_end(void) +{ +} + +/* + * CPUs are never putting themselves to sleep with callbacks pending, + * so there is no need to schedule the act of awakening them. + */ +static void rcu_schedule_wake_gp_end(void) +{ } #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ @@ -1961,47 +1985,56 @@ int rcu_needs_cpu(int cpu) #define RCU_NEEDS_CPU_FLUSHES 5 static DEFINE_PER_CPU(int, rcu_dyntick_drain); static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); +static DEFINE_PER_CPU(bool, rcu_awake_at_gp_end); /* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. + * Allow the CPU to enter dyntick-idle mode if either: (1) There are no + * callbacks on this CPU, (2) this CPU has not yet attempted to enter + * dyntick-idle mode, or (3) this CPU is in the process of attempting to + * enter dyntick-idle mode. Otherwise, if we have recently tried and failed + * to enter dyntick-idle mode, we refuse to try to enter it. After all, + * it is better to incur scheduling-clock interrupts than to spin + * continuously for the same time duration! + */ +int rcu_needs_cpu(int cpu) +{ + /* If no callbacks, RCU doesn't need the CPU. */ + if (!rcu_cpu_has_callbacks(cpu)) + return 0; + /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ + return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; +} + +/* + * Check to see if any RCU-related work can be done by the current CPU, + * and if so, schedule a softirq to get it done. This function is part + * of the RCU implementation; it is -not- an exported member of the RCU API. * - * Because we are not supporting preemptible RCU, attempt to accelerate - * any current grace periods so that RCU no longer needs this CPU, but - * only if all other CPUs are already in dynticks-idle mode. This will - * allow the CPU cores to be powered down immediately, as opposed to after - * waiting many milliseconds for grace periods to elapse. + * The idea is for the current CPU to clear out all work required by the + * RCU core for the current grace period, so that this CPU can be permitted + * to enter dyntick-idle mode. In some cases, it will need to be awakened + * at the end of the grace period by whatever CPU ends the grace period. + * This allows CPUs to go dyntick-idle more quickly, and to reduce the + * number of wakeups by a modest integer factor. * * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. + * + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu) +static void rcu_prepare_for_idle(int cpu) { int c = 0; - int snap; - int thatcpu; - /* Check for being in the holdoff period. */ - if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) - return rcu_needs_cpu_quick_check(cpu); - - /* Don't bother unless we are the last non-dyntick-idle CPU. */ - for_each_online_cpu(thatcpu) { - if (thatcpu == cpu) - continue; - snap = atomic_add_return(0, &per_cpu(rcu_dynticks, - thatcpu).dynticks); - smp_mb(); /* Order sampling of snap with end of grace period. */ - if ((snap & 0x1) != 0) { - per_cpu(rcu_dyntick_drain, cpu) = 0; - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - return rcu_needs_cpu_quick_check(cpu); - } + /* If no callbacks or in the holdoff period, enter dyntick-idle. */ + if (!rcu_cpu_has_callbacks(cpu)) { + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; + return; } + if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) + return; /* Check and update the rcu_dyntick_drain sequencing. */ if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { @@ -2010,10 +2043,25 @@ int rcu_needs_cpu(int cpu) } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - return rcu_needs_cpu_quick_check(cpu); + if (!rcu_pending(cpu)) { + per_cpu(rcu_awake_at_gp_end, cpu) = 1; + return; /* Nothing to do immediately. */ + } + invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ + return; } - /* Do one step pushing remaining RCU callbacks through. */ + /* + * Do one step of pushing the remaining RCU callbacks through + * the RCU core state machine. + */ +#ifdef CONFIG_TREE_PREEMPT_RCU + if (per_cpu(rcu_preempt_data, cpu).nxtlist) { + rcu_preempt_qs(cpu); + force_quiescent_state(&rcu_preempt_state, 0); + c = c || per_cpu(rcu_preempt_data, cpu).nxtlist; + } +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ if (per_cpu(rcu_sched_data, cpu).nxtlist) { rcu_sched_qs(cpu); force_quiescent_state(&rcu_sched_state, 0); @@ -2028,7 +2076,51 @@ int rcu_needs_cpu(int cpu) /* If RCU callbacks are still pending, RCU still needs this CPU. */ if (c) invoke_rcu_core(); - return c; } +/* + * Wake up a CPU by invoking the RCU core. Intended for use by + * rcu_wake_cpus_for_gp_end(), which passes this function to + * smp_call_function_single(). + */ +static void rcu_wake_cpu(void *unused) +{ + invoke_rcu_core(); +} + +/* + * If an RCU grace period ended recently, scan the rcu_awake_at_gp_end + * per-CPU variables, and wake up any CPUs that requested a wakeup. + */ +static void rcu_wake_cpus_for_gp_end(void) +{ + int cpu; + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (!rdtp->wake_gp_end) + return; + rdtp->wake_gp_end = 0; + for_each_online_cpu(cpu) { + if (per_cpu(rcu_awake_at_gp_end, cpu)) { + per_cpu(rcu_awake_at_gp_end, cpu) = 0; + smp_call_function_single(cpu, rcu_wake_cpu, NULL, 0); + } + } +} + +/* + * A grace period has just ended, and so we will need to awaken CPUs + * that now have work to do. But we cannot send IPIs with interrupts + * disabled, so just set a flag so that this will happen upon exit + * from RCU core processing. + */ +static void rcu_schedule_wake_gp_end(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + rdtp->wake_gp_end = 1; +} + +/* @@@ need tracing as well. */ + #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */