diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
index e4233ac93c2b..6189ffcc6aff 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
@@ -328,13 +328,13 @@
      inkscape:window-height="1148"
      id="namedview90"
      showgrid="true"
-     inkscape:zoom="0.80021373"
-     inkscape:cx="462.49289"
-     inkscape:cy="473.6718"
+     inkscape:zoom="0.69092787"
+     inkscape:cx="476.34085"
+     inkscape:cy="712.80957"
      inkscape:window-x="770"
      inkscape:window-y="24"
      inkscape:window-maximized="0"
-     inkscape:current-layer="g4114-9-3-9"
+     inkscape:current-layer="g4"
      inkscape:snap-grids="false"
      fit-margin-top="5"
      fit-margin-right="5"
@@ -813,14 +813,18 @@ reched_cpu()
+     id="tspan4925-1-2-4-5">Request
+     context switch
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
index 8e4f873b979f..19e7a5fb6b73 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
@@ -72,10 +72,10 @@
 will ignore it because idle and offline CPUs are already residing
 in quiescent states.
 Otherwise, the expedited grace period will use
 smp_call_function_single() to send the CPU an IPI, which
-is handled by sync_rcu_exp_handler().
+is handled by rcu_exp_handler().

-However, because this is preemptible RCU, sync_rcu_exp_handler()
+However, because this is preemptible RCU, rcu_exp_handler()
 can check to see if the CPU is currently running in an
 RCU read-side critical section.
 If not, the handler can immediately report a quiescent state.
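The handler logic described above has roughly the following shape. This is an illustrative sketch, not the kernel's implementation: report_exp_qs_now() and defer_exp_qs() are hypothetical helpers standing in for the real quiescent-state bookkeeping, while rcu_read_lock_nesting is the task_struct field that preemptible RCU uses to track rcu_read_lock() nesting.

	/* Sketch of the preemptible-RCU IPI handler described above. */
	static void sketch_rcu_exp_handler(void *unused)
	{
		struct task_struct *t = current;

		if (!t->rcu_read_lock_nesting) {
			/* Not in an RCU read-side critical section, so
			 * report the quiescent state immediately. */
			report_exp_qs_now();	/* hypothetical helper */
		} else {
			/* In a critical section: note that a quiescent
			 * state is owed, to be reported at the outermost
			 * rcu_read_unlock(). */
			defer_exp_qs(t);	/* hypothetical helper */
		}
	}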

@@ -145,19 +145,18 @@
 expedited grace period is shown in the following diagram:

 ExpSchedFlow.svg

-As with RCU-preempt's synchronize_rcu_expedited(),
+As with RCU-preempt, RCU-sched's synchronize_sched_expedited()
 ignores offline and idle CPUs, again because they are in
 remotely detectable quiescent states.
-However, the synchronize_rcu_expedited() handler
-is sync_sched_exp_handler(), and because the
+However, because the
 rcu_read_lock_sched() and rcu_read_unlock_sched()
 leave no trace of their invocation, in general it is not possible to tell
 whether or not the current CPU is in an RCU read-side critical section.
-The best that sync_sched_exp_handler() can do is to check
+The best that RCU-sched's rcu_exp_handler() can do is to check
 for idle, on the off-chance that the CPU went idle while the IPI
 was in flight.
-If the CPU is idle, then sync_sched_exp_handler() reports
+If the CPU is idle, then rcu_exp_handler() reports
 the quiescent state.

 Otherwise, the handler forces a future context switch by setting the
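Forcing a future context switch, as described above, amounts to marking the current task as needing to reschedule. A minimal sketch, assuming the kernel's set_tsk_need_resched() and set_preempt_need_resched() helpers are the mechanism in play:

	/* Sketch: request a context switch on the current CPU. */
	static void sketch_force_context_switch(void)
	{
		set_tsk_need_resched(current);	/* flag the running task */
		set_preempt_need_resched();	/* arm the preemption check */
	}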

@@ -298,19 +297,18 @@
 Instead, the task pushing the grace period
 forward will include the idle CPUs in the mask
 passed to rcu_report_exp_cpu_mult().

-For RCU-sched, there is an additional check for idle in the IPI
-handler, sync_sched_exp_handler().
+For RCU-sched, there is an additional check:
 If the IPI has interrupted the idle loop, then
-sync_sched_exp_handler() invokes rcu_report_exp_rdp()
+rcu_exp_handler() invokes rcu_report_exp_rdp()
 to report the corresponding quiescent state.

 For RCU-preempt, there is no specific check for idle in the
-IPI handler (sync_rcu_exp_handler()), but because
+IPI handler (rcu_exp_handler()), but because
 RCU read-side critical sections are not permitted within the
-idle loop, if sync_rcu_exp_handler() sees that the CPU is within
+idle loop, if rcu_exp_handler() sees that the CPU is within
 an RCU read-side critical section, the CPU cannot possibly be idle.
-Otherwise, sync_rcu_exp_handler() invokes
+Otherwise, rcu_exp_handler() invokes
 rcu_report_exp_rdp() to report the corresponding quiescent
 state, regardless of whether or not that quiescent state was due
 to the CPU being idle.
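The asymmetry described in the last two paragraphs can be made concrete. This sketch shows only the RCU-sched side's idle check; it assumes the kernel's rcu_is_cpu_rrupt_from_idle() test and the rcu_report_exp_rdp()/rcu_data names that appear elsewhere in this patch, and it elides the context-switch request covered earlier:

	/* Sketch of the idle check in RCU-sched's rcu_exp_handler(). */
	static void sketch_rcu_sched_exp_handler(void *unused)
	{
		if (rcu_is_cpu_rrupt_from_idle()) {
			/* The IPI interrupted the idle loop, and idle is
			 * itself a quiescent state, so report it now. */
			rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
			return;
		}
		/* Otherwise, request a future context switch (see above). */
	}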

@@ -625,6 +623,8 @@
 checks, but only during the mid-boot dead zone.

 With this refinement, synchronous grace periods can now be used
 from task context pretty much any time during the life of the kernel.
+That is, aside from some points in the suspend, hibernate, or shutdown
+code path.

Summary
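On the code side, the patch below deletes the rew_func field from struct rcu_exp_work: with a single rcu_exp_handler() per build, there is no longer a handler pointer to marshal to the workqueue. The surviving pattern, passing arguments to a workqueue handler through an on-stack structure that embeds a work_struct, looks roughly like this sketch; wait_rcu_exp_gp() and rcu_gp_wq are the names used in the diff, while example_run_exp_gp() is hypothetical:

	struct rcu_exp_work {
		unsigned long rew_s;		/* expedited sequence number */
		struct work_struct rew_work;	/* embedded work item */
	};

	/* Hypothetical caller showing on-stack argument marshalling. */
	static void example_run_exp_gp(unsigned long s)
	{
		struct rcu_exp_work rew;

		rew.rew_s = s;
		INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
		queue_work(rcu_gp_wq, &rew.rew_work);
		/* The handler recovers its arguments with
		 * container_of(wp, struct rcu_exp_work, rew_work), so
		 * the caller must wait for the work item to finish
		 * before rew goes out of scope. */
	}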

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bcfd684a5c57..50bb41cdc5fb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -36,7 +36,6 @@
 
 /* Communicate arguments to a workqueue handler. */
 struct rcu_exp_work {
-	smp_call_func_t rew_func;
 	unsigned long rew_s;
 	struct work_struct rew_work;
 };
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d4eb4694b6f..7f5cb4228b59 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -22,6 +22,8 @@
 
 #include <linux/lockdep.h>
 
+static void rcu_exp_handler(void *unused);
+
 /*
  * Record the start of an expedited grace period.
  */
@@ -344,7 +346,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
 {
 	int cpu;
 	unsigned long flags;
-	smp_call_func_t func;
 	unsigned long mask_ofl_test;
 	unsigned long mask_ofl_ipi;
 	int ret;
@@ -352,7 +353,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
 		container_of(wp, struct rcu_exp_work, rew_work);
 	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
 
-	func = rewp->rew_func;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
 	/* Each pass checks a CPU for identity, offline, and idle. */
@@ -396,7 +396,7 @@ retry_ipi:
 			mask_ofl_test |= mask;
 			continue;
 		}
-		ret = smp_call_function_single(cpu, func, NULL, 0);
+		ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
 		if (!ret) {
 			mask_ofl_ipi &= ~mask;
 			continue;
 		}
@@ -426,7 +426,7 @@
  * Select the nodes that the upcoming expedited grace period needs
  * to wait for.
  */
-static void sync_rcu_exp_select_cpus(smp_call_func_t func)
+static void sync_rcu_exp_select_cpus(void)
 {
 	int cpu;
 	struct rcu_node *rnp;
@@ -440,7 +440,6 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func)
 		rnp->exp_need_flush = false;
 		if (!READ_ONCE(rnp->expmask))
 			continue; /* Avoid early boot non-existent wq. */
-		rnp->rew.rew_func = func;
 		if (!READ_ONCE(rcu_par_gp_wq) ||
 		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
 		    rcu_is_last_leaf_node(rnp)) {
@@ -580,10 +579,10 @@ static void rcu_exp_wait_wake(unsigned long s)
  * Common code to drive an expedited grace period forward, used by
  * workqueues and mid-boot-time tasks.
  */
-static void rcu_exp_sel_wait_wake(smp_call_func_t func, unsigned long s)
+static void rcu_exp_sel_wait_wake(unsigned long s)
 {
 	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(func);
+	sync_rcu_exp_select_cpus();
 
 	/* Wait and clean up, including waking everyone. */
 	rcu_exp_wait_wake(s);
@@ -597,14 +596,14 @@ static void wait_rcu_exp_gp(struct work_struct *wp)
 {
 	struct rcu_exp_work *rewp;
 
 	rewp = container_of(wp, struct rcu_exp_work, rew_work);
-	rcu_exp_sel_wait_wake(rewp->rew_func, rewp->rew_s);
+	rcu_exp_sel_wait_wake(rewp->rew_s);
 }
 
 /*
  * Given a smp_call_function() handler, kick off the specified
  * implementation of expedited grace period.
  */
-static void _synchronize_rcu_expedited(smp_call_func_t func)
+static void _synchronize_rcu_expedited(void)
 {
 	struct rcu_data *rdp;
 	struct rcu_exp_work rew;
@@ -625,10 +624,9 @@ static void _synchronize_rcu_expedited(smp_call_func_t func)
 	/* Ensure that load happens before action based on it. */
 	if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
 		/* Direct call during scheduler init and early_initcalls(). */
-		rcu_exp_sel_wait_wake(func, s);
+		rcu_exp_sel_wait_wake(s);
 	} else {
 		/* Marshall arguments & schedule the expedited grace period. */
-		rew.rew_func = func;
 		rew.rew_s = s;
 		INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
 		queue_work(rcu_gp_wq, &rew.rew_work);
@@ -654,7 +652,7 @@ static void _synchronize_rcu_expedited(smp_call_func_t func)
  * ->expmask fields in the rcu_node tree.  Otherwise, immediately
  * report the quiescent state.
  */
-static void sync_rcu_exp_handler(void *unused)
+static void rcu_exp_handler(void *unused)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -760,14 +758,14 @@ void synchronize_rcu_expedited(void)
 
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return;
-	_synchronize_rcu_expedited(sync_rcu_exp_handler);
+	_synchronize_rcu_expedited();
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
 /* Invoked on each online non-idle CPU for expedited quiescent state. */
-static void sync_sched_exp_handler(void *unused)
+static void rcu_exp_handler(void *unused)
 {
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
@@ -799,7 +797,7 @@ static void sync_sched_exp_online_cleanup(int cpu)
 	rnp = rdp->mynode;
 	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
 		return;
-	ret = smp_call_function_single(cpu, sync_sched_exp_handler, NULL, 0);
+	ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
 	WARN_ON_ONCE(ret);
 }
 
@@ -835,7 +833,7 @@ void synchronize_rcu_expedited(void)
 
 	if (rcu_blocking_is_gp())
 		return;
-	_synchronize_rcu_expedited(sync_sched_exp_handler);
+	_synchronize_rcu_expedited();
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
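For reference, a typical updater-side caller of synchronize_rcu_expedited() follows the usual remove/wait/reclaim pattern. This example is hypothetical: struct foo, foo_lock, and the list membership are invented for illustration, while the list and RCU APIs are the kernel's own:

	/* Hypothetical updater: unlink an element, wait for readers, free. */
	void example_remove_and_free(struct foo *p)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&p->list);
		spin_unlock(&foo_lock);
		synchronize_rcu_expedited();	/* expedited grace period */
		kfree(p);
	}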