rcu: Don't disable preemption for Tiny and Tree RCU readers
Because preempt_disable() maps to barrier() for non-debug builds, it forces the compiler to spill and reload registers. Because Tree RCU and Tiny RCU now only appear in CONFIG_PREEMPT=n builds, these barrier() instances generate needless extra code for each instance of rcu_read_lock() and rcu_read_unlock(). This extra code slows down Tree RCU and bloats Tiny RCU. This commit therefore removes the preempt_disable() and preempt_enable() from the non-preemptible implementations of __rcu_read_lock() and __rcu_read_unlock(), respectively. However, for debug purposes, preempt_disable() and preempt_enable() are still invoked if CONFIG_PREEMPT_COUNT=y, because this allows detection of sleeping inside atomic sections in non-preemptible kernels. However, Tiny and Tree RCU operate by coalescing all RCU read-side critical sections on a given CPU that lie between successive quiescent states. It is therefore necessary to compensate for removing barriers from __rcu_read_lock() and __rcu_read_unlock() by adding them to a couple of the RCU functions invoked during quiescent states, namely to rcu_all_qs() and rcu_note_context_switch(). However, note that the latter is more paranoia than necessity, at least until link-time optimizations become more aggressive. This is based on an earlier patch by Paul E. McKenney, fixing a bug encountered in kernels built with CONFIG_PREEMPT=n and CONFIG_PREEMPT_COUNT=y.
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
parent
db3e8db45e
commit
bb73c52bad
|
@ -297,12 +297,14 @@ void synchronize_rcu(void);
|
||||||
|
|
||||||
static inline void __rcu_read_lock(void)
|
static inline void __rcu_read_lock(void)
|
||||||
{
|
{
|
||||||
preempt_disable();
|
if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
|
||||||
|
preempt_disable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __rcu_read_unlock(void)
|
static inline void __rcu_read_unlock(void)
|
||||||
{
|
{
|
||||||
preempt_enable();
|
if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
|
||||||
|
preempt_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void synchronize_rcu(void)
|
static inline void synchronize_rcu(void)
|
||||||
|
|
|
@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void)
|
||||||
|
|
||||||
static inline void rcu_all_qs(void)
|
static inline void rcu_all_qs(void)
|
||||||
{
|
{
|
||||||
|
barrier(); /* Avoid RCU read-side critical sections leaking across. */
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __LINUX_RCUTINY_H */
|
#endif /* __LINUX_RCUTINY_H */
|
||||||
|
|
|
@ -337,12 +337,14 @@ static void rcu_momentary_dyntick_idle(void)
|
||||||
*/
|
*/
|
||||||
void rcu_note_context_switch(void)
|
void rcu_note_context_switch(void)
|
||||||
{
|
{
|
||||||
|
barrier(); /* Avoid RCU read-side critical sections leaking down. */
|
||||||
trace_rcu_utilization(TPS("Start context switch"));
|
trace_rcu_utilization(TPS("Start context switch"));
|
||||||
rcu_sched_qs();
|
rcu_sched_qs();
|
||||||
rcu_preempt_note_context_switch();
|
rcu_preempt_note_context_switch();
|
||||||
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
|
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
|
||||||
rcu_momentary_dyntick_idle();
|
rcu_momentary_dyntick_idle();
|
||||||
trace_rcu_utilization(TPS("End context switch"));
|
trace_rcu_utilization(TPS("End context switch"));
|
||||||
|
barrier(); /* Avoid RCU read-side critical sections leaking up. */
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
|
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
|
||||||
|
|
||||||
|
@ -353,12 +355,19 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
|
||||||
* RCU flavors in desperate need of a quiescent state, which will normally
|
* RCU flavors in desperate need of a quiescent state, which will normally
|
||||||
* be none of them). Either way, do a lightweight quiescent state for
|
* be none of them). Either way, do a lightweight quiescent state for
|
||||||
* all RCU flavors.
|
* all RCU flavors.
|
||||||
|
*
|
||||||
|
* The barrier() calls are redundant in the common case when this is
|
||||||
|
* called externally, but just in case this is called from within this
|
||||||
|
* file.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
void rcu_all_qs(void)
|
void rcu_all_qs(void)
|
||||||
{
|
{
|
||||||
|
barrier(); /* Avoid RCU read-side critical sections leaking down. */
|
||||||
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
|
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
|
||||||
rcu_momentary_dyntick_idle();
|
rcu_momentary_dyntick_idle();
|
||||||
this_cpu_inc(rcu_qs_ctr);
|
this_cpu_inc(rcu_qs_ctr);
|
||||||
|
barrier(); /* Avoid RCU read-side critical sections leaking up. */
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rcu_all_qs);
|
EXPORT_SYMBOL_GPL(rcu_all_qs);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue