rcu/nocb: Avoid ->nocb_lock capture by corresponding CPU
A given rcu_data structure's ->nocb_lock can be acquired very frequently by the corresponding CPU and occasionally by the corresponding no-CBs grace-period and callbacks kthreads. In particular, these two kthreads will have frequent gaps between ->nocb_lock acquisitions that are roughly a grace period in duration. This means that any excessive ->nocb_lock contention will be due to the CPU's acquisitions, and this in turn enables a very naive contention-avoidance strategy to be quite effective.

This commit therefore modifies rcu_nocb_lock() to first attempt a raw_spin_trylock(), and to atomically increment a separate ->nocb_lock_contended across a raw_spin_lock(). This new ->nocb_lock_contended field is checked in __call_rcu_nocb_wake() when interrupts are enabled, with a spin-wait for contending acquisitions to complete, thus allowing the kthreads a chance to acquire the lock.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
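The throttling scheme is small enough to demonstrate outside the kernel. The following is a minimal user-space sketch, substituting C11 atomics and a POSIX spinlock for the kernel's atomic_t and raw_spinlock_t; the names (nocb_lock_acquire(), flooder(), kthread()) are invented for this illustration, and C11's default sequentially consistent atomics stand in for the explicit smp_mb__after_atomic()/smp_mb__before_atomic() barriers that the kernel version needs.

/*
 * Build: cc -O2 -pthread nocb_sketch.c -o nocb_sketch
 * User-space sketch of the ->nocb_lock contention-avoidance pattern.
 * All names are illustrative, not kernel API.
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_spinlock_t nocb_lock;
static atomic_int nocb_lock_contended;	/* Nonzero: a waiter is spinning. */
static long shared_work;		/* Protected by nocb_lock. */

/* Mirrors rcu_nocb_lock(): trylock first, flag contention around the
 * blocking acquisition.  Seq_cst atomics supply the needed ordering. */
static void nocb_lock_acquire(void)
{
	if (pthread_spin_trylock(&nocb_lock) == 0)
		return;				/* Fast path: uncontended. */
	atomic_fetch_add(&nocb_lock_contended, 1);
	pthread_spin_lock(&nocb_lock);
	atomic_fetch_sub(&nocb_lock_contended, 1);
}

/* Mirrors rcu_nocb_wait_contended(): spin until waiters have the lock. */
static void nocb_wait_contended(void)
{
	while (atomic_load(&nocb_lock_contended))
		sched_yield();			/* cpu_relax() stand-in. */
}

/* The "CPU" flooding call_rcu(): throttles itself whenever a waiter is
 * flagged, giving the infrequent thread a chance at the lock. */
static void *flooder(void *unused)
{
	for (int i = 0; i < 1000000; i++) {
		nocb_lock_acquire();
		shared_work++;
		pthread_spin_unlock(&nocb_lock);
		nocb_wait_contended();
	}
	return NULL;
}

/* The "kthread": acquires the lock only occasionally. */
static void *kthread(void *unused)
{
	for (int i = 0; i < 100; i++) {
		nocb_lock_acquire();
		shared_work++;
		pthread_spin_unlock(&nocb_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t f, k;

	pthread_spin_init(&nocb_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_create(&f, NULL, flooder, NULL);
	pthread_create(&k, NULL, kthread, NULL);
	pthread_join(f, NULL);
	pthread_join(k, NULL);
	printf("shared_work = %ld\n", shared_work);
	return 0;
}

Note the asymmetry: only the flooding thread ever calls nocb_wait_contended(), so the throttling cost is paid exactly where the contention originates.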
commit 81c0b3d724
parent 7f36ef82e5
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -197,6 +197,7 @@ struct rcu_data {
 	struct swait_queue_head nocb_cb_wq; /* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_gp_kthread;
 	raw_spinlock_t nocb_lock;	/* Guard following pair of fields. */
+	atomic_t nocb_lock_contended;	/* Contention experienced. */
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
@@ -430,7 +431,22 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
 				       unsigned long flags);
 #ifdef CONFIG_RCU_NOCB_CPU
 static void __init rcu_organize_nocb_kthreads(void);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+#define rcu_nocb_lock_irqsave(rdp, flags)				\
+do {									\
+	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) {		\
+		local_irq_save(flags);					\
+	} else if (!raw_spin_trylock_irqsave(&(rdp)->nocb_lock, (flags))) {\
+		atomic_inc(&(rdp)->nocb_lock_contended);		\
+		smp_mb__after_atomic(); /* atomic_inc() before lock. */	\
+		raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags));	\
+		smp_mb__before_atomic(); /* atomic_dec() after lock. */	\
+		atomic_dec(&(rdp)->nocb_lock_contended);		\
+	}								\
+} while (0)
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
 static void rcu_bind_gp_kthread(void);
 static bool rcu_nohz_full_cpu(void);
 static void rcu_dynticks_task_enter(void);
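The new macro pairs with the preexisting rcu_nocb_unlock_irqrestore() declared just above it. Schematically, a caller does the following (a distilled illustration of the pairing contract, not a hunk from this commit):

	unsigned long flags;

	rcu_nocb_lock_irqsave(rdp, flags);	/* irqs off; also lock if offloaded. */
	/* ... manipulate rdp->cblist ... */
	rcu_nocb_unlock_irqrestore(rdp, flags);	/* drop lock and/or restore irqs. */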
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1498,14 +1498,36 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
 
 /*
  * Acquire the specified rcu_data structure's ->nocb_lock, but only
- * if it corresponds to a no-CBs CPU.
+ * if it corresponds to a no-CBs CPU.  If the lock isn't immediately
+ * available, increment ->nocb_lock_contended to flag the contention.
  */
 static void rcu_nocb_lock(struct rcu_data *rdp)
 {
-	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
-		lockdep_assert_irqs_disabled();
-		raw_spin_lock(&rdp->nocb_lock);
-	}
+	lockdep_assert_irqs_disabled();
+	if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||
+	    raw_spin_trylock(&rdp->nocb_lock))
+		return;
+	atomic_inc(&rdp->nocb_lock_contended);
+	smp_mb__after_atomic(); /* atomic_inc() before lock. */
+	raw_spin_lock(&rdp->nocb_lock);
+	smp_mb__before_atomic(); /* atomic_dec() after lock. */
+	atomic_dec(&rdp->nocb_lock_contended);
+}
+
+/*
+ * Spinwait until the specified rcu_data structure's ->nocb_lock is
+ * not contended.  Please note that this is extremely special-purpose,
+ * relying on the fact that at most two kthreads and one CPU contend for
+ * this lock, and also that the two kthreads are guaranteed to have frequent
+ * grace-period-duration time intervals between successive acquisitions
+ * of the lock.  This allows us to use an extremely simple throttling
+ * mechanism, and further to apply it only to the CPU doing floods of
+ * call_rcu() invocations.  Don't try this at home!
+ */
+static void rcu_nocb_wait_contended(struct rcu_data *rdp)
+{
+	while (atomic_read(&rdp->nocb_lock_contended))
+		cpu_relax();
 }
 
 /*
@@ -1575,19 +1597,19 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
 
 	lockdep_assert_held(&rdp->nocb_lock);
 	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		return;
 	}
 	if (READ_ONCE(rdp_gp->nocb_gp_sleep) || force) {
 		del_timer(&rdp->nocb_timer);
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		smp_mb(); /* enqueue before ->nocb_gp_sleep. */
-		raw_spin_lock_irqsave(&rdp_gp->nocb_lock, flags);
+		rcu_nocb_lock_irqsave(rdp_gp, flags);
 		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
-		raw_spin_unlock_irqrestore(&rdp_gp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp_gp, flags);
 		wake_up_process(rdp_gp->nocb_gp_kthread);
 	} else {
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 	}
 }
 
@@ -1646,23 +1668,23 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
 		if (!rdp->nocb_cb_sleep &&
 		    rcu_segcblist_ready_cbs(&rdp->cblist)) {
 			// Already going full tilt, so don't try to rewake.
-			rcu_nocb_unlock_irqrestore(rdp, flags);
 		} else if (rcu_segcblist_pend_cbs(&rdp->cblist) &&
 			   raw_spin_trylock_rcu_node(rdp->mynode)) {
 			rcu_advance_cbs_nowake(rdp->mynode, rdp);
 			raw_spin_unlock_rcu_node(rdp->mynode);
-			rcu_nocb_unlock_irqrestore(rdp, flags);
 		} else {
 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
 					   TPS("WakeOvfIsDeferred"));
-			rcu_nocb_unlock_irqrestore(rdp, flags);
 		}
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 	} else {
 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
 		rcu_nocb_unlock_irqrestore(rdp, flags);
 	}
-	if (!irqs_disabled_flags(flags))
+	if (!irqs_disabled_flags(flags)) {
 		lockdep_assert_irqs_enabled();
+		rcu_nocb_wait_contended(rdp);
+	}
 	return;
 }
 
@@ -1692,7 +1714,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 		if (rcu_segcblist_empty(&rdp->cblist))
 			continue; /* No callbacks here, try next. */
 		rnp = rdp->mynode;
-		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+		rcu_nocb_lock_irqsave(rdp, flags);
 		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
 		del_timer(&my_rdp->nocb_timer);
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1712,7 +1734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 		} else {
 			needwake = false;
 		}
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		if (needwake) {
 			swake_up_one(&rdp->nocb_cb_wq);
 			gotcbs = true;
@@ -1741,9 +1763,9 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
 	}
 	if (!rcu_nocb_poll) {
-		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
+		rcu_nocb_lock_irqsave(my_rdp, flags);
 		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
-		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(my_rdp, flags);
 	}
 	WARN_ON(signal_pending(current));
 }
@@ -1784,12 +1806,12 @@ static void nocb_cb_wait(struct rcu_data *rdp)
 	rcu_do_batch(rdp);
 	local_bh_enable();
 	lockdep_assert_irqs_enabled();
-	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	rcu_nocb_lock_irqsave(rdp, flags);
 	raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 	needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
 	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
 	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		if (needwake_gp)
 			rcu_gp_kthread_wake();
 		return;
@@ -1797,7 +1819,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
 
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
 	WRITE_ONCE(rdp->nocb_cb_sleep, true);
-	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+	rcu_nocb_unlock_irqrestore(rdp, flags);
 	if (needwake_gp)
 		rcu_gp_kthread_wake();
 	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
@@ -1839,9 +1861,9 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
 	unsigned long flags;
 	int ndw;
 
-	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	rcu_nocb_lock_irqsave(rdp, flags);
 	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		return;
 	}
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);