2019-01-18 02:13:19 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0+ */
|
2017-05-02 16:31:18 +08:00
|
|
|
/*
|
2017-05-02 21:30:12 +08:00
|
|
|
* RCU segmented callback lists, internal-to-rcu header file
|
2017-05-02 16:31:18 +08:00
|
|
|
*
|
|
|
|
* Copyright IBM Corporation, 2017
|
|
|
|
*
|
2019-01-18 02:13:19 +08:00
|
|
|
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
|
2017-05-02 16:31:18 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/rcu_segcblist.h>
|
|
|
|
|
2019-07-02 08:36:53 +08:00
|
|
|
/* Return number of callbacks in the specified callback list. */
|
|
|
|
static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
|
|
|
|
{
|
|
|
|
return READ_ONCE(rclp->len);
|
|
|
|
}
|
|
|
|
|
2017-05-02 21:30:12 +08:00
|
|
|
void rcu_cblist_init(struct rcu_cblist *rclp);
|
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is
exceeded, the CPU instead enqueues onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing
(every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds (2 * qhimark), whichever comes first). This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
2019-07-03 07:03:33 +08:00
|
|
|
void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp);
|
|
|
|
void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
|
|
|
|
struct rcu_cblist *srclp,
|
|
|
|
struct rcu_head *rhp);
|
2017-05-02 21:30:12 +08:00
|
|
|
struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
|
2017-05-02 16:31:18 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Is the specified rcu_segcblist structure empty?
|
|
|
|
*
|
|
|
|
* But careful! The fact that the ->head field is NULL does not
|
|
|
|
* necessarily imply that there are no callbacks associated with
|
|
|
|
* this structure. When callbacks are being invoked, they are
|
|
|
|
* removed as a group. If callback invocation must be preempted,
|
|
|
|
* the remaining callbacks will be added back to the list. Either
|
|
|
|
* way, the counts are updated later.
|
|
|
|
*
|
|
|
|
* So it is often the case that rcu_segcblist_n_cbs() should be used
|
|
|
|
* instead.
|
|
|
|
*/
|
|
|
|
static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
|
|
|
|
{
|
2019-05-14 06:57:50 +08:00
|
|
|
return !READ_ONCE(rsclp->head);
|
2017-05-02 16:31:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return number of callbacks in segmented callback list. */
|
|
|
|
static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
|
|
|
|
{
|
2019-07-02 08:36:53 +08:00
|
|
|
#ifdef CONFIG_RCU_NOCB_CPU
|
|
|
|
return atomic_long_read(&rsclp->len);
|
|
|
|
#else
|
2017-05-02 16:31:18 +08:00
|
|
|
return READ_ONCE(rsclp->len);
|
2019-07-02 08:36:53 +08:00
|
|
|
#endif
|
2017-05-02 16:31:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Is the specified rcu_segcblist enabled, for example, not corresponding
|
2019-05-15 00:50:49 +08:00
|
|
|
* to an offline CPU?
|
2017-05-02 16:31:18 +08:00
|
|
|
*/
|
|
|
|
static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
|
|
|
|
{
|
2019-04-13 03:34:41 +08:00
|
|
|
return rsclp->enabled;
|
2017-05-02 16:31:18 +08:00
|
|
|
}
|
|
|
|
|
2019-04-13 06:58:34 +08:00
|
|
|
/* Is the specified rcu_segcblist offloaded? */
|
|
|
|
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
|
|
|
|
{
|
2020-09-21 20:43:40 +08:00
|
|
|
return IS_ENABLED(CONFIG_RCU_NOCB_CPU) && rsclp->offloaded;
|
2019-04-13 06:58:34 +08:00
|
|
|
}
|
|
|
|
|
2017-05-02 16:31:18 +08:00
|
|
|
/*
|
|
|
|
* Are all segments following the specified segment of the specified
|
|
|
|
* rcu_segcblist structure empty of callbacks? (The specified
|
|
|
|
* segment might well contain callbacks.)
|
|
|
|
*/
|
|
|
|
static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
|
|
|
|
{
|
2019-05-14 05:36:11 +08:00
|
|
|
return !READ_ONCE(*READ_ONCE(rsclp->tails[seg]));
|
2017-05-02 16:31:18 +08:00
|
|
|
}
|
|
|
|
|
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is
exceeded, the CPU instead enqueues onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing
(every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds (2 * qhimark), whichever comes first). This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
2019-07-03 07:03:33 +08:00
|
|
|
void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp);
|
rcu/tree: Make rcu_do_batch count how many callbacks were executed
The rcu_do_batch() function extracts the ready-to-invoke callbacks
from the rcu_segcblist located in the ->cblist field of the current
CPU's rcu_data structure. These callbacks are first moved to a local
(unsegmented) rcu_cblist. The rcu_do_batch() function then uses this
rcu_cblist's ->len field to count how many CBs it has invoked, but it
does so by counting that field down from zero. Finally, this function
negates the value in this ->len field (resulting in a positive number)
and subtracts the result from the ->len field of the current CPU's
->cblist field.
Except that it is sometimes necessary for rcu_do_batch() to stop invoking
callbacks mid-stream, despite there being more ready to invoke, for
example, if a high-priority task wakes up. In this case the remaining
not-yet-invoked callbacks are requeued back onto the CPU's ->cblist,
but remain in the ready-to-invoke segment of that list. As above, the
negative of the local rcu_cblist's ->len field is still subtracted from
the ->len field of the current CPU's ->cblist field.
The design of counting down from 0 is confusing and error-prone, plus
use of a positive count will make it easier to provide a uniform and
consistent API to deal with the per-segment counts that are added
later in this series. For example, rcu_segcblist_extract_done_cbs()
can unconditionally populate the resulting unsegmented list's ->len
field during extraction.
This commit therefore explicitly counts how many callbacks were executed
in rcu_do_batch() itself, counting up from zero, and then uses that
to update the per-CPU segcb list's ->len field, without relying on the
downcounting of rcl->len from zero.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2020-11-03 22:25:57 +08:00
|
|
|
void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v);
|
2017-05-02 21:30:12 +08:00
|
|
|
void rcu_segcblist_init(struct rcu_segcblist *rsclp);
|
|
|
|
void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
|
2019-04-13 06:58:34 +08:00
|
|
|
void rcu_segcblist_offload(struct rcu_segcblist *rsclp);
|
2017-05-02 21:30:12 +08:00
|
|
|
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
|
|
|
|
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
|
|
|
|
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
|
|
|
|
struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
|
2019-05-16 00:56:40 +08:00
|
|
|
bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp);
|
2017-05-02 21:30:12 +08:00
|
|
|
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
|
2019-08-31 00:36:32 +08:00
|
|
|
struct rcu_head *rhp);
|
2017-05-02 21:30:12 +08:00
|
|
|
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
|
2019-08-31 00:36:32 +08:00
|
|
|
struct rcu_head *rhp);
|
2017-05-02 21:30:12 +08:00
|
|
|
void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
|
|
|
|
struct rcu_cblist *rclp);
|
|
|
|
void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
|
|
|
|
struct rcu_cblist *rclp);
|
|
|
|
void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
|
|
|
|
struct rcu_cblist *rclp);
|
|
|
|
void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
|
|
|
|
struct rcu_cblist *rclp);
|
|
|
|
void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
|
|
|
|
struct rcu_cblist *rclp);
|
|
|
|
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
|
|
|
|
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
|
2017-06-27 22:44:06 +08:00
|
|
|
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
|
|
|
|
struct rcu_segcblist *src_rsclp);
|