rcu: Reduce expedited GP memory contention via per-CPU variables
Currently, the piggybacked-work checks carried out by sync_exp_work_done() atomically increment a small set of variables (the ->expedited_workdone0, ->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3 fields in the rcu_state structure), which will form a memory-contention bottleneck given a sufficiently large number of CPUs concurrently invoking either synchronize_rcu_expedited() or synchronize_sched_expedited(). This commit therefore moves these four fields to the per-CPU rcu_data structure, eliminating the memory contention. The show_rcuexp() function also changes to sum up each field in the rcu_data structures. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
parent
1307f21487
commit
df5bd5144a
|
@ -3585,7 +3585,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||||
*/
|
*/
|
||||||
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
{
|
{
|
||||||
struct rcu_data *rdp;
|
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
|
||||||
struct rcu_node *rnp0;
|
struct rcu_node *rnp0;
|
||||||
struct rcu_node *rnp1 = NULL;
|
struct rcu_node *rnp1 = NULL;
|
||||||
|
|
||||||
|
@ -3599,7 +3599,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
|
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
|
||||||
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
|
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
|
||||||
if (sync_exp_work_done(rsp, rnp0, NULL,
|
if (sync_exp_work_done(rsp, rnp0, NULL,
|
||||||
&rsp->expedited_workdone0, s))
|
&rdp->expedited_workdone0, s))
|
||||||
return NULL;
|
return NULL;
|
||||||
return rnp0;
|
return rnp0;
|
||||||
}
|
}
|
||||||
|
@ -3613,14 +3613,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
* can be inexact, as it is just promoting locality and is not
|
* can be inexact, as it is just promoting locality and is not
|
||||||
* strictly needed for correctness.
|
* strictly needed for correctness.
|
||||||
*/
|
*/
|
||||||
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
|
if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
|
||||||
if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
|
|
||||||
return NULL;
|
return NULL;
|
||||||
mutex_lock(&rdp->exp_funnel_mutex);
|
mutex_lock(&rdp->exp_funnel_mutex);
|
||||||
rnp0 = rdp->mynode;
|
rnp0 = rdp->mynode;
|
||||||
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
|
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
|
||||||
if (sync_exp_work_done(rsp, rnp1, rdp,
|
if (sync_exp_work_done(rsp, rnp1, rdp,
|
||||||
&rsp->expedited_workdone2, s))
|
&rdp->expedited_workdone2, s))
|
||||||
return NULL;
|
return NULL;
|
||||||
mutex_lock(&rnp0->exp_funnel_mutex);
|
mutex_lock(&rnp0->exp_funnel_mutex);
|
||||||
if (rnp1)
|
if (rnp1)
|
||||||
|
@ -3630,7 +3629,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
rnp1 = rnp0;
|
rnp1 = rnp0;
|
||||||
}
|
}
|
||||||
if (sync_exp_work_done(rsp, rnp1, rdp,
|
if (sync_exp_work_done(rsp, rnp1, rdp,
|
||||||
&rsp->expedited_workdone3, s))
|
&rdp->expedited_workdone3, s))
|
||||||
return NULL;
|
return NULL;
|
||||||
return rnp1;
|
return rnp1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -386,6 +386,10 @@ struct rcu_data {
|
||||||
struct rcu_head oom_head;
|
struct rcu_head oom_head;
|
||||||
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||||
struct mutex exp_funnel_mutex;
|
struct mutex exp_funnel_mutex;
|
||||||
|
atomic_long_t expedited_workdone0; /* # done by others #0. */
|
||||||
|
atomic_long_t expedited_workdone1; /* # done by others #1. */
|
||||||
|
atomic_long_t expedited_workdone2; /* # done by others #2. */
|
||||||
|
atomic_long_t expedited_workdone3; /* # done by others #3. */
|
||||||
|
|
||||||
/* 7) Callback offloading. */
|
/* 7) Callback offloading. */
|
||||||
#ifdef CONFIG_RCU_NOCB_CPU
|
#ifdef CONFIG_RCU_NOCB_CPU
|
||||||
|
@ -500,10 +504,6 @@ struct rcu_state {
|
||||||
/* End of fields guarded by barrier_mutex. */
|
/* End of fields guarded by barrier_mutex. */
|
||||||
|
|
||||||
unsigned long expedited_sequence; /* Take a ticket. */
|
unsigned long expedited_sequence; /* Take a ticket. */
|
||||||
atomic_long_t expedited_workdone0; /* # done by others #0. */
|
|
||||||
atomic_long_t expedited_workdone1; /* # done by others #1. */
|
|
||||||
atomic_long_t expedited_workdone2; /* # done by others #2. */
|
|
||||||
atomic_long_t expedited_workdone3; /* # done by others #3. */
|
|
||||||
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
||||||
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
||||||
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
||||||
|
|
|
@ -183,14 +183,20 @@ static const struct file_operations rcudata_fops = {
|
||||||
|
|
||||||
static int show_rcuexp(struct seq_file *m, void *v)
|
static int show_rcuexp(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
|
int cpu;
|
||||||
struct rcu_state *rsp = (struct rcu_state *)m->private;
|
struct rcu_state *rsp = (struct rcu_state *)m->private;
|
||||||
|
struct rcu_data *rdp;
|
||||||
|
unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||||
|
s0 += atomic_long_read(&rdp->expedited_workdone0);
|
||||||
|
s1 += atomic_long_read(&rdp->expedited_workdone1);
|
||||||
|
s2 += atomic_long_read(&rdp->expedited_workdone2);
|
||||||
|
s3 += atomic_long_read(&rdp->expedited_workdone3);
|
||||||
|
}
|
||||||
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
|
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
|
||||||
rsp->expedited_sequence,
|
rsp->expedited_sequence, s0, s1, s2, s3,
|
||||||
atomic_long_read(&rsp->expedited_workdone0),
|
|
||||||
atomic_long_read(&rsp->expedited_workdone1),
|
|
||||||
atomic_long_read(&rsp->expedited_workdone2),
|
|
||||||
atomic_long_read(&rsp->expedited_workdone3),
|
|
||||||
atomic_long_read(&rsp->expedited_normal),
|
atomic_long_read(&rsp->expedited_normal),
|
||||||
atomic_read(&rsp->expedited_need_qs),
|
atomic_read(&rsp->expedited_need_qs),
|
||||||
rsp->expedited_sequence / 2);
|
rsp->expedited_sequence / 2);
|
||||||
|
|
Loading…
Reference in New Issue