Merge branches 'doc.2016.04.19a', 'exp.2016.03.31d', 'fixes.2016.03.31d' and 'torture.2016.04.21a' into HEAD

doc.2016.04.19a: Documentation updates
exp.2016.03.31d: Expedited grace-period updates
fixes.2016.03.31d: Miscellaneous fixes
torture.2016.04.21a: Torture-test updates
Paul E. McKenney 2016-04-21 13:48:20 -07:00
commit dcd36d01fb
29 changed files with 1559 additions and 177 deletions

View File

@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
These fields are as follows:
o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress.
o "wd0", "wd1", "wd2", and "wd3" are the number of times that an
attempt to start an expedited grace period found that someone
else had completed an expedited grace period that satisfies the
attempted request. "Our work is done."
o "wd1", "wd2", and "wd3" are the number of times that an attempt
to start an expedited grace period found that someone else had
completed an expedited grace period that satisfies the attempted
request. "Our work is done."
o "n" is number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period.
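For illustration, here is a minimal user-space sketch (hypothetical code, not part of this patch) that decodes a line in the new format:
#include <stdio.h>
/* Decode one line of "cat rcu/rcu_preempt/rcuexp" output. */
static void decode_rcuexp(const char *line)
{
	unsigned long s, wd1, wd2, wd3, n, enq, sc;

	if (sscanf(line, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%lu sc=%lu",
		   &s, &wd1, &wd2, &wd3, &n, &enq, &sc) != 7)
		return;
	printf("expedited grace periods: %lu%s\n", sc,
	       (s & 1) ? " (one now in progress)" : "");
	printf("requests satisfied by someone else's grace period: %lu\n",
	       wd1 + wd2 + wd3);
	printf("fallbacks to normal grace periods: %lu\n", n);
}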

View File

@@ -3284,6 +3284,44 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Lazy RCU callbacks are those which RCU can
prove do nothing more than free memory.
rcuperf.gp_exp= [KNL]
Measure performance of expedited synchronous
grace-period primitives.
rcuperf.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
test until boot completes in order to avoid
interference.
rcuperf.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N, where N is the number of CPUs. A value
"n" less than -1 selects N+n+1, where N is again
the number of CPUs. For example, -2 selects N-1
(one less than the number of CPUs), -3 selects
N-2, and so on. A value of "n" less than or
equal to -N selects a single reader.
rcuperf.nwriters= [KNL]
Set number of RCU writers. The values operate
the same as for rcuperf.nreaders.
rcuperf.perf_runnable= [BOOT]
Start rcuperf running at boot time.
rcuperf.shutdown= [KNL]
Shut the system down after performance tests
complete. This is useful for hands-off automated
testing.
rcuperf.perf_type= [KNL]
Specify the RCU implementation to test.
rcuperf.verbose= [KNL]
Enable additional printk() statements.
rcutorture.cbflood_inter_holdoff= [KNL]
Set holdoff time (jiffies) between successive
callback-flood tests.

View File

@@ -508,14 +508,7 @@ int rcu_read_lock_bh_held(void);
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
* critical section unless it can prove otherwise.
*/
#ifdef CONFIG_PREEMPT_COUNT
int rcu_read_lock_sched_held(void);
#else /* #ifdef CONFIG_PREEMPT_COUNT */
static inline int rcu_read_lock_sched_held(void)
{
return 1;
}
#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -532,18 +525,10 @@ static inline int rcu_read_lock_bh_held(void)
return 1;
}
#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
{
return preempt_count() != 0 || irqs_disabled();
return !preemptible();
}
#else /* #ifdef CONFIG_PREEMPT_COUNT */
static inline int rcu_read_lock_sched_held(void)
{
return 1;
}
#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_PROVE_RCU

View File

@@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void)
return 0;
}
/*
* Return the number of expedited grace periods completed.
*/
static inline unsigned long rcu_exp_batches_completed(void)
{
return 0;
}
/*
* Return the number of expedited sched grace periods completed.
*/
static inline unsigned long rcu_exp_batches_completed_sched(void)
{
return 0;
}
static inline void rcu_force_quiescent_state(void)
{
}

View File

@@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void);
unsigned long rcu_batches_completed(void);
unsigned long rcu_batches_completed_bh(void);
unsigned long rcu_batches_completed_sched(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long rcu_exp_batches_completed_sched(void);
void show_rcu_gp_kthreads(void);
void rcu_force_quiescent_state(void);

View File

@@ -171,6 +171,77 @@ TRACE_EVENT(rcu_grace_period_init,
__entry->grplo, __entry->grphi, __entry->qsmask)
);
/*
* Tracepoint for expedited grace-period events. Takes a string identifying
* the RCU flavor, the expedited grace-period sequence number, and a string
* identifying the grace-period-related event as follows:
*
* "snap": Captured snapshot of expedited grace period sequence number.
* "start": Started a real expedited grace period.
* "end": Ended a real expedited grace period.
* "endwake": Woke piggybackers up.
* "done": Someone else did the expedited grace period for us.
*/
TRACE_EVENT(rcu_exp_grace_period,
TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent),
TP_ARGS(rcuname, gpseq, gpevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
__field(unsigned long, gpseq)
__field(const char *, gpevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
__entry->gpseq = gpseq;
__entry->gpevent = gpevent;
),
TP_printk("%s %lu %s",
__entry->rcuname, __entry->gpseq, __entry->gpevent)
);
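/*
 * For example (hypothetical values), ftrace renders a record from this
 * tracepoint as "rcu_exp_grace_period: rcu_sched 42 start".
 */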
/*
* Tracepoint for expedited grace-period funnel-locking events. Takes a
* string identifying the RCU flavor, an integer identifying the rcu_node
* combining-tree level, another pair of integers identifying the lowest-
* and highest-numbered CPU associated with the current rcu_node structure,
* and a string identifying the grace-period-related event as follows:
*
* "nxtlvl": Advance to next level of rcu_node funnel
* "wait": Wait for someone else to do expedited GP
*/
TRACE_EVENT(rcu_exp_funnel_lock,
TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi,
const char *gpevent),
TP_ARGS(rcuname, level, grplo, grphi, gpevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
__field(u8, level)
__field(int, grplo)
__field(int, grphi)
__field(const char *, gpevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
__entry->level = level;
__entry->grplo = grplo;
__entry->grphi = grphi;
__entry->gpevent = gpevent;
),
TP_printk("%s %d %d %d %s",
__entry->rcuname, __entry->level, __entry->grplo,
__entry->grphi, __entry->gpevent)
);
/*
* Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended
* to assist debugging of these handoffs.
@@ -704,11 +775,15 @@ TRACE_EVENT(rcu_barrier,
#else /* #ifdef CONFIG_RCU_TRACE */
#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
level, grplo, grphi, event) \
do { } while (0)
#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
#define trace_rcu_exp_grace_period(rcuname, gpseq, gpevent) \
do { } while (0)
#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
do { } while (0)
#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)

View File

@@ -5,6 +5,7 @@ KCOV_INSTRUMENT := n
obj-y += update.o sync.o
obj-$(CONFIG_SRCU) += srcu.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
obj-$(CONFIG_TREE_RCU) += tree.o
obj-$(CONFIG_PREEMPT_RCU) += tree.o
obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o

kernel/rcu/rcuperf.c (new file, 655 lines)
View File

@@ -0,0 +1,655 @@
/*
* Read-Copy Update module-based performance-test facility
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright (C) IBM Corporation, 2015
*
* Authors: Paul E. McKenney <paulmck@us.ibm.com>
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
#include <linux/vmalloc.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
#define PERF_FLAG "-perf:"
#define PERFOUT_STRING(s) \
pr_alert("%s" PERF_FLAG s "\n", perf_type)
#define VERBOSE_PERFOUT_STRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
#define VERBOSE_PERFOUT_ERRSTRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
torture_param(bool, gp_exp, true, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
static struct task_struct *shutdown_task;
static u64 **writer_durations;
static int *writer_n_durations;
static atomic_t n_rcu_perf_reader_started;
static atomic_t n_rcu_perf_writer_started;
static atomic_t n_rcu_perf_writer_finished;
static wait_queue_head_t shutdown_wq;
static u64 t_rcu_perf_writer_started;
static u64 t_rcu_perf_writer_finished;
static unsigned long b_rcu_perf_writer_started;
static unsigned long b_rcu_perf_writer_finished;
static int rcu_perf_writer_state;
#define RTWS_INIT 0
#define RTWS_EXP_SYNC 1
#define RTWS_SYNC 2
#define RTWS_IDLE 3
#define RTWS_STOPPING 4
#define MAX_MEAS 10000
#define MIN_MEAS 100
#if defined(MODULE) || defined(CONFIG_RCU_PERF_TEST_RUNNABLE)
#define RCUPERF_RUNNABLE_INIT 1
#else
#define RCUPERF_RUNNABLE_INIT 0
#endif
static int perf_runnable = RCUPERF_RUNNABLE_INIT;
module_param(perf_runnable, int, 0444);
MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
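/*
 * Example usage (parameter values are illustrative): when built as a
 * module, "modprobe rcuperf perf_type=srcu nwriters=2 shutdown=1"
 * measures the SRCU flavor with two updater kthreads and powers the
 * system off once the measurements complete.
 */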
/*
* Operations vector for selecting different types of tests.
*/
struct rcu_perf_ops {
int ptype;
void (*init)(void);
void (*cleanup)(void);
int (*readlock)(void);
void (*readunlock)(int idx);
unsigned long (*started)(void);
unsigned long (*completed)(void);
unsigned long (*exp_completed)(void);
void (*sync)(void);
void (*exp_sync)(void);
const char *name;
};
static struct rcu_perf_ops *cur_ops;
/*
* Definitions for rcu perf testing.
*/
static int rcu_perf_read_lock(void) __acquires(RCU)
{
rcu_read_lock();
return 0;
}
static void rcu_perf_read_unlock(int idx) __releases(RCU)
{
rcu_read_unlock();
}
static unsigned long __maybe_unused rcu_no_completed(void)
{
return 0;
}
static void rcu_sync_perf_init(void)
{
}
static struct rcu_perf_ops rcu_ops = {
.ptype = RCU_FLAVOR,
.init = rcu_sync_perf_init,
.readlock = rcu_perf_read_lock,
.readunlock = rcu_perf_read_unlock,
.started = rcu_batches_started,
.completed = rcu_batches_completed,
.exp_completed = rcu_exp_batches_completed,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
.name = "rcu"
};
/*
* Definitions for rcu_bh perf testing.
*/
static int rcu_bh_perf_read_lock(void) __acquires(RCU_BH)
{
rcu_read_lock_bh();
return 0;
}
static void rcu_bh_perf_read_unlock(int idx) __releases(RCU_BH)
{
rcu_read_unlock_bh();
}
static struct rcu_perf_ops rcu_bh_ops = {
.ptype = RCU_BH_FLAVOR,
.init = rcu_sync_perf_init,
.readlock = rcu_bh_perf_read_lock,
.readunlock = rcu_bh_perf_read_unlock,
.started = rcu_batches_started_bh,
.completed = rcu_batches_completed_bh,
.exp_completed = rcu_exp_batches_completed_sched,
.sync = synchronize_rcu_bh,
.exp_sync = synchronize_rcu_bh_expedited,
.name = "rcu_bh"
};
/*
* Definitions for srcu perf testing.
*/
DEFINE_STATIC_SRCU(srcu_ctl_perf);
static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;
static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
{
return srcu_read_lock(srcu_ctlp);
}
static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
{
srcu_read_unlock(srcu_ctlp, idx);
}
static unsigned long srcu_perf_completed(void)
{
return srcu_batches_completed(srcu_ctlp);
}
static void srcu_perf_synchronize(void)
{
synchronize_srcu(srcu_ctlp);
}
static void srcu_perf_synchronize_expedited(void)
{
synchronize_srcu_expedited(srcu_ctlp);
}
static struct rcu_perf_ops srcu_ops = {
.ptype = SRCU_FLAVOR,
.init = rcu_sync_perf_init,
.readlock = srcu_perf_read_lock,
.readunlock = srcu_perf_read_unlock,
.started = NULL,
.completed = srcu_perf_completed,
.exp_completed = srcu_perf_completed,
.sync = srcu_perf_synchronize,
.exp_sync = srcu_perf_synchronize_expedited,
.name = "srcu"
};
/*
* Definitions for sched perf testing.
*/
static int sched_perf_read_lock(void)
{
preempt_disable();
return 0;
}
static void sched_perf_read_unlock(int idx)
{
preempt_enable();
}
static struct rcu_perf_ops sched_ops = {
.ptype = RCU_SCHED_FLAVOR,
.init = rcu_sync_perf_init,
.readlock = sched_perf_read_lock,
.readunlock = sched_perf_read_unlock,
.started = rcu_batches_started_sched,
.completed = rcu_batches_completed_sched,
.exp_completed = rcu_exp_batches_completed_sched,
.sync = synchronize_sched,
.exp_sync = synchronize_sched_expedited,
.name = "sched"
};
#ifdef CONFIG_TASKS_RCU
/*
* Definitions for RCU-tasks perf testing.
*/
static int tasks_perf_read_lock(void)
{
return 0;
}
static void tasks_perf_read_unlock(int idx)
{
}
static struct rcu_perf_ops tasks_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_perf_init,
.readlock = tasks_perf_read_lock,
.readunlock = tasks_perf_read_unlock,
.started = rcu_no_completed,
.completed = rcu_no_completed,
.sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks,
.name = "tasks"
};
#define RCUPERF_TASKS_OPS &tasks_ops,
static bool __maybe_unused torturing_tasks(void)
{
return cur_ops == &tasks_ops;
}
#else /* #ifdef CONFIG_TASKS_RCU */
#define RCUPERF_TASKS_OPS
static bool __maybe_unused torturing_tasks(void)
{
return false;
}
#endif /* #else #ifdef CONFIG_TASKS_RCU */
/*
* If performance tests complete, wait for shutdown to commence.
*/
static void rcu_perf_wait_shutdown(void)
{
cond_resched_rcu_qs();
if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
schedule_timeout_uninterruptible(1);
}
/*
* RCU perf reader kthread. Repeatedly does an empty RCU read-side
* critical section, minimizing update-side interference.
*/
static int
rcu_perf_reader(void *arg)
{
unsigned long flags;
int idx;
long me = (long)arg;
VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
set_user_nice(current, MAX_NICE);
atomic_inc(&n_rcu_perf_reader_started);
do {
local_irq_save(flags);
idx = cur_ops->readlock();
cur_ops->readunlock(idx);
local_irq_restore(flags);
rcu_perf_wait_shutdown();
} while (!torture_must_stop());
torture_kthread_stopping("rcu_perf_reader");
return 0;
}
/*
* RCU perf writer kthread. Repeatedly does a grace period.
*/
static int
rcu_perf_writer(void *arg)
{
int i = 0;
int i_max;
long me = (long)arg;
struct sched_param sp;
bool started = false, done = false, alldone = false;
u64 t;
u64 *wdp;
u64 *wdpp = writer_durations[me];
VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
WARN_ON(rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp);
WARN_ON(rcu_gp_is_normal() && gp_exp);
WARN_ON(!wdpp);
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
sp.sched_priority = 1;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
if (holdoff)
schedule_timeout_uninterruptible(holdoff * HZ);
t = ktime_get_mono_fast_ns();
if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
t_rcu_perf_writer_started = t;
if (gp_exp) {
b_rcu_perf_writer_started =
cur_ops->exp_completed() / 2;
} else {
b_rcu_perf_writer_started =
cur_ops->completed();
}
}
do {
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
if (gp_exp) {
rcu_perf_writer_state = RTWS_EXP_SYNC;
cur_ops->exp_sync();
} else {
rcu_perf_writer_state = RTWS_SYNC;
cur_ops->sync();
}
rcu_perf_writer_state = RTWS_IDLE;
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
i_max = i;
if (!started &&
atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
started = true;
if (!done && i >= MIN_MEAS) {
done = true;
sp.sched_priority = 0;
sched_setscheduler_nocheck(current,
SCHED_NORMAL, &sp);
pr_alert("%s" PERF_FLAG
"rcu_perf_writer %ld has %d measurements\n",
perf_type, me, MIN_MEAS);
if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
nrealwriters) {
schedule_timeout_interruptible(10);
rcu_ftrace_dump(DUMP_ALL);
PERFOUT_STRING("Test complete");
t_rcu_perf_writer_finished = t;
if (gp_exp) {
b_rcu_perf_writer_finished =
cur_ops->exp_completed() / 2;
} else {
b_rcu_perf_writer_finished =
cur_ops->completed();
}
if (shutdown) {
smp_mb(); /* Assign before wake. */
wake_up(&shutdown_wq);
}
}
}
if (done && !alldone &&
atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
alldone = true;
if (started && !alldone && i < MAX_MEAS - 1)
i++;
rcu_perf_wait_shutdown();
} while (!torture_must_stop());
rcu_perf_writer_state = RTWS_STOPPING;
writer_n_durations[me] = i_max;
torture_kthread_stopping("rcu_perf_writer");
return 0;
}
static inline void
rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
{
pr_alert("%s" PERF_FLAG
"--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
}
static void
rcu_perf_cleanup(void)
{
int i;
int j;
int ngps = 0;
u64 *wdp;
u64 *wdpp;
if (torture_cleanup_begin())
return;
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
torture_stop_kthread(rcu_perf_reader,
reader_tasks[i]);
kfree(reader_tasks);
}
if (writer_tasks) {
for (i = 0; i < nrealwriters; i++) {
torture_stop_kthread(rcu_perf_writer,
writer_tasks[i]);
if (!writer_n_durations)
continue;
j = writer_n_durations[i];
pr_alert("%s%s writer %d gps: %d\n",
perf_type, PERF_FLAG, i, j);
ngps += j;
}
pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
perf_type, PERF_FLAG,
t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
t_rcu_perf_writer_finished -
t_rcu_perf_writer_started,
ngps,
b_rcu_perf_writer_finished -
b_rcu_perf_writer_started);
for (i = 0; i < nrealwriters; i++) {
if (!writer_durations)
break;
if (!writer_n_durations)
continue;
wdpp = writer_durations[i];
if (!wdpp)
continue;
for (j = 0; j <= writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
perf_type, PERF_FLAG,
i, j, *wdp);
if (j % 100 == 0)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
}
kfree(writer_tasks);
kfree(writer_durations);
kfree(writer_n_durations);
}
/* Do flavor-specific cleanup operations. */
if (cur_ops->cleanup != NULL)
cur_ops->cleanup();
torture_cleanup_end();
}
/*
* Return the number if non-negative. If -1, the number of CPUs.
* If "n" is less than -1, N+n+1, where N is the number of CPUs
* (for example, -2 selects one less than the number of CPUs),
* but at least one.
*/
static int compute_real(int n)
{
int nr;
if (n >= 0) {
nr = n;
} else {
nr = num_online_cpus() + 1 + n;
if (nr <= 0)
nr = 1;
}
return nr;
}
/*
* RCU perf shutdown kthread. Just waits to be awakened, then shuts
* down the system.
*/
static int
rcu_perf_shutdown(void *arg)
{
do {
wait_event(shutdown_wq,
atomic_read(&n_rcu_perf_writer_finished) >=
nrealwriters);
} while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
smp_mb(); /* Wake before output. */
rcu_perf_cleanup();
kernel_power_off();
return -EINVAL;
}
static int __init
rcu_perf_init(void)
{
long i;
int firsterr = 0;
static struct rcu_perf_ops *perf_ops[] = {
&rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
RCUPERF_TASKS_OPS
};
if (!torture_init_begin(perf_type, verbose, &perf_runnable))
return -EBUSY;
/* Process args and tell the world that the perf'er is on the job. */
for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
cur_ops = perf_ops[i];
if (strcmp(perf_type, cur_ops->name) == 0)
break;
}
if (i == ARRAY_SIZE(perf_ops)) {
pr_alert("rcu-perf: invalid perf type: \"%s\"\n",
perf_type);
pr_alert("rcu-perf types:");
for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
pr_alert(" %s", perf_ops[i]->name);
pr_alert("\n");
firsterr = -EINVAL;
goto unwind;
}
if (cur_ops->init)
cur_ops->init();
nrealwriters = compute_real(nwriters);
nrealreaders = compute_real(nreaders);
atomic_set(&n_rcu_perf_reader_started, 0);
atomic_set(&n_rcu_perf_writer_started, 0);
atomic_set(&n_rcu_perf_writer_finished, 0);
rcu_perf_print_module_parms(cur_ops, "Start of test");
/* Start up the kthreads. */
if (shutdown) {
init_waitqueue_head(&shutdown_wq);
firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
shutdown_task);
if (firsterr)
goto unwind;
schedule_timeout_uninterruptible(1);
}
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_PERFOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
for (i = 0; i < nrealreaders; i++) {
firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
reader_tasks[i]);
if (firsterr)
goto unwind;
}
while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
schedule_timeout_uninterruptible(1);
writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]),
GFP_KERNEL);
writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
GFP_KERNEL);
writer_n_durations =
kcalloc(nrealwriters, sizeof(*writer_n_durations),
GFP_KERNEL);
if (!writer_tasks || !writer_durations || !writer_n_durations) {
VERBOSE_PERFOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
for (i = 0; i < nrealwriters; i++) {
writer_durations[i] =
kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
GFP_KERNEL);
if (!writer_durations[i]) {
firsterr = -ENOMEM;
goto unwind;
}
firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
writer_tasks[i]);
if (firsterr)
goto unwind;
}
torture_init_end();
return 0;
unwind:
torture_init_end();
rcu_perf_cleanup();
return firsterr;
}
module_init(rcu_perf_init);
module_exit(rcu_perf_cleanup);

View File

@@ -130,8 +130,8 @@ static struct rcu_torture __rcu *rcu_torture_current;
static unsigned long rcu_torture_current_version;
static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
static DEFINE_SPINLOCK(rcu_torture_lock);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 };
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 };
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
static atomic_t n_rcu_torture_alloc;
static atomic_t n_rcu_torture_alloc_fail;
@@ -916,7 +916,7 @@ rcu_torture_fqs(void *arg)
static int
rcu_torture_writer(void *arg)
{
bool can_expedite = !rcu_gp_is_expedited();
bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
int expediting = 0;
unsigned long gp_snap;
bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
@@ -932,7 +932,7 @@ rcu_torture_writer(void *arg)
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite) {
pr_alert("%s" TORTURE_FLAG
" Grace periods expedited from boot/sysfs for %s,\n",
" GP expediting controlled from boot/sysfs for %s,\n",
torture_type, cur_ops->name);
pr_alert("%s" TORTURE_FLAG
" Disabled dynamic grace-period expediting.\n",
@@ -1478,7 +1478,9 @@ static int rcu_torture_barrier_cbs(void *arg)
* The above smp_load_acquire() ensures barrier_phase load
* is ordered before the following ->call().
*/
local_irq_disable(); /* Just to test no-irq call_rcu(). */
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
local_irq_enable();
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
@@ -1585,7 +1587,7 @@ static int rcutorture_cpu_notify(struct notifier_block *self,
{
long cpu = (long)hcpu;
switch (action) {
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
case CPU_DOWN_FAILED:
(void)rcutorture_booster_init(cpu);

View File

@@ -102,6 +102,8 @@ struct rcu_state sname##_state = { \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \
.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
}
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
@@ -370,6 +372,21 @@ void rcu_all_qs(void)
rcu_momentary_dyntick_idle();
local_irq_restore(flags);
}
if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
/*
* Yes, we just checked a per-CPU variable with preemption
* enabled, so we might be migrated to some other CPU at
* this point. That is OK because in that case, the
* migration will supply the needed quiescent state.
* We might end up needlessly disabling preemption and
* invoking rcu_sched_qs() on the destination CPU, but
* the probability and cost are both quite low, so this
* should not be a problem in practice.
*/
preempt_disable();
rcu_sched_qs();
preempt_enable();
}
this_cpu_inc(rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
@@ -385,9 +402,11 @@ module_param(qlowmark, long, 0444);
static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);
module_param(rcu_kick_kthreads, bool, 0644);
/*
* How long the grace period must be before we start recruiting
@@ -459,6 +478,28 @@ unsigned long rcu_batches_completed_bh(void)
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
* Return the number of RCU expedited batches completed thus far for
* debug & stats. Odd numbers mean that a batch is in progress, even
* numbers mean idle. The value returned will thus be roughly double
* the cumulative batches since boot.
*/
unsigned long rcu_exp_batches_completed(void)
{
return rcu_state_p->expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
/*
* Return the number of RCU-sched expedited batches completed thus far
* for debug & stats. Similar to rcu_exp_batches_completed().
*/
unsigned long rcu_exp_batches_completed_sched(void)
{
return rcu_sched_state.expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
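/*
 * A minimal sketch of the sequence-counter encoding (these helpers are
 * illustrative, not part of this patch): a value returned by the above
 * functions can be decoded as follows.
 */
static inline bool rcu_exp_gp_in_progress_example(unsigned long seq)
{
	return seq & 0x1;	/* Odd: an expedited GP is running. */
}

static inline unsigned long rcu_exp_gps_completed_example(unsigned long seq)
{
	return seq >> 1;	/* The counter advances by two per GP. */
}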
/*
* Force a quiescent state.
*/
@@ -1224,8 +1265,10 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
rsp->gp_flags,
gp_state_getname(rsp->gp_state), rsp->gp_state,
rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
if (rsp->gp_kthread)
if (rsp->gp_kthread) {
sched_show_task(rsp->gp_kthread);
wake_up_process(rsp->gp_kthread);
}
}
}
@@ -1249,6 +1292,25 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
}
}
/*
* If too much time has passed in the current grace period, and if
* so configured, go kick the relevant kthreads.
*/
static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
{
unsigned long j;
if (!rcu_kick_kthreads)
return;
j = READ_ONCE(rsp->jiffies_kick_kthreads);
if (time_after(jiffies, j) && rsp->gp_kthread) {
WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
rcu_ftrace_dump(DUMP_ALL);
wake_up_process(rsp->gp_kthread);
WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ);
}
}
static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
{
int cpu;
@@ -1260,6 +1322,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
struct rcu_node *rnp = rcu_get_root(rsp);
long totqlen = 0;
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads(rsp);
if (rcu_cpu_stall_suppress)
return;
/* Only let one CPU complain about others per time interval. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -1333,6 +1400,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
long totqlen = 0;
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads(rsp);
if (rcu_cpu_stall_suppress)
return;
/*
* OK, time to rat on ourselves...
* See Documentation/RCU/stallwarn.txt for info on how to debug
@@ -1377,8 +1449,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
unsigned long js;
struct rcu_node *rnp;
if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
!rcu_gp_in_progress(rsp))
return;
rcu_stall_kick_kthreads(rsp);
j = jiffies;
/*
@@ -2117,8 +2191,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
ret = 0;
for (;;) {
if (!ret)
if (!ret) {
rsp->jiffies_force_qs = jiffies + j;
WRITE_ONCE(rsp->jiffies_kick_kthreads,
jiffies + 3 * j);
}
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqswait"));
@@ -2144,6 +2221,15 @@
TPS("fqsend"));
cond_resched_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
ret = 0; /* Force full wait till next FQS. */
j = jiffies_till_next_fqs;
if (j > HZ) {
j = HZ;
jiffies_till_next_fqs = HZ;
} else if (j < 1) {
j = 1;
jiffies_till_next_fqs = 1;
}
} else {
/* Deal with stray signal. */
cond_resched_rcu_qs();
@@ -2152,14 +2238,12 @@
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqswaitsig"));
}
j = jiffies_till_next_fqs;
if (j > HZ) {
j = HZ;
jiffies_till_next_fqs = HZ;
} else if (j < 1) {
j = 1;
jiffies_till_next_fqs = 1;
ret = 1; /* Keep old FQS timing. */
j = jiffies;
if (time_after(jiffies, rsp->jiffies_force_qs))
j = 1;
else
j = rsp->jiffies_force_qs - j;
}
}
@@ -3376,8 +3460,12 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
unsigned long s;
smp_mb(); /* Caller's modifications seen first by other CPUs. */
return rcu_seq_snap(&rsp->expedited_sequence);
s = rcu_seq_snap(&rsp->expedited_sequence);
trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
return s;
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
@@ -3469,7 +3557,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
* for the current expedited grace period. Works only for preemptible
* RCU -- other RCU implementations use other means.
*
* Caller must hold the root rcu_node's exp_funnel_mutex.
* Caller must hold the rcu_state's exp_mutex.
*/
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
@@ -3485,8 +3573,8 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
* Caller must hold the root rcu_node's exp_funnel_mutex and the
* specified rcu_node structure's ->lock.
* Caller must hold the rcu_state's exp_mutex and the specified rcu_node
* structure's ->lock.
*/
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake, unsigned long flags)
@@ -3523,7 +3611,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
* Report expedited quiescent state for specified node. This is a
* lock-acquisition wrapper function for __rcu_report_exp_rnp().
*
* Caller must hold the root rcu_node's exp_funnel_mutex.
* Caller must hold the rcu_state's exp_mutex.
*/
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
struct rcu_node *rnp, bool wake)
@@ -3536,8 +3624,8 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
/*
* Report expedited quiescent state for multiple CPUs, all covered by the
* specified leaf rcu_node structure. Caller must hold the root
* rcu_node's exp_funnel_mutex.
* specified leaf rcu_node structure. Caller must hold the rcu_state's
* exp_mutex.
*/
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long mask, bool wake)
@@ -3555,7 +3643,6 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Report expedited quiescent state for specified rcu_data (CPU).
* Caller must hold the root rcu_node's exp_funnel_mutex.
*/
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
bool wake)
@@ -3564,15 +3651,11 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
}
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp,
atomic_long_t *stat, unsigned long s)
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
if (rnp)
mutex_unlock(&rnp->exp_funnel_mutex);
else if (rdp)
mutex_unlock(&rdp->exp_funnel_mutex);
trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(stat);
@@ -3582,59 +3665,65 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
}
/*
* Funnel-lock acquisition for expedited grace periods. Returns a
* pointer to the root rcu_node structure, or NULL if some other
* task did the expedited grace period for us.
* Funnel-lock acquisition for expedited grace periods. Returns true
* if some other task completed an expedited grace period that this task
* can piggy-back on, and with no mutex held. Otherwise, returns false
* with the mutex held, indicating that the caller must actually do the
* expedited grace period.
*/
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
struct rcu_node *rnp = rdp->mynode;
struct rcu_node *rnp_root = rcu_get_root(rsp);
/* Low-contention fastpath. */
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
(rnp == rnp_root ||
ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
!mutex_is_locked(&rsp->exp_mutex) &&
mutex_trylock(&rsp->exp_mutex))
goto fastpath;
/*
* First try directly acquiring the root lock in order to reduce
* latency in the common case where expedited grace periods are
* rare. We check mutex_is_locked() to avoid pathological levels of
* memory contention on ->exp_funnel_mutex in the heavy-load case.
* Each pass through the following loop works its way up
* the rcu_node tree, returning if others have done the work or
* otherwise falls through to acquire rsp->exp_mutex. The mapping
* from CPU to rcu_node structure can be inexact, as it is just
* promoting locality and is not strictly needed for correctness.
*/
rnp0 = rcu_get_root(rsp);
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
&rdp->expedited_workdone0, s))
return NULL;
return rnp0;
for (; rnp != NULL; rnp = rnp->parent) {
if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
return true;
/* Work not done, either wait here or go up. */
spin_lock(&rnp->exp_lock);
if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
/* Someone else doing GP, so wait for them. */
spin_unlock(&rnp->exp_lock);
trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
rnp->grplo, rnp->grphi,
TPS("wait"));
wait_event(rnp->exp_wq[(s >> 1) & 0x3],
sync_exp_work_done(rsp,
&rdp->exp_workdone2, s));
return true;
}
rnp->exp_seq_rq = s; /* Followers can wait on us. */
spin_unlock(&rnp->exp_lock);
trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
rnp->grphi, TPS("nxtlvl"));
}
/*
* Each pass through the following loop works its way
* up the rcu_node tree, returning if others have done the
* work or otherwise falls through holding the root rnp's
* ->exp_funnel_mutex. The mapping from CPU to rcu_node structure
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
*/
if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
return NULL;
mutex_lock(&rdp->exp_funnel_mutex);
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
&rdp->expedited_workdone2, s))
return NULL;
mutex_lock(&rnp0->exp_funnel_mutex);
if (rnp1)
mutex_unlock(&rnp1->exp_funnel_mutex);
else
mutex_unlock(&rdp->exp_funnel_mutex);
rnp1 = rnp0;
mutex_lock(&rsp->exp_mutex);
fastpath:
if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
mutex_unlock(&rsp->exp_mutex);
return true;
}
if (sync_exp_work_done(rsp, rnp1, rdp,
&rdp->expedited_workdone3, s))
return NULL;
return rnp1;
rcu_exp_gp_seq_start(rsp);
trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
return false;
}
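/*
 * Worked example (illustrative numbers): if two tasks both snapshot
 * s = 42, the winner of the funnel race records 42 in ->exp_seq_rq at
 * each level and acquires ->exp_mutex, while the loser finds
 * ->exp_seq_rq >= 42 at some level and sleeps on exp_wq[(42 >> 1) & 0x3]
 * until rcu_exp_wait_wake() advances ->expedited_sequence far enough
 * for sync_exp_work_done() to report the grace period complete.
 */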
/* Invoked on each online non-idle CPU for expedited quiescent state. */
@@ -3649,6 +3738,11 @@ static void sync_sched_exp_handler(void *data)
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
if (rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(&rcu_sched_data), true);
return;
}
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
resched_cpu(smp_processor_id());
}
@@ -3773,7 +3867,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rsp->name);
ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
ndetected = rcu_print_task_exp_stall(rnp);
ndetected += rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
struct rcu_data *rdp;
@@ -3783,7 +3877,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
"O."[cpu_online(cpu)],
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
}
@@ -3792,7 +3886,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rsp->expedited_sequence,
rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
if (!ndetected) {
if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rsp, rnp) {
if (rnp == rnp_root)
@@ -3818,6 +3912,41 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
}
/*
* Wait for the current expedited grace period to complete, and then
* wake up everyone who piggybacked on the just-completed expedited
* grace period. Also update all the ->exp_seq_rq counters as needed
* in order to avoid counter-wrap problems.
*/
static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
{
struct rcu_node *rnp;
synchronize_sched_expedited_wait(rsp);
rcu_exp_gp_seq_end(rsp);
trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
/*
* Switch over to wakeup mode, allowing the next GP, but -only- the
* next GP, to proceed.
*/
mutex_lock(&rsp->exp_wake_mutex);
mutex_unlock(&rsp->exp_mutex);
rcu_for_each_node_breadth_first(rsp, rnp) {
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
spin_lock(&rnp->exp_lock);
/* Recheck, avoid hang in case someone just arrived. */
if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
rnp->exp_seq_rq = s;
spin_unlock(&rnp->exp_lock);
}
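/*
 * Bits 1-2 of the sequence number select one of four wait queues
 * per rcu_node structure, so waiters on different recent grace
 * periods are awakened separately.
 */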
wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
}
trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
mutex_unlock(&rsp->exp_wake_mutex);
}
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
@@ -3837,7 +3966,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
void synchronize_sched_expedited(void)
{
unsigned long s;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
/* If only one CPU, this is automatically a grace period. */
@@ -3852,17 +3980,14 @@ void synchronize_sched_expedited(void)
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
rnp = exp_funnel_lock(rsp, s);
if (rnp == NULL)
if (exp_funnel_lock(rsp, s))
return; /* Someone else did our work for us. */
rcu_exp_gp_seq_start(rsp);
/* Initialize the rcu_node tree in preparation for the wait. */
sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
synchronize_sched_expedited_wait(rsp);
rcu_exp_gp_seq_end(rsp);
mutex_unlock(&rnp->exp_funnel_mutex);
/* Wait and clean up, including waking everyone. */
rcu_exp_wait_wake(rsp, s);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
@@ -4162,7 +4287,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
rdp->cpu = cpu;
rdp->rsp = rsp;
mutex_init(&rdp->exp_funnel_mutex);
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
@@ -4420,10 +4544,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
{
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
static const char * const exp[] = RCU_EXP_NAME_INIT;
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
@@ -4482,9 +4604,11 @@ static void __init rcu_init_one(struct rcu_state *rsp)
rnp->level = i;
INIT_LIST_HEAD(&rnp->blkd_tasks);
rcu_init_one_nocb(rnp);
mutex_init(&rnp->exp_funnel_mutex);
lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
&rcu_exp_class[i], exp[i]);
init_waitqueue_head(&rnp->exp_wq[0]);
init_waitqueue_head(&rnp->exp_wq[1]);
init_waitqueue_head(&rnp->exp_wq[2]);
init_waitqueue_head(&rnp->exp_wq[3]);
spin_lock_init(&rnp->exp_lock);
}
}

View File

@@ -70,7 +70,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
# define RCU_NODE_NAME_INIT { "rcu_node_0" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
# define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
#elif NR_CPUS <= RCU_FANOUT_2
# define RCU_NUM_LVLS 2
# define NUM_RCU_LVL_0 1
@@ -79,7 +78,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
#elif NR_CPUS <= RCU_FANOUT_3
# define RCU_NUM_LVLS 3
# define NUM_RCU_LVL_0 1
@@ -89,7 +87,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
#elif NR_CPUS <= RCU_FANOUT_4
# define RCU_NUM_LVLS 4
# define NUM_RCU_LVL_0 1
@@ -100,7 +97,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
@@ -252,7 +248,9 @@ struct rcu_node {
/* Counts of upcoming no-CB GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
unsigned long exp_seq_rq;
wait_queue_head_t exp_wq[4];
} ____cacheline_internodealigned_in_smp;
/*
@@ -387,11 +385,9 @@ struct rcu_data {
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
struct mutex exp_funnel_mutex;
atomic_long_t expedited_workdone0; /* # done by others #0. */
atomic_long_t expedited_workdone1; /* # done by others #1. */
atomic_long_t expedited_workdone2; /* # done by others #2. */
atomic_long_t expedited_workdone3; /* # done by others #3. */
atomic_long_t exp_workdone1; /* # done by others #1. */
atomic_long_t exp_workdone2; /* # done by others #2. */
atomic_long_t exp_workdone3; /* # done by others #3. */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
@@ -505,6 +501,8 @@ struct rcu_state {
/* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */
struct mutex exp_mutex; /* Serialize expedited GP. */
struct mutex exp_wake_mutex; /* Serialize wakeup. */
unsigned long expedited_sequence; /* Take a ticket. */
atomic_long_t expedited_normal; /* # fallbacks to normal. */
atomic_t expedited_need_qs; /* # CPUs left to check in. */
@@ -513,6 +511,8 @@ struct rcu_state {
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long jiffies_kick_kthreads; /* Time at which to kick */
/* kthreads, if configured. */
unsigned long n_force_qs; /* Number of calls to */
/* force_quiescent_state(). */
unsigned long n_force_qs_lh; /* ~Number of calls leaving */

View File

@@ -722,18 +722,22 @@ static void sync_rcu_exp_handler(void *info)
* synchronize_rcu_expedited - Brute-force RCU grace period
*
* Wait for an RCU-preempt grace period, but expedite it. The basic
* idea is to invoke synchronize_sched_expedited() to push all the tasks to
* the ->blkd_tasks lists and wait for this list to drain. This consumes
* significant time on all CPUs and is unfriendly to real-time workloads,
* so is thus not recommended for any sort of common-case code.
* In fact, if you are using synchronize_rcu_expedited() in a loop,
* please restructure your code to batch your updates, and then use a
* single synchronize_rcu() instead.
* idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
* checks whether the CPU is in an RCU-preempt critical section, and
* if so, it sets a flag that causes the outermost rcu_read_unlock()
* to report the quiescent state. On the other hand, if the CPU is
* not in an RCU read-side critical section, the IPI handler reports
* the quiescent state immediately.
*
* Although this is a great improvement over previous expedited
* implementations, it is still unfriendly to real-time workloads, so is
* thus not recommended for any sort of common-case code. In fact, if
* you are using synchronize_rcu_expedited() in a loop, please restructure
* your code to batch your updates, and then use a single synchronize_rcu()
* instead.
*/
void synchronize_rcu_expedited(void)
{
struct rcu_node *rnp;
struct rcu_node *rnp_unlock;
struct rcu_state *rsp = rcu_state_p;
unsigned long s;
@@ -744,23 +748,14 @@ void synchronize_rcu_expedited(void)
}
s = rcu_exp_gp_seq_snap(rsp);
rnp_unlock = exp_funnel_lock(rsp, s);
if (rnp_unlock == NULL)
if (exp_funnel_lock(rsp, s))
return; /* Someone else did our work for us. */
rcu_exp_gp_seq_start(rsp);
/* Initialize the rcu_node tree in preparation for the wait. */
sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
/* Wait for snapshotted ->blkd_tasks lists to drain. */
rnp = rcu_get_root(rsp);
synchronize_sched_expedited_wait(rsp);
/* Clean up and exit. */
rcu_exp_gp_seq_end(rsp);
mutex_unlock(&rnp_unlock->exp_funnel_mutex);
/* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
rcu_exp_wait_wake(rsp, s);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

View File

@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp;
unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
unsigned long s1 = 0, s2 = 0, s3 = 0;
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
s0 += atomic_long_read(&rdp->expedited_workdone0);
s1 += atomic_long_read(&rdp->expedited_workdone1);
s2 += atomic_long_read(&rdp->expedited_workdone2);
s3 += atomic_long_read(&rdp->expedited_workdone3);
s1 += atomic_long_read(&rdp->exp_workdone1);
s2 += atomic_long_read(&rdp->exp_workdone2);
s3 += atomic_long_read(&rdp->exp_workdone3);
}
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
rsp->expedited_sequence, s0, s1, s2, s3,
seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
rsp->expedited_sequence, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);

View File

@@ -67,7 +67,7 @@ static int rcu_normal_after_boot;
module_param(rcu_normal_after_boot, int, 0);
#endif /* #ifndef CONFIG_TINY_RCU */
#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
* rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
*
@@ -111,7 +111,7 @@ int rcu_read_lock_sched_held(void)
return 0;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
return lockdep_opinion || !preemptible();
}
EXPORT_SYMBOL(rcu_read_lock_sched_held);
#endif

View File

@@ -451,6 +451,7 @@ static int torture_shutdown(void *arg)
torture_shutdown_hook();
else
VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
ftrace_dump(DUMP_ALL);
kernel_power_off(); /* Shut down the system. */
return 0;
}
@@ -602,8 +603,9 @@ bool torture_init_begin(char *ttype, bool v, int *runnable)
{
mutex_lock(&fullstop_mutex);
if (torture_type != NULL) {
pr_alert("torture_init_begin: refusing %s init: %s running",
pr_alert("torture_init_begin: Refusing %s init: %s running.\n",
ttype, torture_type);
pr_alert("torture_init_begin: One torture test at a time!\n");
mutex_unlock(&fullstop_mutex);
return false;
}

View File

@@ -1289,6 +1289,39 @@ config TORTURE_TEST
tristate
default n
config RCU_PERF_TEST
tristate "performance tests for RCU"
depends on DEBUG_KERNEL
select TORTURE_TEST
select SRCU
select TASKS_RCU
default n
help
This option provides a kernel module that runs performance
tests on the RCU infrastructure. The kernel module may be built
after the fact on the running kernel to be tested, if desired.
Say Y here if you want RCU performance tests to be built into
the kernel.
Say M if you want the RCU performance tests to build as a module.
Say N if you are unsure.
config RCU_PERF_TEST_RUNNABLE
bool "performance tests for RCU runnable by default"
depends on RCU_PERF_TEST = y
default n
help
This option provides a way to build the RCU performance tests
directly into the kernel without them starting up at boot time.
You can use /sys/module to manually override this
setting; the module parameter is available only when the
RCU performance tests have been built into the kernel.
Say Y here if you want the RCU performance tests to start during
boot (you probably don't).
Say N here if you want the RCU performance tests to start only
after being manually enabled via /sys/module.
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL

View File

@@ -0,0 +1,90 @@
#!/bin/bash
#
# Alternate sleeping and spinning on randomly selected CPUs. The purpose
# of this script is to inflict random OS jitter on a concurrently running
# test.
#
# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
#
# me: Random-number-generator seed salt.
# duration: Time to run in seconds.
# sleepmax: Maximum microseconds to sleep, defaults to one second.
# spinmax: Maximum microseconds to spin, defaults to one millisecond.
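# Example (illustrative values): "jitter.sh 3 300 200000 500" runs
# instance 3 for 300 seconds, sleeping up to 0.2 seconds and spinning
# up to 500 microseconds at a time.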
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2016
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
me=$(($1 * 1000))
duration=$2
sleepmax=${3-1000000}
spinmax=${4-1000}
n=1
starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
while :
do
# Check for done.
t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
if test "$t" -gt "$duration"
then
exit 0;
fi
# Set affinity to randomly selected CPU
cpus=`ls /sys/devices/system/cpu/*/online |
sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
grep -v '^0*$'`
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
curcpu = ca[int(rand() * ncpus + 1)];
mask = lshift(1, curcpu);
if (mask + 0 <= 0)
mask = 1;
printf("%#x\n", mask);
}' < /dev/null`
n=$(($n+1))
if ! taskset -p $cpumask $$ > /dev/null 2>&1
then
echo taskset failure: '"taskset -p ' $cpumask $$ '"'
exit 1
fi
# Sleep a random duration
sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
srand(n + me + systime());
printf("%06d", int(rand() * sleepmax));
}' < /dev/null`
n=$(($n+1))
sleep .$sleeptime
# Spin a random duration
limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
srand(n + me + systime());
printf("%06d", int(rand() * spinmax));
}' < /dev/null`
n=$(($n+1))
for (( i = 0; i < limit; i++ ))
do
echo > /dev/null
done
done
exit 1

View File

@@ -0,0 +1,121 @@
#!/bin/bash
#
# Analyze a given results directory for rcuperf performance measurements,
# looking for ftrace data. Exits with 0 if data was found, analyzed, and
# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
# argument checking.
#
# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2016
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
i="$1"
. tools/testing/selftests/rcutorture/bin/functions.sh
if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
then
exit 10
fi
sed -e 's/^\[[^]]*]//' < $i/console.log |
grep 'us : rcu_exp_grace_period' |
sed -e 's/us : / : /' |
tr -d '\015' |
awk '
$8 == "start" {
if (starttask != "")
nlost++;
starttask = $1;
starttime = $3;
startseq = $7;
}
$8 == "end" {
if (starttask == $1 && startseq == $7) {
curgpdur = $3 - starttime;
gptimes[++n] = curgpdur;
gptaskcnt[starttask]++;
sum += curgpdur;
if (curgpdur > 1000)
print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
starttask = "";
} else {
# Lost a message or some such, reset.
starttask = "";
nlost++;
}
}
$8 == "done" {
piggybackcnt[$1]++;
}
END {
newNR = asort(gptimes);
if (newNR <= 0) {
print "No ftrace records found???"
exit 10;
}
pct50 = int(newNR * 50 / 100);
if (pct50 < 1)
pct50 = 1;
pct90 = int(newNR * 90 / 100);
if (pct90 < 1)
pct90 = 1;
pct99 = int(newNR * 99 / 100);
if (pct99 < 1)
pct99 = 1;
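# Worked example (assumed data): a 90th-percentile duration of
# 87us gives div = 10^int(log(87)/log(10) + .5) / 100
# = 10^2 / 100 = 1, i.e. one-microsecond buckets.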
div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
print "Histogram bucket size: " div;
last = gptimes[1] - 10;
count = 0;
for (i = 1; i <= newNR; i++) {
current = div * int(gptimes[i] / div);
if (last == current) {
count++;
} else {
if (count > 0)
print last, count;
count = 1;
last = current;
}
}
if (count > 0)
print last, count;
print "Distribution of grace periods across tasks:";
for (i in gptaskcnt) {
print "\t" i, gptaskcnt[i];
nbatches += gptaskcnt[i];
}
ngps = nbatches;
print "Distribution of piggybacking across tasks:";
for (i in piggybackcnt) {
print "\t" i, piggybackcnt[i];
ngps += piggybackcnt[i];
}
print "Average grace-period duration: " sum / newNR " microseconds";
print "Minimum grace-period duration: " gptimes[1];
print "50th percentile grace-period duration: " gptimes[pct50];
print "90th percentile grace-period duration: " gptimes[pct90];
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
print "Computed from ftrace data.";
}'
exit 0
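The histogram bucket size computed in the END clause is the 90th-percentile grace-period duration rounded to the nearest power of ten, divided by 100. A worked example (awk's log() is the natural logarithm, so log(x)/log(10) gives log base 10):

awk 'BEGIN { pct90 = 1234; print 10 ** int(log(pct90) / log(10) + .5) / 100 }'

This prints 10: a 90th-percentile duration of 1234 microseconds yields 10-microsecond-wide histogram buckets.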

View File

@@ -0,0 +1,96 @@
#!/bin/bash
#
# Analyze a given results directory for rcuperf performance measurements.
#
# Usage: kvm-recheck-rcuperf.sh resdir
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2016
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
i="$1"
if test -d $i
then
:
else
echo Unreadable results directory: $i
exit 1
fi
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. tools/testing/selftests/rcutorture/bin/functions.sh
if kvm-recheck-rcuperf-ftrace.sh $i
then
# ftrace data was successfully analyzed, call it good!
exit 0
fi
configfile=`echo $i | sed -e 's/^.*\///'`
sed -e 's/^\[[^]]*]//' < $i/console.log |
awk '
/-perf: .* gps: .* batches:/ {
ngps = $9;
nbatches = $11;
}
/-perf: .*writer-duration/ {
gptimes[++n] = $5 / 1000.;
sum += $5 / 1000.;
}
END {
newNR = asort(gptimes);
if (newNR <= 0) {
print "No rcuperf records found???"
exit;
}
pct50 = int(newNR * 50 / 100);
if (pct50 < 1)
pct50 = 1;
pct90 = int(newNR * 90 / 100);
if (pct90 < 1)
pct90 = 1;
pct99 = int(newNR * 99 / 100);
if (pct99 < 1)
pct99 = 1;
div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
print "Histogram bucket size: " div;
last = gptimes[1] - 10;
count = 0;
for (i = 1; i <= newNR; i++) {
current = div * int(gptimes[i] / div);
if (last == current) {
count++;
} else {
if (count > 0)
print last, count;
count = 1;
last = current;
}
}
if (count > 0)
print last, count;
print "Average grace-period duration: " sum / newNR " microseconds";
print "Minimum grace-period duration: " gptimes[1];
print "50th percentile grace-period duration: " gptimes[pct50];
print "90th percentile grace-period duration: " gptimes[pct90];
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
print "Computed from rcuperf printk output.";
}'
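For reference, the two patterns above key off console lines of roughly the following shape, shown here after the sed command has stripped the leading timestamps. These samples are reconstructed from the field positions the script extracts ($9 for gps, $11 for batches, $5 for the writer duration in nanoseconds), not copied from actual rcuperf output:

rcu-perf: start: 4294937296 end: 4294987296 duration: 50000 gps: 1200 batches: 600
rcu-perf:    3 writer-duration: 42 15342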

View File

@@ -48,7 +48,10 @@ do
cat $i/Make.oldconfig.err
fi
parse-build.sh $i/Make.out $configfile
parse-torture.sh $i/console.log $configfile
if test "$TORTURE_SUITE" != rcuperf
then
parse-torture.sh $i/console.log $configfile
fi
parse-console.sh $i/console.log $configfile
if test -r $i/Warnings
then

View File

@@ -6,7 +6,7 @@
# Execute this in the source tree. Do not run it as a background task
# because qemu does not seem to like that much.
#
# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args
# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
#
# qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with
# arguments specifying the number of CPUs and other
@@ -91,25 +91,33 @@ fi
# CONFIG_PCMCIA=n
# CONFIG_CARDBUS=n
# CONFIG_YENTA=n
if kvm-build.sh $config_template $builddir $T
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
then
# Rerunning previous test, so use that test's kernel.
QEMU="`identify_qemu $base_resdir/vmlinux`"
KERNEL=$base_resdir/bzImage
ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
elif kvm-build.sh $config_template $builddir $T
then
# Had to build a kernel for this test.
QEMU="`identify_qemu $builddir/vmlinux`"
BOOT_IMAGE="`identify_boot_image $QEMU`"
cp $builddir/Make*.out $resdir
cp $builddir/vmlinux $resdir
cp $builddir/.config $resdir
if test -n "$BOOT_IMAGE"
then
cp $builddir/$BOOT_IMAGE $resdir
KERNEL=$resdir/bzImage
else
echo No identifiable boot image, not running KVM, see $resdir.
echo Do the torture scripts know about your architecture?
fi
parse-build.sh $resdir/Make.out $title
if test -f $builddir.wait
then
mv $builddir.wait $builddir.ready
fi
else
# Build failed.
cp $builddir/Make*.out $resdir
cp $builddir/.config $resdir || :
echo Build failed, not running KVM, see $resdir.
@@ -119,12 +127,15 @@ else
fi
exit 1
fi
if test -f $builddir.wait
then
mv $builddir.wait $builddir.ready
fi
while test -f $builddir.ready
do
sleep 1
done
minutes=$4
seconds=$(($minutes * 60))
seconds=$4
qemu_args=$5
boot_args=$6
@@ -167,15 +178,26 @@ then
exit 0
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
qemu_pid=$!
echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
echo Monitoring qemu job at pid $qemu_pid
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu_pid"
then
qemu_pid=`cat "$resdir/qemu_pid"`
echo Monitoring qemu job at pid $qemu_pid
else
qemu_pid=""
echo Monitoring qemu job at as-yet-unknown pid
fi
while :
do
if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
then
qemu_pid=`cat "$resdir/qemu_pid"`
fi
kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if kill -0 $qemu_pid > /dev/null 2>&1
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
then
if test $kruntime -ge $seconds
then
@@ -195,12 +217,16 @@ do
ps -fp $killpid >> $resdir/Warnings 2>&1
fi
else
echo ' ---' `date`: Kernel done
echo ' ---' `date`: "Kernel done"
fi
break
fi
done
if test $commandcompleted -eq 0
if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
then
qemu_pid=`cat "$resdir/qemu_pid"`
fi
if test $commandcompleted -eq 0 -a -n "$qemu_pid"
then
echo Grace period for qemu job at pid $qemu_pid
while :
@@ -220,6 +246,9 @@ then
fi
sleep 1
done
elif test -z "$qemu_pid"
then
echo Unknown PID, cannot kill qemu command
fi
parse-torture.sh $resdir/console.log $title
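The qemu launch line above records qemu's PID in a file from inside the subshell because "$!" of the outer "( ... ) &" names the subshell, not qemu itself. A stripped-down sketch of the same pattern, with hypothetical file names and sleep standing in for qemu:

( sleep 30 & echo $! > /tmp/cmd_pid; wait `cat /tmp/cmd_pid`; echo $? > /tmp/cmd_retval ) &
sleep 1 # Give the PID a chance to reach the file.
pid=`cat /tmp/cmd_pid`
while kill -0 "$pid" > /dev/null 2>&1
do
	sleep 1
done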

View File

@@ -34,7 +34,7 @@ T=/tmp/kvm.sh.$$
trap 'rm -rf $T' 0
mkdir $T
dur=30
dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
@ -48,6 +48,7 @@ resdir=""
configs=""
cpus=0
ds=`date +%Y.%m.%d-%H:%M:%S`
jitter=0
. functions.sh
@@ -63,6 +64,7 @@ usage () {
echo " --dryrun sched|script"
echo " --duration minutes"
echo " --interactive"
echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
echo " --no-initrd"
@@ -116,12 +118,17 @@ do
;;
--duration)
checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
dur=$2
dur=$(($2*60))
shift
;;
--interactive)
TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
;;
--jitter)
checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
jitter="$2"
shift
;;
--kmake-arg)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="$2"
@@ -156,7 +163,7 @@ do
shift
;;
--torture)
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
TORTURE_SUITE=$2
shift
;;
@@ -299,6 +306,7 @@ awk < $T/cfgcpu.pack \
-v CONFIGDIR="$CONFIGFRAG/" \
-v KVM="$KVM" \
-v ncpus=$cpus \
-v jitter="$jitter" \
-v rd=$resdir/$ds/ \
-v dur=$dur \
-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
@@ -359,6 +367,16 @@ function dump(first, pastlast, batchnum)
print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log";
print "fi"
}
njitter = 0;
split(jitter, ja);
if (ja[1] == -1 && ncpus == 0)
njitter = 1;
else if (ja[1] == -1)
njitter = ncpus;
else
njitter = ja[1];
for (j = 0; j < njitter; j++)
print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
print "wait"
print "if test -z \"$TORTURE_BUILDONLY\""
print "then"

View File

@@ -14,7 +14,7 @@ CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=4
CONFIG_RCU_FANOUT_LEAF=4
CONFIG_RCU_FANOUT_LEAF=3
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n

View File

@@ -1 +1 @@
rcutorture.torture_type=rcu_bh
rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4

View File

@@ -0,0 +1 @@
TREE

View File

@@ -0,0 +1,2 @@
CONFIG_RCU_PERF_TEST=y
CONFIG_PRINTK_TIME=y

View File

@@ -0,0 +1,19 @@
CONFIG_SMP=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
#CHECK#CONFIG_PREEMPT_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_TRACE=y

View File

@@ -0,0 +1,22 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=54
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
#CHECK#CONFIG_PREEMPT_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_TRACE=y

View File

@@ -0,0 +1,52 @@
#!/bin/bash
#
# Torture-suite-dependent shell functions for the rest of the scripts.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2015
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
# rcuperf_param_nreaders bootparam-string
#
# Adds nreaders rcuperf module parameter if not already specified.
rcuperf_param_nreaders () {
if ! echo "$1" | grep -q "rcuperf.nreaders"
then
echo rcuperf.nreaders=-1
fi
}
# rcuperf_param_nwriters bootparam-string
#
# Adds nwriters rcuperf module parameter if not already specified.
rcuperf_param_nwriters () {
if ! echo "$1" | grep -q "rcuperf.nwriters"
then
echo rcuperf.nwriters=-1
fi
}
# per_version_boot_params bootparam-string config-file seconds
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 `rcuperf_param_nreaders "$1"` \
`rcuperf_param_nwriters "$1"` \
rcuperf.perf_runnable=1 \
rcuperf.shutdown=1 \
rcuperf.verbose=1
}
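As a quick illustration of how these helpers compose (file and argument names here are hypothetical; the config-file and seconds arguments are unused by the function bodies shown), sourcing this file and calling per_version_boot_params with a boot string that lacks the rcuperf parameters:

. ver_functions.sh
per_version_boot_params "rcuperf.gp_exp=1" someconfig 600

This would emit, on one line: rcuperf.gp_exp=1 rcuperf.nreaders=-1 rcuperf.nwriters=-1 rcuperf.perf_runnable=1 rcuperf.shutdown=1 rcuperf.verbose=1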