OpenCloudOS-Kernel/kernel/torture.c

959 lines
25 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0+
/*
* Common functions for in-kernel torture tests.
*
* Copyright (C) IBM Corporation, 2014
*
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
* Based on kernel/rcu/torture.c.
*/
#define pr_fmt(fmt) fmt
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/trace_clock.h>
#include <linux/ktime.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
#include "rcu/rcu.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
static bool disable_onoff_at_boot;
module_param(disable_onoff_at_boot, bool, 0444);
static bool ftrace_dump_at_shutdown;
module_param(ftrace_dump_at_shutdown, bool, 0444);
static int verbose_sleep_frequency;
module_param(verbose_sleep_frequency, int, 0444);
static int verbose_sleep_duration = 1;
module_param(verbose_sleep_duration, int, 0444);
static char *torture_type;
static int verbose;
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
#define FULLSTOP_SHUTDOWN 1 /* System shutdown with torture running. */
#define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */
static int fullstop = FULLSTOP_RMMOD;
static DEFINE_MUTEX(fullstop_mutex);
static atomic_t verbose_sleep_counter;
/*
* Sleep if needed from VERBOSE_TOROUT*().
*/
void verbose_torout_sleep(void)
{
if (verbose_sleep_frequency > 0 &&
verbose_sleep_duration > 0 &&
!(atomic_inc_return(&verbose_sleep_counter) % verbose_sleep_frequency))
schedule_timeout_uninterruptible(verbose_sleep_duration);
}
EXPORT_SYMBOL_GPL(verbose_torout_sleep);
/*
* Schedule a high-resolution-timer sleep in nanoseconds, with a 32-bit
* nanosecond random fuzz. This function and its friends desynchronize
* testing from the timer wheel.
*/
int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp)
{
ktime_t hto = baset_ns;
if (trsp)
hto += (torture_random(trsp) >> 3) % fuzzt_ns;
set_current_state(TASK_UNINTERRUPTIBLE);
return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);
/*
* Schedule a high-resolution-timer sleep in microseconds, with a 32-bit
* nanosecond (not microsecond!) random fuzz.
*/
int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp)
{
ktime_t baset_ns = baset_us * NSEC_PER_USEC;
return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_us);
/*
* Schedule a high-resolution-timer sleep in milliseconds, with a 32-bit
* microsecond (not millisecond!) random fuzz.
*/
int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp)
{
ktime_t baset_ns = baset_ms * NSEC_PER_MSEC;
u32 fuzzt_ns;
if ((u32)~0U / NSEC_PER_USEC < fuzzt_us)
fuzzt_ns = (u32)~0U;
else
fuzzt_ns = fuzzt_us * NSEC_PER_USEC;
return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_ms);
/*
* Schedule a high-resolution-timer sleep in jiffies, with an
* implied one-jiffy random fuzz. This is intended to replace calls to
* schedule_timeout_interruptible() and friends.
*/
int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp)
{
ktime_t baset_ns = jiffies_to_nsecs(baset_j);
return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), trsp);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_jiffies);
/*
* Schedule a high-resolution-timer sleep in milliseconds, with a 32-bit
* millisecond (not second!) random fuzz.
*/
int torture_hrtimeout_s(u32 baset_s, u32 fuzzt_ms, struct torture_random_state *trsp)
{
ktime_t baset_ns = baset_s * NSEC_PER_SEC;
u32 fuzzt_ns;
if ((u32)~0U / NSEC_PER_MSEC < fuzzt_ms)
fuzzt_ns = (u32)~0U;
else
fuzzt_ns = fuzzt_ms * NSEC_PER_MSEC;
return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_s);
#ifdef CONFIG_HOTPLUG_CPU
/*
* Variables for online-offline handling. Only present if CPU hotplug
* is enabled, otherwise does nothing.
*/
static struct task_struct *onoff_task;
static long onoff_holdoff;
static long onoff_interval;
static torture_ofl_func *onoff_f;
static long n_offline_attempts;
static long n_offline_successes;
static unsigned long sum_offline;
static int min_offline = -1;
static int max_offline;
static long n_online_attempts;
static long n_online_successes;
static unsigned long sum_online;
static int min_online = -1;
static int max_online;
static int torture_online_cpus = NR_CPUS;
/*
* Some torture testing leverages confusion as to the number of online
* CPUs. This function returns the torture-testing view of this number,
* which allows torture tests to load-balance appropriately.
*/
int torture_num_online_cpus(void)
{
return READ_ONCE(torture_online_cpus);
}
EXPORT_SYMBOL_GPL(torture_num_online_cpus);
/*
* Attempt to take a CPU offline. Return false if the CPU is already
* offline or if it is not subject to CPU-hotplug operations. The
* caller can detect other failures by looking at the statistics.
*/
bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
unsigned long *sum_offl, int *min_offl, int *max_offl)
{
unsigned long delta;
int ret;
char *s;
unsigned long starttime;
if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
return false;
if (num_online_cpus() <= 1)
return false; /* Can't offline the last CPU. */
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: offlining %d\n",
torture_type, cpu);
starttime = jiffies;
(*n_offl_attempts)++;
ret = remove_cpu(cpu);
if (ret) {
s = "";
if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) {
// PCI probe frequently disables hotplug during boot.
(*n_offl_attempts)--;
s = " (-EBUSY forgiven during boot)";
}
if (verbose)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: offline %d failed%s: errno %d\n",
torture_type, cpu, s, ret);
} else {
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: offlined %d\n",
torture_type, cpu);
if (onoff_f)
onoff_f();
(*n_offl_successes)++;
delta = jiffies - starttime;
*sum_offl += delta;
if (*min_offl < 0) {
*min_offl = delta;
*max_offl = delta;
}
if (*min_offl > delta)
*min_offl = delta;
if (*max_offl < delta)
*max_offl = delta;
WRITE_ONCE(torture_online_cpus, torture_online_cpus - 1);
WARN_ON_ONCE(torture_online_cpus <= 0);
}
return true;
}
EXPORT_SYMBOL_GPL(torture_offline);
/*
* Attempt to bring a CPU online. Return false if the CPU is already
* online or if it is not subject to CPU-hotplug operations. The
* caller can detect other failures by looking at the statistics.
*/
bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
unsigned long *sum_onl, int *min_onl, int *max_onl)
{
unsigned long delta;
int ret;
char *s;
unsigned long starttime;
if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
return false;
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: onlining %d\n",
torture_type, cpu);
starttime = jiffies;
(*n_onl_attempts)++;
ret = add_cpu(cpu);
if (ret) {
s = "";
if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) {
// PCI probe frequently disables hotplug during boot.
(*n_onl_attempts)--;
s = " (-EBUSY forgiven during boot)";
}
if (verbose)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: online %d failed%s: errno %d\n",
torture_type, cpu, s, ret);
} else {
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
"torture_onoff task: onlined %d\n",
torture_type, cpu);
(*n_onl_successes)++;
delta = jiffies - starttime;
*sum_onl += delta;
if (*min_onl < 0) {
*min_onl = delta;
*max_onl = delta;
}
if (*min_onl > delta)
*min_onl = delta;
if (*max_onl < delta)
*max_onl = delta;
WRITE_ONCE(torture_online_cpus, torture_online_cpus + 1);
}
return true;
}
EXPORT_SYMBOL_GPL(torture_online);
/*
* Get everything online at the beginning and ends of tests.
*/
static void torture_online_all(char *phase)
{
int cpu;
int ret;
for_each_possible_cpu(cpu) {
if (cpu_online(cpu))
continue;
ret = add_cpu(cpu);
if (ret && verbose) {
pr_alert("%s" TORTURE_FLAG
"%s: %s online %d: errno %d\n",
__func__, phase, torture_type, cpu, ret);
}
}
}
/*
* Execute random CPU-hotplug operations at the interval specified
* by the onoff_interval.
*/
static int
torture_onoff(void *arg)
{
int cpu;
int maxcpu = -1;
DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("torture_onoff task started");
for_each_online_cpu(cpu)
maxcpu = cpu;
WARN_ON(maxcpu < 0);
torture_online_all("Initial");
if (maxcpu == 0) {
VERBOSE_TOROUT_STRING("Only one CPU, so CPU-hotplug testing is disabled");
goto stop;
}
if (onoff_holdoff > 0) {
VERBOSE_TOROUT_STRING("torture_onoff begin holdoff");
schedule_timeout_interruptible(onoff_holdoff);
VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
}
while (!torture_must_stop()) {
if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) {
schedule_timeout_interruptible(HZ / 10);
continue;
}
cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
if (!torture_offline(cpu,
&n_offline_attempts, &n_offline_successes,
&sum_offline, &min_offline, &max_offline))
torture_online(cpu,
&n_online_attempts, &n_online_successes,
&sum_online, &min_online, &max_online);
schedule_timeout_interruptible(onoff_interval);
}
stop:
torture_kthread_stopping("torture_onoff");
torture_online_all("Final");
return 0;
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
* Initiate online-offline handling.
*/
int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f)
{
#ifdef CONFIG_HOTPLUG_CPU
onoff_holdoff = ooholdoff;
onoff_interval = oointerval;
onoff_f = f;
if (onoff_interval <= 0)
return 0;
return torture_create_kthread(torture_onoff, NULL, onoff_task);
#else /* #ifdef CONFIG_HOTPLUG_CPU */
return 0;
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
}
EXPORT_SYMBOL_GPL(torture_onoff_init);
/*
* Clean up after online/offline testing.
*/
static void torture_onoff_cleanup(void)
{
#ifdef CONFIG_HOTPLUG_CPU
if (onoff_task == NULL)
return;
VERBOSE_TOROUT_STRING("Stopping torture_onoff task");
kthread_stop(onoff_task);
onoff_task = NULL;
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
}
/*
* Print online/offline testing statistics.
*/
void torture_onoff_stats(void)
{
#ifdef CONFIG_HOTPLUG_CPU
pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
n_online_successes, n_online_attempts,
n_offline_successes, n_offline_attempts,
min_online, max_online,
min_offline, max_offline,
sum_online, sum_offline, HZ);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
}
EXPORT_SYMBOL_GPL(torture_onoff_stats);
/*
* Were all the online/offline operations successful?
*/
bool torture_onoff_failures(void)
{
#ifdef CONFIG_HOTPLUG_CPU
return n_online_successes != n_online_attempts ||
n_offline_successes != n_offline_attempts;
#else /* #ifdef CONFIG_HOTPLUG_CPU */
return false;
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
}
EXPORT_SYMBOL_GPL(torture_onoff_failures);
#define TORTURE_RANDOM_MULT 39916801 /* prime */
#define TORTURE_RANDOM_ADD 479001701 /* prime */
#define TORTURE_RANDOM_REFRESH 10000
/*
* Crude but fast random-number generator. Uses a linear congruential
* generator, with occasional help from cpu_clock().
*/
unsigned long
torture_random(struct torture_random_state *trsp)
{
if (--trsp->trs_count < 0) {
trsp->trs_state += (unsigned long)local_clock() + raw_smp_processor_id();
trsp->trs_count = TORTURE_RANDOM_REFRESH;
}
trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT +
TORTURE_RANDOM_ADD;
return swahw32(trsp->trs_state);
}
EXPORT_SYMBOL_GPL(torture_random);
/*
* Variables for shuffling. The idea is to ensure that each CPU stays
* idle for an extended period to test interactions with dyntick idle,
* as well as interactions with any per-CPU variables.
*/
struct shuffle_task {
struct list_head st_l;
struct task_struct *st_t;
};
static long shuffle_interval; /* In jiffies. */
static struct task_struct *shuffler_task;
static cpumask_var_t shuffle_tmp_mask;
static int shuffle_idle_cpu; /* Force all torture tasks off this CPU */
static struct list_head shuffle_task_list = LIST_HEAD_INIT(shuffle_task_list);
static DEFINE_MUTEX(shuffle_task_mutex);
/*
* Register a task to be shuffled. If there is no memory, just splat
* and don't bother registering.
*/
void torture_shuffle_task_register(struct task_struct *tp)
{
struct shuffle_task *stp;
if (WARN_ON_ONCE(tp == NULL))
return;
stp = kmalloc(sizeof(*stp), GFP_KERNEL);
if (WARN_ON_ONCE(stp == NULL))
return;
stp->st_t = tp;
mutex_lock(&shuffle_task_mutex);
list_add(&stp->st_l, &shuffle_task_list);
mutex_unlock(&shuffle_task_mutex);
}
EXPORT_SYMBOL_GPL(torture_shuffle_task_register);
/*
* Unregister all tasks, for example, at the end of the torture run.
*/
static void torture_shuffle_task_unregister_all(void)
{
struct shuffle_task *stp;
struct shuffle_task *p;
mutex_lock(&shuffle_task_mutex);
list_for_each_entry_safe(stp, p, &shuffle_task_list, st_l) {
list_del(&stp->st_l);
kfree(stp);
}
mutex_unlock(&shuffle_task_mutex);
}
/* Shuffle tasks such that we allow shuffle_idle_cpu to become idle.
* A special case is when shuffle_idle_cpu = -1, in which case we allow
* the tasks to run on all CPUs.
*/
static void torture_shuffle_tasks(void)
{
struct shuffle_task *stp;
cpumask_setall(shuffle_tmp_mask);
cpus_read_lock();
/* No point in shuffling if there is only one online CPU (ex: UP) */
if (num_online_cpus() == 1) {
cpus_read_unlock();
return;
}
/* Advance to the next CPU. Upon overflow, don't idle any CPUs. */
shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask);
if (shuffle_idle_cpu >= nr_cpu_ids)
shuffle_idle_cpu = -1;
else
cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
mutex_lock(&shuffle_task_mutex);
list_for_each_entry(stp, &shuffle_task_list, st_l)
set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
mutex_unlock(&shuffle_task_mutex);
cpus_read_unlock();
}
/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
* system to become idle at a time and cut off its timer ticks. This is meant
* to test the support for such tickless idle CPU in RCU.
*/
static int torture_shuffle(void *arg)
{
VERBOSE_TOROUT_STRING("torture_shuffle task started");
do {
schedule_timeout_interruptible(shuffle_interval);
torture_shuffle_tasks();
torture_shutdown_absorb("torture_shuffle");
} while (!torture_must_stop());
torture_kthread_stopping("torture_shuffle");
return 0;
}
/*
* Start the shuffler, with shuffint in jiffies.
*/
int torture_shuffle_init(long shuffint)
{
shuffle_interval = shuffint;
shuffle_idle_cpu = -1;
if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
TOROUT_ERRSTRING("Failed to alloc mask");
return -ENOMEM;
}
/* Create the shuffler thread */
return torture_create_kthread(torture_shuffle, NULL, shuffler_task);
}
EXPORT_SYMBOL_GPL(torture_shuffle_init);
/*
* Stop the shuffling.
*/
static void torture_shuffle_cleanup(void)
{
torture_shuffle_task_unregister_all();
if (shuffler_task) {
VERBOSE_TOROUT_STRING("Stopping torture_shuffle task");
kthread_stop(shuffler_task);
free_cpumask_var(shuffle_tmp_mask);
}
shuffler_task = NULL;
}
/*
* Variables for auto-shutdown. This allows "lights out" torture runs
* to be fully scripted.
*/
static struct task_struct *shutdown_task;
static ktime_t shutdown_time; /* time to system shutdown. */
static void (*torture_shutdown_hook)(void);
/*
* Absorb kthreads into a kernel function that won't return, so that
* they won't ever access module text or data again.
*/
void torture_shutdown_absorb(const char *title)
{
while (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
pr_notice("torture thread %s parking due to system shutdown\n",
title);
schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
}
}
EXPORT_SYMBOL_GPL(torture_shutdown_absorb);
/*
* Cause the torture test to shutdown the system after the test has
* run for the time specified by the shutdown_secs parameter.
*/
static int torture_shutdown(void *arg)
{
ktime_t ktime_snap;
VERBOSE_TOROUT_STRING("torture_shutdown task started");
ktime_snap = ktime_get();
while (ktime_before(ktime_snap, shutdown_time) &&
!torture_must_stop()) {
if (verbose)
pr_alert("%s" TORTURE_FLAG
"torture_shutdown task: %llu ms remaining\n",
torture_type,
ktime_ms_delta(shutdown_time, ktime_snap));
set_current_state(TASK_INTERRUPTIBLE);
schedule_hrtimeout(&shutdown_time, HRTIMER_MODE_ABS);
ktime_snap = ktime_get();
}
if (torture_must_stop()) {
torture_kthread_stopping("torture_shutdown");
return 0;
}
/* OK, shut down the system. */
VERBOSE_TOROUT_STRING("torture_shutdown task shutting down system");
shutdown_task = NULL; /* Avoid self-kill deadlock. */
if (torture_shutdown_hook)
torture_shutdown_hook();
else
VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
if (ftrace_dump_at_shutdown)
rcu_ftrace_dump(DUMP_ALL);
kernel_power_off(); /* Shut down the system. */
return 0;
}
/*
* Start up the shutdown task.
*/
int torture_shutdown_init(int ssecs, void (*cleanup)(void))
{
torture_shutdown_hook = cleanup;
if (ssecs > 0) {
shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0));
return torture_create_kthread(torture_shutdown, NULL,
shutdown_task);
}
return 0;
}
EXPORT_SYMBOL_GPL(torture_shutdown_init);
/*
* Detect and respond to a system shutdown.
*/
static int torture_shutdown_notify(struct notifier_block *unused1,
unsigned long unused2, void *unused3)
{
mutex_lock(&fullstop_mutex);
if (READ_ONCE(fullstop) == FULLSTOP_DONTSTOP) {
VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected");
WRITE_ONCE(fullstop, FULLSTOP_SHUTDOWN);
} else {
pr_warn("Concurrent rmmod and shutdown illegal!\n");
}
mutex_unlock(&fullstop_mutex);
return NOTIFY_DONE;
}
static struct notifier_block torture_shutdown_nb = {
.notifier_call = torture_shutdown_notify,
};
/*
* Shut down the shutdown task. Say what??? Heh! This can happen if
* the torture module gets an rmmod before the shutdown time arrives. ;-)
*/
static void torture_shutdown_cleanup(void)
{
unregister_reboot_notifier(&torture_shutdown_nb);
if (shutdown_task != NULL) {
VERBOSE_TOROUT_STRING("Stopping torture_shutdown task");
kthread_stop(shutdown_task);
}
shutdown_task = NULL;
}
/*
* Variables for stuttering, which means to periodically pause and
* restart testing in order to catch bugs that appear when load is
* suddenly applied to or removed from the system.
*/
static struct task_struct *stutter_task;
static int stutter_pause_test;
static int stutter;
static int stutter_gap;
/*
* Block until the stutter interval ends. This must be called periodically
* by all running kthreads that need to be subject to stuttering.
*/
bool stutter_wait(const char *title)
{
unsigned int i = 0;
bool ret = false;
int spt;
rcu: Rename cond_resched_rcu_qs() to cond_resched_tasks_rcu_qs() Commit e31d28b6ab8f ("trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched()") substituted cond_resched() for the earlier call to cond_resched_rcu_qs(). However, the new-age cond_resched() does not do anything to help RCU-tasks grace periods because (1) RCU-tasks is only enabled when CONFIG_PREEMPT=y and (2) cond_resched() is a complete no-op when preemption is enabled. This situation results in hangs when running the trace benchmarks. A number of potential fixes were discussed on LKML (https://lkml.kernel.org/r/20180224151240.0d63a059@vmware.local.home), including making cond_resched() not be a no-op; making cond_resched() not be a no-op, but only when running tracing benchmarks; reverting the aforementioned commit (which works because cond_resched_rcu_qs() does provide an RCU-tasks quiescent state; and adding a call to the scheduler/RCU rcu_note_voluntary_context_switch() function. All were deemed unsatisfactory, either due to added cond_resched() overhead or due to magic functions inviting cargo culting. This commit renames cond_resched_rcu_qs() to cond_resched_tasks_rcu_qs(), which provides a clear hint as to what this function is doing and why and where it should be used, and then replaces the call to cond_resched() with cond_resched_tasks_rcu_qs() in the trace benchmark's benchmark_event_kthread() function. Reported-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Nicholas Piggin <npiggin@gmail.com>
2018-03-03 08:35:27 +08:00
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
if (!ret) {
sched_set_normal(current, MAX_NICE);
ret = true;
}
if (spt == 1) {
schedule_timeout_interruptible(1);
} else if (spt == 2) {
while (READ_ONCE(stutter_pause_test)) {
if (!(i++ & 0xffff))
torture_hrtimeout_us(10, 0, NULL);
cond_resched();
}
} else {
schedule_timeout_interruptible(round_jiffies_relative(HZ));
}
torture_shutdown_absorb(title);
}
return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
/*
* Cause the torture test to "stutter", starting and stopping all
* threads periodically.
*/
static int torture_stutter(void *arg)
{
DEFINE_TORTURE_RANDOM(rand);
int wtime;
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
wtime = stutter;
if (stutter > 2) {
WRITE_ONCE(stutter_pause_test, 1);
wtime = stutter - 3;
torture_hrtimeout_jiffies(wtime, &rand);
wtime = 2;
}
WRITE_ONCE(stutter_pause_test, 2);
torture_hrtimeout_jiffies(wtime, NULL);
}
WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
torture_hrtimeout_jiffies(stutter_gap, NULL);
torture_shutdown_absorb("torture_stutter");
} while (!torture_must_stop());
torture_kthread_stopping("torture_stutter");
return 0;
}
/*
* Initialize and kick off the torture_stutter kthread.
*/
int torture_stutter_init(const int s, const int sgap)
{
stutter = s;
stutter_gap = sgap;
return torture_create_kthread(torture_stutter, NULL, stutter_task);
}
EXPORT_SYMBOL_GPL(torture_stutter_init);
/*
* Cleanup after the torture_stutter kthread.
*/
static void torture_stutter_cleanup(void)
{
if (!stutter_task)
return;
VERBOSE_TOROUT_STRING("Stopping torture_stutter task");
kthread_stop(stutter_task);
stutter_task = NULL;
}
/*
* Initialize torture module. Please note that this is -not- invoked via
* the usual module_init() mechanism, but rather by an explicit call from
* the client torture module. This call must be paired with a later
* torture_init_end().
*
* The runnable parameter points to a flag that controls whether or not
* the test is currently runnable. If there is no such flag, pass in NULL.
*/
bool torture_init_begin(char *ttype, int v)
{
mutex_lock(&fullstop_mutex);
if (torture_type != NULL) {
pr_alert("%s: Refusing %s init: %s running.\n",
__func__, ttype, torture_type);
pr_alert("%s: One torture test at a time!\n", __func__);
mutex_unlock(&fullstop_mutex);
return false;
}
torture_type = ttype;
verbose = v;
fullstop = FULLSTOP_DONTSTOP;
return true;
}
EXPORT_SYMBOL_GPL(torture_init_begin);
/*
* Tell the torture module that initialization is complete.
*/
torture: Remove __init from torture_init_begin/end Loading rcutorture as a module (as opposed to building it directly into the kernel) results in the following splat: [Wed Apr 16 15:29:33 2014] BUG: unable to handle kernel paging request at ffffffffa0003000 [Wed Apr 16 15:29:33 2014] IP: [<ffffffffa0003000>] 0xffffffffa0003000 [Wed Apr 16 15:29:33 2014] PGD 1c0f067 PUD 1c10063 PMD 378a6067 PTE 0 [Wed Apr 16 15:29:33 2014] Oops: 0010 [#1] SMP [Wed Apr 16 15:29:33 2014] Modules linked in: rcutorture(+) torture [Wed Apr 16 15:29:33 2014] CPU: 0 PID: 4257 Comm: modprobe Not tainted 3.15.0-rc1 #10 [Wed Apr 16 15:29:33 2014] Hardware name: innotek GmbH VirtualBox, BIOS VirtualBox 12/01/2006 [Wed Apr 16 15:29:33 2014] task: ffff8800db1e88d0 ti: ffff8800db25c000 task.ti: ffff8800db25c000 [Wed Apr 16 15:29:33 2014] RIP: 0010:[<ffffffffa0003000>] [<ffffffffa0003000>] 0xffffffffa0003000 [Wed Apr 16 15:29:33 2014] RSP: 0018:ffff8800db25dca0 EFLAGS: 00010282 [Wed Apr 16 15:29:33 2014] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [Wed Apr 16 15:29:33 2014] RDX: ffffffffa00090a8 RSI: 0000000000000001 RDI: ffffffffa0008337 [Wed Apr 16 15:29:33 2014] RBP: ffff8800db25dd50 R08: 0000000000000000 R09: 0000000000000000 [Wed Apr 16 15:29:33 2014] R10: ffffea000357b680 R11: ffffffff8113257a R12: ffffffffa000d000 [Wed Apr 16 15:29:33 2014] R13: ffffffffa00094c0 R14: ffffffffa0009510 R15: 0000000000000001 [Wed Apr 16 15:29:33 2014] FS: 00007fee30ce5700(0000) GS:ffff88021fc00000(0000) knlGS:0000000000000000 [Wed Apr 16 15:29:33 2014] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [Wed Apr 16 15:29:33 2014] CR2: ffffffffa0003000 CR3: 00000000d5eb1000 CR4: 00000000000006f0 [Wed Apr 16 15:29:33 2014] Stack: [Wed Apr 16 15:29:33 2014] ffffffffa000d02c 0000000000000000 ffff88021700d400 0000000000000000 [Wed Apr 16 15:29:33 2014] ffff8800db25dd40 ffffffff81647951 ffff8802162bd000 ffff88021541846c [Wed Apr 16 15:29:33 2014] 0000000000000000 ffffffff817dbe2d ffffffff817dbe2d 0000000000000001 [Wed Apr 16 15:29:33 2014] Call Trace: [Wed Apr 16 15:29:33 2014] [<ffffffffa000d02c>] ? rcu_torture_init+0x2c/0x8b4 [rcutorture] [Wed Apr 16 15:29:33 2014] [<ffffffff81647951>] ? netlink_broadcast_filtered+0x121/0x3a0 [Wed Apr 16 15:29:33 2014] [<ffffffff817dbe2d>] ? mutex_lock+0xd/0x2a [Wed Apr 16 15:29:33 2014] [<ffffffff817dbe2d>] ? mutex_lock+0xd/0x2a [Wed Apr 16 15:29:33 2014] [<ffffffff810e7022>] ? trace_module_notify+0x62/0x1d0 [Wed Apr 16 15:29:33 2014] [<ffffffffa000d000>] ? 0xffffffffa000cfff [Wed Apr 16 15:29:33 2014] [<ffffffff8100034a>] do_one_initcall+0xfa/0x140 [Wed Apr 16 15:29:33 2014] [<ffffffff8106b4ce>] ? __blocking_notifier_call_chain+0x5e/0x80 [Wed Apr 16 15:29:33 2014] [<ffffffff810b3481>] load_module+0x1931/0x21b0 [Wed Apr 16 15:29:33 2014] [<ffffffff810b0330>] ? show_initstate+0x50/0x50 [Wed Apr 16 15:29:33 2014] [<ffffffff810b3d9e>] SyS_init_module+0x9e/0xc0 [Wed Apr 16 15:29:33 2014] [<ffffffff817e4c22>] system_call_fastpath+0x16/0x1b [Wed Apr 16 15:29:33 2014] Code: Bad RIP value. [Wed Apr 16 15:29:33 2014] RIP [<ffffffffa0003000>] 0xffffffffa0003000 [Wed Apr 16 15:29:33 2014] RSP <ffff8800db25dca0> [Wed Apr 16 15:29:33 2014] CR2: ffffffffa0003000 [Wed Apr 16 15:29:33 2014] ---[ end trace 3e88c173037af84b ]--- This splat is due to the fact that torture_init_begin() and torture_init_end() are both marked with __init, despite their use at runtime. This commit therefore removes __init from both functions. Signed-off-by: Pranith Kumar <bobby.prani@gmail.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org>
2014-04-17 04:42:09 +08:00
void torture_init_end(void)
{
mutex_unlock(&fullstop_mutex);
register_reboot_notifier(&torture_shutdown_nb);
}
EXPORT_SYMBOL_GPL(torture_init_end);
/*
* Clean up torture module. Please note that this is -not- invoked via
* the usual module_exit() mechanism, but rather by an explicit call from
* the client torture module. Returns true if a race with system shutdown
* is detected, otherwise, all kthreads started by functions in this file
* will be shut down.
*
* This must be called before the caller starts shutting down its own
* kthreads.
*
* Both torture_cleanup_begin() and torture_cleanup_end() must be paired,
* in order to correctly perform the cleanup. They are separated because
* threads can still need to reference the torture_type type, thus nullify
* only after completing all other relevant calls.
*/
bool torture_cleanup_begin(void)
{
mutex_lock(&fullstop_mutex);
if (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
pr_warn("Concurrent rmmod and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
schedule_timeout_uninterruptible(10);
return true;
}
WRITE_ONCE(fullstop, FULLSTOP_RMMOD);
mutex_unlock(&fullstop_mutex);
torture_shutdown_cleanup();
torture_shuffle_cleanup();
torture_stutter_cleanup();
torture_onoff_cleanup();
return false;
}
EXPORT_SYMBOL_GPL(torture_cleanup_begin);
void torture_cleanup_end(void)
{
mutex_lock(&fullstop_mutex);
torture_type = NULL;
mutex_unlock(&fullstop_mutex);
}
EXPORT_SYMBOL_GPL(torture_cleanup_end);
/*
* Is it time for the current torture test to stop?
*/
bool torture_must_stop(void)
{
return torture_must_stop_irq() || kthread_should_stop();
}
EXPORT_SYMBOL_GPL(torture_must_stop);
/*
* Is it time for the current torture test to stop? This is the irq-safe
* version, hence no check for kthread_should_stop().
*/
bool torture_must_stop_irq(void)
{
return READ_ONCE(fullstop) != FULLSTOP_DONTSTOP;
}
EXPORT_SYMBOL_GPL(torture_must_stop_irq);
/*
* Each kthread must wait for kthread_should_stop() before returning from
* its top-level function, otherwise segfaults ensue. This function
* prints a "stopping" message and waits for kthread_should_stop(), and
* should be called from all torture kthreads immediately prior to
* returning.
*/
void torture_kthread_stopping(char *title)
{
char buf[128];
snprintf(buf, sizeof(buf), "%s is stopping", title);
VERBOSE_TOROUT_STRING(buf);
while (!kthread_should_stop()) {
torture_shutdown_absorb(title);
torture: Fix hang during kthread shutdown phase During rcutorture shutdown, the rcu_torture_cleanup() function calls torture_cleanup_begin(), which sets the fullstop global variable to FULLSTOP_RMMOD. This causes the rcutorture threads for readers and fakewriters to exit all of their "while" loops and start shutting down. They then call torture_kthread_stopping(), which in turn waits for kthread_stop() to be called. However, rcu_torture_cleanup() has not yet called kthread_stop() on those threads, and before it gets a chance to do so, multiple instances of torture_kthread_stopping() invoke schedule_timeout_interruptible(1) in a tight loop. Tracing confirms that TIMER_SOFTIRQ can then continuously execute timer callbacks. If that TIMER_SOFTIRQ preempts the task executing rcu_torture_cleanup(), that task might never invoke kthread_stop(). This commit improves this situation by increasing the timeout passed to schedule_timeout_interruptible() from one jiffy to 1/20th of a second. This change prevents TIMER_SOFTIRQ from monopolizing its CPU, thus allowing rcu_torture_cleanup() to carry out the needed kthread_stop() invocations. Testing has shown 100 runs of TREE07 passing reliably, as oppose to the tens-of-percent failure rates seen beforehand. Cc: Paul McKenney <paulmck@kernel.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Zhouyi Zhou <zhouzhouyi@gmail.com> Cc: <stable@vger.kernel.org> # 6.0.x Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> Tested-by: Zhouyi Zhou <zhouzhouyi@gmail.com> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2023-01-01 14:15:55 +08:00
schedule_timeout_uninterruptible(HZ / 20);
}
}
EXPORT_SYMBOL_GPL(torture_kthread_stopping);
/*
* Create a generic torture kthread that is immediately runnable. If you
* need the kthread to be stopped so that you can do something to it before
* it starts, you will need to open-code your own.
*/
int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
char *f, struct task_struct **tp)
{
int ret = 0;
VERBOSE_TOROUT_STRING(m);
*tp = kthread_create(fn, arg, "%s", s);
if (IS_ERR(*tp)) {
ret = PTR_ERR(*tp);
TOROUT_ERRSTRING(f);
*tp = NULL;
return ret;
}
wake_up_process(*tp); // Process is sleeping, so ordering provided.
torture_shuffle_task_register(*tp);
return ret;
}
EXPORT_SYMBOL_GPL(_torture_create_kthread);
/*
* Stop a generic kthread, emitting a message.
*/
void _torture_stop_kthread(char *m, struct task_struct **tp)
{
if (*tp == NULL)
return;
VERBOSE_TOROUT_STRING(m);
kthread_stop(*tp);
*tp = NULL;
}
EXPORT_SYMBOL_GPL(_torture_stop_kthread);