clocksource: Limit number of CPUs checked for clock synchronization
Currently, if skew is detected on a clock marked CLOCK_SOURCE_VERIFY_PERCPU, that clock is checked on all CPUs. This is thorough, but might not be what you want on a system with a few tens of CPUs, let alone a few hundred of them.

Therefore, by default check only up to eight randomly chosen CPUs. Also provide a new clocksource.verify_n_cpus kernel boot parameter. A value of -1 says to check all of the CPUs, and a non-negative value says to randomly select that number of CPUs, without concern about selecting the same CPU multiple times. However, make use of a cpumask so that a given CPU will be checked at most once.

Suggested-by: Thomas Gleixner <tglx@linutronix.de> # For verify_n_cpus=1.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-3-paulmck@kernel.org
This commit is contained in:
parent
7560c02bdf
commit
fa218f1cce
|
@ -587,6 +587,16 @@
|
|||
unstable. Defaults to three retries, that is,
|
||||
four attempts to read the clock under test.
|
||||
|
||||
clocksource.verify_n_cpus= [KNL]
|
||||
Limit the number of CPUs checked for clocksources
|
||||
marked with CLOCK_SOURCE_VERIFY_PERCPU that
|
||||
are marked unstable due to excessive skew.
|
||||
A negative value says to check all CPUs, while
|
||||
zero says not to check any. Values larger than
|
||||
nr_cpu_ids are silently truncated to nr_cpu_ids.
|
||||
The actual CPUs are chosen randomly, with
|
||||
no replacement if the same CPU is chosen twice.
|
||||
|
||||
clearcpuid=BITNUM[,BITNUM...] [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
arch/x86/include/asm/cpufeatures.h for the valid bit
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
|
||||
#include <linux/tick.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/prandom.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include "tick-internal.h"
|
||||
#include "timekeeping_internal.h"
|
||||
|
@ -193,6 +195,8 @@ void clocksource_mark_unstable(struct clocksource *cs)
|
|||
|
||||
static ulong max_cswd_read_retries = 3;
|
||||
module_param(max_cswd_read_retries, ulong, 0644);
|
||||
static int verify_n_cpus = 8;
|
||||
module_param(verify_n_cpus, int, 0644);
|
||||
|
||||
static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
|
||||
{
|
||||
|
@ -227,6 +231,55 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
|
|||
static u64 csnow_mid;
|
||||
static cpumask_t cpus_ahead;
|
||||
static cpumask_t cpus_behind;
|
||||
static cpumask_t cpus_chosen;
|
||||
|
||||
static void clocksource_verify_choose_cpus(void)
|
||||
{
|
||||
int cpu, i, n = verify_n_cpus;
|
||||
|
||||
if (n < 0) {
|
||||
/* Check all of the CPUs. */
|
||||
cpumask_copy(&cpus_chosen, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If no checking desired, or no other CPU to check, leave. */
|
||||
cpumask_clear(&cpus_chosen);
|
||||
if (n == 0 || num_online_cpus() <= 1)
|
||||
return;
|
||||
|
||||
/* Make sure to select at least one CPU other than the current CPU. */
|
||||
cpu = cpumask_next(-1, cpu_online_mask);
|
||||
if (cpu == smp_processor_id())
|
||||
cpu = cpumask_next(cpu, cpu_online_mask);
|
||||
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
|
||||
return;
|
||||
cpumask_set_cpu(cpu, &cpus_chosen);
|
||||
|
||||
/* Force a sane value for the boot parameter. */
|
||||
if (n > nr_cpu_ids)
|
||||
n = nr_cpu_ids;
|
||||
|
||||
/*
|
||||
* Randomly select the specified number of CPUs. If the same
|
||||
* CPU is selected multiple times, that CPU is checked only once,
|
||||
* and no replacement CPU is selected. This gracefully handles
|
||||
* situations where verify_n_cpus is greater than the number of
|
||||
* CPUs that are currently online.
|
||||
*/
|
||||
for (i = 1; i < n; i++) {
|
||||
cpu = prandom_u32() % nr_cpu_ids;
|
||||
cpu = cpumask_next(cpu - 1, cpu_online_mask);
|
||||
if (cpu >= nr_cpu_ids)
|
||||
cpu = cpumask_next(-1, cpu_online_mask);
|
||||
if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
|
||||
cpumask_set_cpu(cpu, &cpus_chosen);
|
||||
}
|
||||
|
||||
/* Don't verify ourselves. */
|
||||
cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
|
||||
}
|
||||
|
||||
static void clocksource_verify_one_cpu(void *csin)
|
||||
{
|
||||
|
@ -242,12 +295,22 @@ static void clocksource_verify_percpu(struct clocksource *cs)
|
|||
int cpu, testcpu;
|
||||
s64 delta;
|
||||
|
||||
if (verify_n_cpus == 0)
|
||||
return;
|
||||
cpumask_clear(&cpus_ahead);
|
||||
cpumask_clear(&cpus_behind);
|
||||
get_online_cpus();
|
||||
preempt_disable();
|
||||
clocksource_verify_choose_cpus();
|
||||
if (cpumask_weight(&cpus_chosen) == 0) {
|
||||
preempt_enable();
|
||||
put_online_cpus();
|
||||
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
|
||||
return;
|
||||
}
|
||||
testcpu = smp_processor_id();
|
||||
pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
|
||||
for_each_online_cpu(cpu) {
|
||||
pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
|
||||
for_each_cpu(cpu, &cpus_chosen) {
|
||||
if (cpu == testcpu)
|
||||
continue;
|
||||
csnow_begin = cs->read(cs);
|
||||
|
@ -267,6 +330,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
|
|||
cs_nsec_min = cs_nsec;
|
||||
}
|
||||
preempt_enable();
|
||||
put_online_cpus();
|
||||
if (!cpumask_empty(&cpus_ahead))
|
||||
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
|
||||
cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
|
||||
|
@ -337,6 +401,12 @@ static void clocksource_watchdog(struct timer_list *unused)
|
|||
watchdog->name, wdnow, wdlast, watchdog->mask);
|
||||
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
|
||||
cs->name, csnow, cslast, cs->mask);
|
||||
if (curr_clocksource == cs)
|
||||
pr_warn(" '%s' is current clocksource.\n", cs->name);
|
||||
else if (curr_clocksource)
|
||||
pr_warn(" '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
|
||||
else
|
||||
pr_warn(" No current clocksource.\n");
|
||||
__clocksource_unstable(cs);
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue