Updates for timers and timekeeping core code:

- Expose CLOCK_TAI to instrumentation to aid with TSN debugging.
 
   - Ensure that the clockevent is stopped when there is no timer armed to
     avoid pointless wakeups.
 
   - Make the sched clock frequency handling and rounding consistent.
 
   - Provide a better debugobject hint for delayed works. The timer callback
     is always the same, which makes it difficult to identify the underlying
     work. Use the work function as a hint instead.
 
   - Move the timer specific sysctl code into the timer subsystem.
 
   - The usual set of improvements and cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmKLPHMTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoZBoEACIURtS8w9PFZ6q/2mFq0pTYi/uI/HQ
 vqbB6gCbrjfL6QwInd7jxDc/UoqEOllG9pTaGdWx/0Gi9syDosEbeop7cvvt2xi+
 pReoEN1kVI3JAVrQFIAuGw4EMuzYB8PfuZkm1PdozcCP9qkgDmtippVxe05sFQ+/
 RPdA29vE3g63eXkSFBhEID23pQR8yKLbqVq6KcH87OipZedL+2fry3yB+/9sLuuU
 /PFLbI6B9f43S2sfo6szzpFkpd6tJlBlu02IrB6gh4IxKrslmZb5onpvcf6iT+19
 rFh5A15GFWoZUC8EjH1sBpATq3wA/jfGEOPWgy07N5SmobtJvWSM5yvT+gC3qXqm
 C/bjyjqXzLKftG7KIXo/hWewtsjdovMbdfcMBsGiatytNBZfI1GR/4Pq60/qpTHZ
 qJo35trOUcP6o1njphwONy3lisq78S7xaozpWO1hIMTcAqGgBkm/lOieGMM4hGnE
 Ps0Im3ZsOXNGllulN+3h+UHstM5/y6f/vzBsw7pfIG66i6KqebAiNjbMfHCr22sX
 7UavNCoFggUQgZVgUYX/AscdW4/Dwx6R5YUqj1EBqztknd70Ac4TqjaIz4Xa6ZER
 z+eQSSt5XqqV2eKWA4FsQYmCIc+BvQ4apSA6+whz9vmsvCYtB7zzSfeh+xkgcl1/
 Cc0N6G5+L9v0Gw==
 =De28
 -----END PGP SIGNATURE-----

Merge tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer and timekeeping updates from Thomas Gleixner:

 - Expose CLOCK_TAI to instrumentation to aid with TSN debugging.

 - Ensure that the clockevent is stopped when there is no timer armed to
   avoid pointless wakeups.

 - Make the sched clock frequency handling and rounding consistent.

 - Provide a better debugobject hint for delayed works. The timer
   callback is always the same, which makes it difficult to identify the
   underlying work. Use the work function as a hint instead.

 - Move the timer specific sysctl code into the timer subsystem.

 - The usual set of improvements and cleanups

* tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timers: Provide a better debugobjects hint for delayed works
  time/sched_clock: Fix formatting of frequency reporting code
  time/sched_clock: Use Hz as the unit for clock rate reporting below 4kHz
  time/sched_clock: Round the frequency reported to nearest rather than down
  timekeeping: Consolidate fast timekeeper
  timekeeping: Annotate ktime_get_boot_fast_ns() with data_race()
  timers/nohz: Switch to ONESHOT_STOPPED in the low-res handler when the tick is stopped
  timekeeping: Introduce fast accessor to clock tai
  tracing/timer: Add missing argument documentation of trace points
  clocksource: Replace cpumask_weight() with cpumask_empty()
  timers: Move timer sysctl into the timer code
  clockevents: Use dedicated list iterator variable
  timers: Simplify calc_index()
  timers: Initialize base::next_expiry_recalc in timers_prepare_cpu()
This commit is contained in:
Linus Torvalds 2022-05-23 17:05:55 -07:00
commit 6e01f86fb2
11 changed files with 127 additions and 62 deletions

View File

@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
.. c:function:: u64 ktime_get_mono_fast_ns( void )
u64 ktime_get_raw_fast_ns( void )
u64 ktime_get_boot_fast_ns( void )
u64 ktime_get_tai_fast_ns( void )
u64 ktime_get_real_fast_ns( void )
These variants are safe to call from any context, including from

View File

@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
extern u64 ktime_get_mono_fast_ns(void);
extern u64 ktime_get_raw_fast_ns(void);
extern u64 ktime_get_boot_fast_ns(void);
extern u64 ktime_get_tai_fast_ns(void);
extern u64 ktime_get_real_fast_ns(void);
/*

View File

@ -196,14 +196,6 @@ extern void init_timers(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
struct ctl_table;
extern unsigned int sysctl_timer_migration;
int timer_migration_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
unsigned long __round_jiffies(unsigned long j, int cpu);
unsigned long __round_jiffies_relative(unsigned long j, int cpu);
unsigned long round_jiffies(unsigned long j);

View File

@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
* timer_start - called when the timer is started
* @timer: pointer to struct timer_list
* @expires: the timers expiry time
* @flags: the timers flags
*/
TRACE_EVENT(timer_start,
@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
/**
* timer_expire_entry - called immediately before the timer callback
* @timer: pointer to struct timer_list
* @baseclk: value of timer_base::clk when timer expires
*
* Allows to determine the timer latency.
*/
@ -190,7 +192,8 @@ TRACE_EVENT(hrtimer_init,
/**
* hrtimer_start - called when the hrtimer is started
* @hrtimer: pointer to struct hrtimer
* @hrtimer: pointer to struct hrtimer
* @mode: the hrtimers mode
*/
TRACE_EVENT(hrtimer_start,

View File

@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
{
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_BPF_SYSCALL
{
.procname = "unprivileged_bpf_disabled",

View File

@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
{
char name[CS_NAME_LEN];
ssize_t ret = sysfs_get_uname(buf, name, count);
struct clock_event_device *ce;
struct clock_event_device *ce = NULL, *iter;
if (ret < 0)
return ret;
@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
ret = -ENODEV;
mutex_lock(&clockevents_mutex);
raw_spin_lock_irq(&clockevents_lock);
list_for_each_entry(ce, &clockevent_devices, list) {
if (!strcmp(ce->name, name)) {
ret = __clockevents_try_unbind(ce, dev->id);
list_for_each_entry(iter, &clockevent_devices, list) {
if (!strcmp(iter->name, name)) {
ret = __clockevents_try_unbind(iter, dev->id);
ce = iter;
break;
}
}

View File

@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
cpus_read_lock();
preempt_disable();
clocksource_verify_choose_cpus();
if (cpumask_weight(&cpus_chosen) == 0) {
if (cpumask_empty(&cpus_chosen)) {
preempt_enable();
cpus_read_unlock();
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);

View File

@ -8,6 +8,7 @@
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/kernel.h>
#include <linux/math.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
@ -199,15 +200,13 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
r = rate;
if (r >= 4000000) {
r /= 1000000;
r = DIV_ROUND_CLOSEST(r, 1000000);
r_unit = 'M';
} else if (r >= 4000) {
r = DIV_ROUND_CLOSEST(r, 1000);
r_unit = 'k';
} else {
if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else {
r_unit = ' ';
}
r_unit = ' ';
}
/* Calculate the ns resolution of this counter */

View File

@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
else
tick_program_event(KTIME_MAX, 1);
return;
}
@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
tick_sched_do_timer(ts, now);
tick_sched_handle(ts, regs);
/* No need to reprogram if we are running tickless */
if (unlikely(ts->tick_stopped))
if (unlikely(ts->tick_stopped)) {
/*
* The clockevent device is not reprogrammed, so change the
* clock event device to ONESHOT_STOPPED to avoid spurious
* interrupts on devices which might not be truly one shot.
*/
tick_program_event(KTIME_MAX, 1);
return;
}
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);

View File

@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
memcpy(base + 1, base, sizeof(*base));
}
static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
{
u64 delta, cycles = tk_clock_read(tkr);
delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
return timekeeping_delta_to_ns(tkr, delta);
}
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
now += fast_tk_get_delta_ns(tkr);
} while (read_seqcount_latch_retry(&tkf->seq, seq));
return now;
@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/**
* ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
*
* The same limitations as described for ktime_get_boot_fast_ns() apply. The
* mono time and the TAI offset are not read atomically which may yield wrong
* readouts. However, an update of the TAI offset is an rare event e.g., caused
* by settime or adjtimex with an offset. The user of this function has to deal
* with the possibility of wrong timestamps in post processing.
*/
u64 notrace ktime_get_tai_fast_ns(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
}
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
{
struct tk_read_base *tkr;
@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
tkr = tkf->base + (seq & 0x01);
basem = ktime_to_ns(tkr->base);
baser = ktime_to_ns(tkr->base_real);
delta = timekeeping_delta_to_ns(tkr,
clocksource_delta(tk_clock_read(tkr),
tkr->cycle_last, tkr->mask));
delta = fast_tk_get_delta_ns(tkr);
} while (read_seqcount_latch_retry(&tkf->seq, seq));
if (mono)

View File

@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>
#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);
#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;
static unsigned int sysctl_timer_migration = 1;
DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
@ -234,7 +235,42 @@ static void timers_update_migration(void)
else
static_branch_disable(&timers_migration_enabled);
}
#else
#ifdef CONFIG_SYSCTL
static int timer_migration_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
mutex_lock(&timer_keys_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration();
mutex_unlock(&timer_keys_mutex);
return ret;
}
static struct ctl_table timer_sysctl[] = {
{
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{}
};
static int __init timer_sysctl_init(void)
{
register_sysctl("kernel", timer_sysctl);
return 0;
}
device_initcall(timer_sysctl_init);
#endif /* CONFIG_SYSCTL */
#else /* CONFIG_SMP */
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */
@ -251,19 +287,6 @@ void timers_update_nohz(void)
schedule_work(&timer_update_work);
}
int timer_migration_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
mutex_lock(&timer_keys_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration();
mutex_unlock(&timer_keys_mutex);
return ret;
}
static inline bool is_timers_nohz_active(void)
{
return static_branch_unlikely(&timers_nohz_active);
@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
*
* Round up with level granularity to prevent this.
*/
expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
expires = (expires >> LVL_SHIFT(lvl)) + 1;
*bucket_expiry = expires << LVL_SHIFT(lvl);
return LVL_OFFS(lvl) + (expires & LVL_MASK);
}
@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
static const struct debug_obj_descr timer_debug_descr;
struct timer_hint {
void (*function)(struct timer_list *t);
long offset;
};
#define TIMER_HINT(fn, container, timr, hintfn) \
{ \
.function = fn, \
.offset = offsetof(container, hintfn) - \
offsetof(container, timr) \
}
static const struct timer_hint timer_hints[] = {
TIMER_HINT(delayed_work_timer_fn,
struct delayed_work, timer, work.func),
TIMER_HINT(kthread_delayed_work_timer_fn,
struct kthread_delayed_work, timer, work.func),
};
static void *timer_debug_hint(void *addr)
{
return ((struct timer_list *) addr)->function;
struct timer_list *timer = addr;
int i;
for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
if (timer_hints[i].function == timer->function) {
void (**fn)(void) = addr + timer_hints[i].offset;
return *fn;
}
}
return timer->function;
}
static bool timer_is_static_object(void *addr)
@ -1953,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu)
base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies;
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
base->next_expiry_recalc = false;
base->timers_pending = false;
base->is_idle = false;
}