diff --git a/MAINTAINERS b/MAINTAINERS index 050d0e77a2cf..b2f6b0463678 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9348,7 +9348,7 @@ M: Andreas Noever S: Maintained F: drivers/thunderbolt/ -TIMEKEEPING, CLOCKSOURCE CORE, NTP +TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz M: Thomas Gleixner L: linux-kernel@vger.kernel.org @@ -9361,6 +9361,7 @@ F: include/uapi/linux/time.h F: include/uapi/linux/timex.h F: kernel/time/clocksource.c F: kernel/time/time*.c +F: kernel/time/alarmtimer.c F: kernel/time/ntp.c F: tools/testing/selftests/timers/ diff --git a/include/linux/time.h b/include/linux/time.h index beebe3a02d43..297f09f23896 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,6 +125,32 @@ static inline bool timeval_valid(const struct timeval *tv) extern struct timespec timespec_trunc(struct timespec t, unsigned gran); +/* + * Checks whether a timespec/timeval used to inject a time offset is valid. + * Offsets can be positive or negative. The value of the timeval/timespec + * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must + * always be non-negative. 
+ */ +static inline bool timeval_inject_offset_valid(const struct timeval *tv) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* tv_usec must be within [0, USEC_PER_SEC) */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec_inject_offset_valid(const struct timespec *ts) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* tv_nsec must be within [0, NSEC_PER_SEC) */ + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 7fbba635a549..e840ed867a5d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -271,11 +271,27 @@ static int alarmtimer_suspend(struct device *dev) __pm_wakeup_event(ws, MSEC_PER_SEC); return ret; } + +static int alarmtimer_resume(struct device *dev) +{ + struct rtc_device *rtc; + + rtc = alarmtimer_get_rtcdev(); + if (rtc) + rtc_timer_cancel(rtc, &rtctimer); + return 0; +} + #else static int alarmtimer_suspend(struct device *dev) { return 0; } + +static int alarmtimer_resume(struct device *dev) +{ + return 0; +} #endif static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) @@ -800,6 +816,7 @@ out: /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { .suspend = alarmtimer_suspend, + .resume = alarmtimer_resume, }; static struct platform_driver alarmtimer_driver = { diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1347882d131e..664de539299b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -218,8 +218,8 @@ static void clocksource_watchdog(unsigned long data) /* Check the deviation from the watchdog clocksource. 
*/ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { - pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", - cs->name); + pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n", + smp_processor_id(), cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", watchdog->name, wdnow, wdlast, watchdog->mask); pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 149cc8086aea..36f2ca09aa5e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -16,8 +16,11 @@ #include #include #include +#include #include "ntp_internal.h" +#include "timekeeping_internal.h" + /* * NTP timekeeping variables: @@ -70,7 +73,7 @@ static long time_esterror = NTP_PHASE_LIMIT; static s64 time_freq; /* time at last adjustment (secs): */ -static long time_reftime; +static time64_t time_reftime; static long time_adjust; @@ -297,25 +300,27 @@ static void ntp_update_offset(long offset) if (!(time_status & STA_PLL)) return; - if (!(time_status & STA_NANO)) + if (!(time_status & STA_NANO)) { + /* Make sure the multiplication below won't overflow */ + offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC); offset *= NSEC_PER_USEC; + } /* * Scale the phase adjustment and * clamp to the operating range. */ - offset = min(offset, MAXPHASE); - offset = max(offset, -MAXPHASE); + offset = clamp(offset, -MAXPHASE, MAXPHASE); /* * Select how the frequency is to be controlled * and in which mode (PLL or FLL). 
*/ - secs = get_seconds() - time_reftime; + secs = (long)(__ktime_get_real_seconds() - time_reftime); if (unlikely(time_status & STA_FREQHOLD)) secs = 0; - time_reftime = get_seconds(); + time_reftime = __ktime_get_real_seconds(); offset64 = offset; freq_adj = ntp_update_offset_fll(offset64, secs); @@ -390,10 +395,11 @@ ktime_t ntp_get_next_leap(void) * * Also handles leap second processing, and returns leap offset */ -int second_overflow(unsigned long secs) +int second_overflow(time64_t secs) { s64 delta; int leap = 0; + s32 rem; /* * Leap second processing. If in leap-insert state at the end of the @@ -404,19 +410,19 @@ int second_overflow(unsigned long secs) case TIME_OK: if (time_status & STA_INS) { time_state = TIME_INS; - ntp_next_leap_sec = secs + SECS_PER_DAY - - (secs % SECS_PER_DAY); + div_s64_rem(secs, SECS_PER_DAY, &rem); + ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } else if (time_status & STA_DEL) { time_state = TIME_DEL; - ntp_next_leap_sec = secs + SECS_PER_DAY - - ((secs+1) % SECS_PER_DAY); + div_s64_rem(secs + 1, SECS_PER_DAY, &rem); + ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } break; case TIME_INS: if (!(time_status & STA_INS)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; - } else if (secs % SECS_PER_DAY == 0) { + } else if (secs == ntp_next_leap_sec) { leap = -1; time_state = TIME_OOP; printk(KERN_NOTICE @@ -427,7 +433,7 @@ int second_overflow(unsigned long secs) if (!(time_status & STA_DEL)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; - } else if ((secs + 1) % SECS_PER_DAY == 0) { + } else if (secs == ntp_next_leap_sec) { leap = 1; ntp_next_leap_sec = TIME64_MAX; time_state = TIME_WAIT; @@ -590,7 +596,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts) * reference time to current time. 
*/ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) - time_reftime = get_seconds(); + time_reftime = __ktime_get_real_seconds(); /* only set allowed bits */ time_status &= STA_RONLY; @@ -674,8 +680,14 @@ int ntp_validate_timex(struct timex *txc) return -EINVAL; } - if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) - return -EPERM; + if (txc->modes & ADJ_SETOFFSET) { + /* Injecting a time offset requires CAP_SYS_TIME */ + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (!timeval_inject_offset_valid(&txc->time)) + return -EINVAL; + } /* * Check for potential multiplication overflows that can diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index af924470eac0..d8a7c11fa71a 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -6,7 +6,7 @@ extern void ntp_clear(void); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); -extern int second_overflow(unsigned long secs); +extern int second_overflow(time64_t secs); extern int ntp_validate_timex(struct timex *); extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); extern void __hardpps(const struct timespec64 *, const struct timespec64 *); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d563c1960302..34b4cedfa80d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -305,8 +305,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) delta = timekeeping_get_delta(tkr); - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); @@ -846,6 +845,19 @@ time64_t ktime_get_real_seconds(void) } EXPORT_SYMBOL_GPL(ktime_get_real_seconds); +/** + * __ktime_get_real_seconds - The same as ktime_get_real_seconds + * but without the sequence counter 
protection. This internal function + * is meant to be called only while the timekeeping lock is already held. + */ +time64_t __ktime_get_real_seconds(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return tk->xtime_sec; +} + + #ifdef CONFIG_NTP_PPS /** @@ -959,7 +971,7 @@ int timekeeping_inject_offset(struct timespec *ts) struct timespec64 ts64, tmp; int ret = 0; - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + if (!timespec_inject_offset_valid(ts)) return -EINVAL; ts64 = timespec_to_timespec64(*ts); @@ -1592,9 +1604,12 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, { s64 interval = tk->cycle_interval; s64 xinterval = tk->xtime_interval; + u32 base = tk->tkr_mono.clock->mult; + u32 max = tk->tkr_mono.clock->maxadj; + u32 cur_adj = tk->tkr_mono.mult; s64 tick_error; bool negative; - u32 adj; + u32 adj_scale; /* Remove any current error adj from freq calculation */ if (tk->ntp_err_mult) @@ -1613,13 +1628,33 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, /* preserve the direction of correction */ negative = (tick_error < 0); - /* Sort out the magnitude of the correction */ + /* If any adjustment would pass the max, just return */ + if (negative && (cur_adj - 1) <= (base - max)) + return; + if (!negative && (cur_adj + 1) >= (base + max)) + return; + /* + * Sort out the magnitude of the correction, but + * avoid making so large a correction that we go + * over the max adjustment. 
+ */ + adj_scale = 0; tick_error = abs(tick_error); - for (adj = 0; tick_error > interval; adj++) + while (tick_error > interval) { + u32 adj = 1 << (adj_scale + 1); + + /* Check if adjustment gets us within 1 unit from the max */ + if (negative && (cur_adj - adj) <= (base - max)) + break; + if (!negative && (cur_adj + adj) >= (base + max)) + break; + + adj_scale++; tick_error >>= 1; + } /* scale the corrections */ - timekeeping_apply_adjustment(tk, offset, negative, adj); + timekeeping_apply_adjustment(tk, offset, negative, adj_scale); } /* diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 4ea005a7f9da..e20466ffc208 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -26,4 +26,6 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask) } #endif +extern time64_t __ktime_get_real_seconds(void); + #endif /* _TIMEKEEPING_INTERNAL_H */ diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index 627ec7425f78..fd88e3025bed 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -97,7 +97,7 @@ int get_cur_clocksource(char *buf, size_t size) int change_clocksource(char *clocksource) { int fd; - size_t size; + ssize_t size; fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);