2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/kernel/time.c
|
|
|
|
*
|
|
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
|
|
*
|
|
|
|
* This file contains the interface functions for the various
|
|
|
|
* time related system calls: time, stime, gettimeofday, settimeofday,
|
|
|
|
* adjtime
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Modification history kernel/time.c
|
2007-10-18 18:06:03 +08:00
|
|
|
*
|
2005-04-17 06:20:36 +08:00
|
|
|
* 1993-09-02 Philip Gladstone
|
2013-06-04 15:40:24 +08:00
|
|
|
* Created file with time related functions from sched/core.c and adjtimex()
|
2005-04-17 06:20:36 +08:00
|
|
|
* 1993-10-08 Torsten Duwe
|
|
|
|
* adjtime interface update and CMOS clock write code
|
|
|
|
* 1995-08-13 Torsten Duwe
|
|
|
|
* kernel PLL updated to 1994-12-13 specs (rfc-1589)
|
|
|
|
* 1999-01-16 Ulrich Windl
|
|
|
|
* Introduced error checking for many cases in adjtimex().
|
|
|
|
* Updated NTP code according to technical memorandum Jan '96
|
|
|
|
* "A Kernel Model for Precision Timekeeping" by Dave Mills
|
|
|
|
* Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
|
|
|
|
* (Even though the technical memorandum forbids it)
|
|
|
|
* 2004-07-14 Christoph Lameter
|
|
|
|
* Added getnstimeofday to allow the posix timer functions to return
|
|
|
|
* with nanosecond accuracy
|
|
|
|
*/
|
|
|
|
|
2011-05-24 02:51:41 +08:00
|
|
|
#include <linux/export.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/timex.h>
|
2006-01-12 04:17:46 +08:00
|
|
|
#include <linux/capability.h>
|
2012-09-05 03:27:48 +08:00
|
|
|
#include <linux/timekeeper_internal.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/syscalls.h>
|
|
|
|
#include <linux/security.h>
|
|
|
|
#include <linux/fs.h>
|
2008-05-01 19:34:26 +08:00
|
|
|
#include <linux/math64.h>
|
2009-01-07 06:41:02 +08:00
|
|
|
#include <linux/ptrace.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
|
2015-05-18 20:19:12 +08:00
|
|
|
#include <generated/timeconst.h>
|
2014-07-17 05:04:02 +08:00
|
|
|
#include "timekeeping.h"
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms,
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
|
2007-10-18 18:06:03 +08:00
|
|
|
/*
|
2005-04-17 06:20:36 +08:00
|
|
|
* The timezone where the local system is located. Used as a default by some
|
|
|
|
* programs that obtain this value by using gettimeofday.
|
|
|
|
*/
|
|
|
|
struct timezone sys_tz;	/* written by do_sys_settimeofday(), copied out by gettimeofday() */
|
|
|
|
|
|
|
|
EXPORT_SYMBOL(sys_tz);
|
|
|
|
|
|
|
|
#ifdef __ARCH_WANT_SYS_TIME
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sys_time() can be implemented in user-level using
|
|
|
|
* sys_gettimeofday(). Is this for backwards compatibility? If so,
|
|
|
|
* why not move it into the appropriate arch directory (for those
|
|
|
|
* architectures that need it).
|
|
|
|
*/
|
2009-01-14 21:14:03 +08:00
|
|
|
SYSCALL_DEFINE1(time, time_t __user *, tloc)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2007-10-16 22:09:20 +08:00
|
|
|
time_t i = get_seconds();
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (tloc) {
|
2007-07-21 04:28:54 +08:00
|
|
|
if (put_user(i,tloc))
|
2009-01-07 06:41:02 +08:00
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2009-01-07 06:41:02 +08:00
|
|
|
force_successful_syscall_return();
|
2005-04-17 06:20:36 +08:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sys_stime() can be implemented in user-level using
|
|
|
|
* sys_settimeofday(). Is this for backwards compatibility? If so,
|
|
|
|
* why not move it into the appropriate arch directory (for those
|
|
|
|
* architectures that need it).
|
|
|
|
*/
|
2007-10-18 18:06:03 +08:00
|
|
|
|
2009-01-14 21:14:03 +08:00
|
|
|
SYSCALL_DEFINE1(stime, time_t __user *, tptr)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct timespec tv;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (get_user(tv.tv_sec, tptr))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
tv.tv_nsec = 0;
|
|
|
|
|
|
|
|
err = security_settime(&tv, NULL);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
do_settimeofday(&tv);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* __ARCH_WANT_SYS_TIME */
|
|
|
|
|
2009-01-14 21:14:03 +08:00
|
|
|
/*
 * gettimeofday - copy the current time and/or the system timezone to
 * user space.
 * @tv: optional destination for the time filled in by do_gettimeofday()
 * @tz: optional destination for a copy of sys_tz
 *
 * Either pointer may be NULL, in which case that part is skipped.
 * Returns 0 on success or -EFAULT if a copy to user space fails.
 */
SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
		struct timezone __user *, tz)
{
	if (likely(tv != NULL)) {
		struct timeval now;

		do_gettimeofday(&now);
		if (copy_to_user(tv, &now, sizeof(now)) != 0)
			return -EFAULT;
	}

	if (unlikely(tz != NULL) &&
	    copy_to_user(tz, &sys_tz, sizeof(sys_tz)) != 0)
		return -EFAULT;

	return 0;
}
|
|
|
|
|
2013-02-09 06:59:53 +08:00
|
|
|
/*
|
|
|
|
* Indicates if there is an offset between the system clock and the hardware
|
|
|
|
* clock/persistent clock/rtc.
|
|
|
|
*/
|
|
|
|
int persistent_clock_is_local;	/* set to 1 by warp_clock() when it shifts the clock */
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Adjust the time obtained from the CMOS to be UTC time instead of
|
|
|
|
* local time.
|
2007-10-18 18:06:03 +08:00
|
|
|
*
|
2005-04-17 06:20:36 +08:00
|
|
|
* This is ugly, but preferable to the alternatives. Otherwise we
|
|
|
|
* would either need to write a program to do it in /etc/rc (and risk
|
2007-10-18 18:06:03 +08:00
|
|
|
* confusion if the program gets run more than once; it would also be
|
2005-04-17 06:20:36 +08:00
|
|
|
* hard to make the program warp the clock precisely n hours) or
|
|
|
|
* compile in the timezone information into the kernel. Bad, bad....
|
|
|
|
*
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
* - TYT, 1992-01-01
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* The best thing to do is to keep the CMOS clock in universal time (UTC)
|
|
|
|
* as real UNIX machines always do it. This avoids all headaches about
|
|
|
|
* daylight saving times and warping kernel clocks.
|
|
|
|
*/
|
2005-07-28 02:46:09 +08:00
|
|
|
static inline void warp_clock(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2012-12-06 22:03:34 +08:00
|
|
|
if (sys_tz.tz_minuteswest != 0) {
|
|
|
|
struct timespec adjust;
|
2010-05-23 14:14:45 +08:00
|
|
|
|
2013-02-09 06:59:53 +08:00
|
|
|
persistent_clock_is_local = 1;
|
2013-02-23 04:33:29 +08:00
|
|
|
adjust.tv_sec = sys_tz.tz_minuteswest * 60;
|
|
|
|
adjust.tv_nsec = 0;
|
|
|
|
timekeeping_inject_offset(&adjust);
|
2012-12-06 22:03:34 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In case for some reason the CMOS clock has not already been running
|
|
|
|
* in UTC, but in some local time: The first time we set the timezone,
|
|
|
|
* we will warp the clock so that it is ticking UTC time instead of
|
|
|
|
* local time. Presumably, if someone is setting the timezone then we
|
|
|
|
* are running in an environment where the programs understand about
|
|
|
|
* timezones. This should be done at boot time in the /etc/rc script,
|
|
|
|
* as soon as possible, so that the clock can be set right. Otherwise,
|
|
|
|
* various programs will get confused when the clock gets warped.
|
|
|
|
*/
|
|
|
|
|
2011-02-01 21:50:58 +08:00
|
|
|
int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
static int firsttime = 1;
|
|
|
|
int error = 0;
|
|
|
|
|
2006-02-01 02:16:55 +08:00
|
|
|
if (tv && !timespec_valid(tv))
|
2006-01-10 12:52:29 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
error = security_settime(tv, tz);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
if (tz) {
|
2014-12-02 12:04:06 +08:00
|
|
|
/* Verify we're witin the +-15 hrs range */
|
|
|
|
if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
sys_tz = *tz;
|
2007-10-18 18:04:57 +08:00
|
|
|
update_vsyscall_tz();
|
2005-04-17 06:20:36 +08:00
|
|
|
if (firsttime) {
|
|
|
|
firsttime = 0;
|
|
|
|
if (!tv)
|
|
|
|
warp_clock();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (tv)
|
|
|
|
return do_settimeofday(tv);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-01-14 21:14:03 +08:00
|
|
|
/*
 * settimeofday - set the time and/or timezone from user-space values.
 * @tv: optional new time as a struct timeval
 * @tz: optional new timezone
 *
 * Each non-NULL argument is copied in from user space; the timeval is
 * checked with timeval_valid() and converted to a timespec.  The work
 * itself is done by do_sys_settimeofday(), which receives NULL for any
 * argument the caller left out.
 *
 * Returns -EFAULT if a copy from user space fails, -EINVAL for an
 * invalid timeval, otherwise the result of do_sys_settimeofday().
 */
SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
		struct timezone __user *, tz)
{
	struct timeval user_tv;
	struct timespec new_ts;
	struct timezone new_tz;
	struct timespec *ts_arg = NULL;
	struct timezone *tz_arg = NULL;

	if (tv != NULL) {
		if (copy_from_user(&user_tv, tv, sizeof(*tv)))
			return -EFAULT;

		if (!timeval_valid(&user_tv))
			return -EINVAL;

		new_ts.tv_sec = user_tv.tv_sec;
		new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
		ts_arg = &new_ts;
	}

	if (tz != NULL) {
		if (copy_from_user(&new_tz, tz, sizeof(*tz)))
			return -EFAULT;
		tz_arg = &new_tz;
	}

	return do_sys_settimeofday(ts_arg, tz_arg);
}
|
|
|
|
|
2009-01-14 21:14:03 +08:00
|
|
|
/*
 * adjtimex - run do_adjtimex() on a user-supplied struct timex.
 * @txc_p: user pointer to the structure; read on entry and written back
 *         after do_adjtimex() has run
 *
 * Returns -EFAULT if either copy fails, otherwise whatever
 * do_adjtimex() returned.
 */
SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
{
	struct timex txc;		/* Local copy of parameter */
	int ret;

	/*
	 * Copy the user data space into the kernel copy
	 * structure. But bear in mind that the structures
	 * may change
	 */
	if (copy_from_user(&txc, txc_p, sizeof(txc)))
		return -EFAULT;

	ret = do_adjtimex(&txc);

	/* The copy back happens even when do_adjtimex() reported an error. */
	if (copy_to_user(txc_p, &txc, sizeof(txc)))
		return -EFAULT;

	return ret;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* current_fs_time - Return FS time
|
|
|
|
* @sb: Superblock.
|
|
|
|
*
|
2006-04-01 07:41:22 +08:00
|
|
|
* Return the current time truncated to the time granularity supported by
|
2005-04-17 06:20:36 +08:00
|
|
|
* the fs.
|
|
|
|
*/
|
|
|
|
struct timespec current_fs_time(struct super_block *sb)
|
|
|
|
{
|
|
|
|
struct timespec now = current_kernel_time();
|
|
|
|
return timespec_trunc(now, sb->s_time_gran);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(current_fs_time);
|
|
|
|
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isn't. jiffies_to_usecs() was uninlined, so the C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
/*
|
|
|
|
* Convert jiffies to milliseconds and back.
|
|
|
|
*
|
|
|
|
* Avoid unnecessary multiplications/divisions in the
|
|
|
|
* two most common HZ cases:
|
|
|
|
*/
|
2013-02-22 08:42:40 +08:00
|
|
|
unsigned int jiffies_to_msecs(const unsigned long j)
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isnt. jiffies_to_usec() was unilined so C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
{
|
|
|
|
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
|
|
|
|
return (MSEC_PER_SEC / HZ) * j;
|
|
|
|
#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
|
|
|
|
return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
|
|
|
|
#else
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
# if BITS_PER_LONG == 32
|
2008-05-03 07:18:42 +08:00
|
|
|
return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
# else
|
|
|
|
return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
|
|
|
|
# endif
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isnt. jiffies_to_usec() was unilined so C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(jiffies_to_msecs);
|
|
|
|
|
2013-02-22 08:42:40 +08:00
|
|
|
unsigned int jiffies_to_usecs(const unsigned long j)
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isn't. jiffies_to_usecs() was uninlined, so the C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
{
|
2014-10-10 08:44:01 +08:00
|
|
|
/*
|
|
|
|
* Hz usually doesn't go much beyond MSEC_PER_SEC.
|
|
|
|
* jiffies_to_usecs() and usecs_to_jiffies() depend on that.
|
|
|
|
*/
|
|
|
|
BUILD_BUG_ON(HZ > USEC_PER_SEC);
|
|
|
|
|
|
|
|
#if !(USEC_PER_SEC % HZ)
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isn't. jiffies_to_usecs() was uninlined, so the C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
return (USEC_PER_SEC / HZ) * j;
|
|
|
|
#else
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
# if BITS_PER_LONG == 32
|
2008-05-03 07:18:42 +08:00
|
|
|
return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
# else
|
|
|
|
return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
|
|
|
|
# endif
|
Optimize timespec_trunc()
The first thing done by timespec_trunc() is :
if (gran <= jiffies_to_usecs(1) * 1000)
This should really be a test against a constant known at compile time.
Alas, it isnt. jiffies_to_usec() was unilined so C compiler emits a function
call and a multiply to compute : a CONSTANT.
mov $0x1,%edi
mov %rbx,0xffffffffffffffe8(%rbp)
mov %r12,0xfffffffffffffff0(%rbp)
mov %edx,%ebx
mov %rsi,0xffffffffffffffc8(%rbp)
mov %rsi,%r12
callq ffffffff80232010 <jiffies_to_usecs>
imul $0x3e8,%eax,%eax
cmp %ebx,%eax
This patch reorders kernel/time.c a bit so that jiffies_to_usecs() is defined
before timespec_trunc() so that compiler now generates :
cmp $0x3d0900,%edx (HZ=250 on my machine)
This gives a better code (timespec_trunc() becoming a leaf function), and
shorter kernel size as well.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-08 15:25:32 +08:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(jiffies_to_usecs);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
2006-04-01 07:41:22 +08:00
|
|
|
* timespec_trunc - Truncate timespec to a granularity
|
2005-04-17 06:20:36 +08:00
|
|
|
* @t: Timespec
|
2006-04-01 07:41:22 +08:00
|
|
|
* @gran: Granularity in ns.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
time: Fix nanosecond file time rounding in timespec_trunc()
timespec_trunc() avoids rounding if granularity <= nanoseconds-per-jiffie
(or TICK_NSEC). This optimization assumes that:
1. current_kernel_time().tv_nsec is already rounded to TICK_NSEC (i.e.
with HZ=1000 you'd get 1000000, 2000000, 3000000... but never 1000001).
This is no longer true (probably since hrtimers introduced in 2.6.16).
2. TICK_NSEC is evenly divisible by all possible granularities. This may
be true for HZ=100, 250, 1000, but obviously not for HZ=300 /
TICK_NSEC=3333333 (introduced in 2.6.20).
Thus, sub-second portions of in-core file times are not rounded to on-disk
granularity. I.e. file times may change when the inode is re-read from disk
or when the file system is remounted.
This affects all file systems with file time granularities > 1 ns and < 1s,
e.g. CEPH (1000 ns), UDF (1000 ns), CIFS (100 ns), NTFS (100 ns) and FUSE
(configurable from user mode via struct fuse_init_out.time_gran).
Steps to reproduce with e.g. UDF:
$ dd if=/dev/zero of=udfdisk count=10000 && mkudffs udfdisk
$ mkdir udf && mount udfdisk udf
$ touch udf/test && stat -c %y udf/test
2015-06-09 10:22:56.130006767 +0200
$ umount udf && mount udfdisk udf
$ stat -c %y udf/test
2015-06-09 10:22:56.130006000 +0200
Remounting truncates the mtime to 1 µs.
Fix the rounding in timespec_trunc() and update the documentation.
timespec_trunc() is exclusively used to calculate inode's [acm]time (mostly
via current_fs_time()), and always with super_block.s_time_gran as second
argument. So this can safely be changed without side effects.
Note: This does _not_ fix the issue for FAT's 2 second mtime resolution,
as super_block.s_time_gran isn't prepared to handle different ctime /
mtime / atime resolutions nor resolutions > 1 second.
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2015-06-25 20:13:55 +08:00
|
|
|
* Truncate a timespec to a granularity. Always rounds down. gran must
|
|
|
|
* not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
struct timespec timespec_trunc(struct timespec t, unsigned gran)
|
|
|
|
{
|
time: Fix nanosecond file time rounding in timespec_trunc()
timespec_trunc() avoids rounding if granularity <= nanoseconds-per-jiffie
(or TICK_NSEC). This optimization assumes that:
1. current_kernel_time().tv_nsec is already rounded to TICK_NSEC (i.e.
with HZ=1000 you'd get 1000000, 2000000, 3000000... but never 1000001).
This is no longer true (probably since hrtimers introduced in 2.6.16).
2. TICK_NSEC is evenly divisible by all possible granularities. This may
be true for HZ=100, 250, 1000, but obviously not for HZ=300 /
TICK_NSEC=3333333 (introduced in 2.6.20).
Thus, sub-second portions of in-core file times are not rounded to on-disk
granularity. I.e. file times may change when the inode is re-read from disk
or when the file system is remounted.
This affects all file systems with file time granularities > 1 ns and < 1s,
e.g. CEPH (1000 ns), UDF (1000 ns), CIFS (100 ns), NTFS (100 ns) and FUSE
(configurable from user mode via struct fuse_init_out.time_gran).
Steps to reproduce with e.g. UDF:
$ dd if=/dev/zero of=udfdisk count=10000 && mkudffs udfdisk
$ mkdir udf && mount udfdisk udf
$ touch udf/test && stat -c %y udf/test
2015-06-09 10:22:56.130006767 +0200
$ umount udf && mount udfdisk udf
$ stat -c %y udf/test
2015-06-09 10:22:56.130006000 +0200
Remounting truncates the mtime to 1 µs.
Fix the rounding in timespec_trunc() and update the documentation.
timespec_trunc() is exclusively used to calculate inode's [acm]time (mostly
via current_fs_time()), and always with super_block.s_time_gran as second
argument. So this can safely be changed without side effects.
Note: This does _not_ fix the issue for FAT's 2 second mtime resolution,
as super_block.s_time_gran isn't prepared to handle different ctime /
mtime / atime resolutions nor resolutions > 1 second.
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2015-06-25 20:13:55 +08:00
|
|
|
/* Avoid division in the common cases 1 ns and 1 s. */
|
|
|
|
if (gran == 1) {
|
2005-04-17 06:20:36 +08:00
|
|
|
/* nothing */
|
time: Fix nanosecond file time rounding in timespec_trunc()
timespec_trunc() avoids rounding if granularity <= nanoseconds-per-jiffie
(or TICK_NSEC). This optimization assumes that:
1. current_kernel_time().tv_nsec is already rounded to TICK_NSEC (i.e.
with HZ=1000 you'd get 1000000, 2000000, 3000000... but never 1000001).
This is no longer true (probably since hrtimers introduced in 2.6.16).
2. TICK_NSEC is evenly divisible by all possible granularities. This may
be true for HZ=100, 250, 1000, but obviously not for HZ=300 /
TICK_NSEC=3333333 (introduced in 2.6.20).
Thus, sub-second portions of in-core file times are not rounded to on-disk
granularity. I.e. file times may change when the inode is re-read from disk
or when the file system is remounted.
This affects all file systems with file time granularities > 1 ns and < 1s,
e.g. CEPH (1000 ns), UDF (1000 ns), CIFS (100 ns), NTFS (100 ns) and FUSE
(configurable from user mode via struct fuse_init_out.time_gran).
Steps to reproduce with e.g. UDF:
$ dd if=/dev/zero of=udfdisk count=10000 && mkudffs udfdisk
$ mkdir udf && mount udfdisk udf
$ touch udf/test && stat -c %y udf/test
2015-06-09 10:22:56.130006767 +0200
$ umount udf && mount udfdisk udf
$ stat -c %y udf/test
2015-06-09 10:22:56.130006000 +0200
Remounting truncates the mtime to 1 µs.
Fix the rounding in timespec_trunc() and update the documentation.
timespec_trunc() is exclusively used to calculate inode's [acm]time (mostly
via current_fs_time()), and always with super_block.s_time_gran as second
argument. So this can safely be changed without side effects.
Note: This does _not_ fix the issue for FAT's 2 second mtime resolution,
as super_block.s_time_gran isn't prepared to handle different ctime /
mtime / atime resolutions nor resolutions > 1 second.
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2015-06-25 20:13:55 +08:00
|
|
|
} else if (gran == NSEC_PER_SEC) {
|
2005-04-17 06:20:36 +08:00
|
|
|
t.tv_nsec = 0;
|
time: Fix nanosecond file time rounding in timespec_trunc()
timespec_trunc() avoids rounding if granularity <= nanoseconds-per-jiffie
(or TICK_NSEC). This optimization assumes that:
1. current_kernel_time().tv_nsec is already rounded to TICK_NSEC (i.e.
with HZ=1000 you'd get 1000000, 2000000, 3000000... but never 1000001).
This is no longer true (probably since hrtimers introduced in 2.6.16).
2. TICK_NSEC is evenly divisible by all possible granularities. This may
be true for HZ=100, 250, 1000, but obviously not for HZ=300 /
TICK_NSEC=3333333 (introduced in 2.6.20).
Thus, sub-second portions of in-core file times are not rounded to on-disk
granularity. I.e. file times may change when the inode is re-read from disk
or when the file system is remounted.
This affects all file systems with file time granularities > 1 ns and < 1s,
e.g. CEPH (1000 ns), UDF (1000 ns), CIFS (100 ns), NTFS (100 ns) and FUSE
(configurable from user mode via struct fuse_init_out.time_gran).
Steps to reproduce with e.g. UDF:
$ dd if=/dev/zero of=udfdisk count=10000 && mkudffs udfdisk
$ mkdir udf && mount udfdisk udf
$ touch udf/test && stat -c %y udf/test
2015-06-09 10:22:56.130006767 +0200
$ umount udf && mount udfdisk udf
$ stat -c %y udf/test
2015-06-09 10:22:56.130006000 +0200
Remounting truncates the mtime to 1 µs.
Fix the rounding in timespec_trunc() and update the documentation.
timespec_trunc() is exclusively used to calculate inode's [acm]time (mostly
via current_fs_time()), and always with super_block.s_time_gran as second
argument. So this can safely be changed without side effects.
Note: This does _not_ fix the issue for FAT's 2 second mtime resolution,
as super_block.s_time_gran isn't prepared to handle different ctime /
mtime / atime resolutions nor resolutions > 1 second.
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2015-06-25 20:13:55 +08:00
|
|
|
} else if (gran > 1 && gran < NSEC_PER_SEC) {
|
2005-04-17 06:20:36 +08:00
|
|
|
t.tv_nsec -= t.tv_nsec % gran;
|
time: Fix nanosecond file time rounding in timespec_trunc()
timespec_trunc() avoids rounding if granularity <= nanoseconds-per-jiffie
(or TICK_NSEC). This optimization assumes that:
1. current_kernel_time().tv_nsec is already rounded to TICK_NSEC (i.e.
with HZ=1000 you'd get 1000000, 2000000, 3000000... but never 1000001).
This is no longer true (probably since hrtimers introduced in 2.6.16).
2. TICK_NSEC is evenly divisible by all possible granularities. This may
be true for HZ=100, 250, 1000, but obviously not for HZ=300 /
TICK_NSEC=3333333 (introduced in 2.6.20).
Thus, sub-second portions of in-core file times are not rounded to on-disk
granularity. I.e. file times may change when the inode is re-read from disk
or when the file system is remounted.
This affects all file systems with file time granularities > 1 ns and < 1s,
e.g. CEPH (1000 ns), UDF (1000 ns), CIFS (100 ns), NTFS (100 ns) and FUSE
(configurable from user mode via struct fuse_init_out.time_gran).
Steps to reproduce with e.g. UDF:
$ dd if=/dev/zero of=udfdisk count=10000 && mkudffs udfdisk
$ mkdir udf && mount udfdisk udf
$ touch udf/test && stat -c %y udf/test
2015-06-09 10:22:56.130006767 +0200
$ umount udf && mount udfdisk udf
$ stat -c %y udf/test
2015-06-09 10:22:56.130006000 +0200
Remounting truncates the mtime to 1 µs.
Fix the rounding in timespec_trunc() and update the documentation.
timespec_trunc() is exclusively used to calculate inode's [acm]time (mostly
via current_fs_time()), and always with super_block.s_time_gran as second
argument. So this can safely be changed without side effects.
Note: This does _not_ fix the issue for FAT's 2 second mtime resolution,
as super_block.s_time_gran isn't prepared to handle different ctime /
mtime / atime resolutions nor resolutions > 1 second.
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2015-06-25 20:13:55 +08:00
|
|
|
} else {
|
|
|
|
WARN(1, "illegal file time granularity: %u", gran);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(timespec_trunc);
|
|
|
|
|
2014-11-18 19:15:18 +08:00
|
|
|
/*
|
|
|
|
* mktime64 - Converts date to seconds.
|
|
|
|
* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
|
2006-01-10 12:52:22 +08:00
|
|
|
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
|
|
|
|
* => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
|
|
|
|
*
|
|
|
|
* [For the Julian calendar (which was used in Russia before 1917,
|
|
|
|
* Britain & colonies before 1752, anywhere else before 1582,
|
|
|
|
* and is still in use by some communities) leave out the
|
|
|
|
* -year/100+year/400 terms, and add 10.]
|
|
|
|
*
|
|
|
|
* This algorithm was first published by Gauss (I think).
|
2016-02-24 22:37:53 +08:00
|
|
|
*
|
|
|
|
* A leap second can be indicated by calling this function with sec as
|
|
|
|
* 60 (allowable under ISO 8601). The leap second is treated the same
|
|
|
|
* as the following second since they don't exist in UNIX time.
|
|
|
|
*
|
|
|
|
* An encoding of midnight at the end of the day as 24:00:00 - ie. midnight
|
|
|
|
* tomorrow - (allowable under ISO 8601) is supported.
|
2006-01-10 12:52:22 +08:00
|
|
|
*/
|
2014-11-18 19:15:18 +08:00
|
|
|
time64_t mktime64(const unsigned int year0, const unsigned int mon0,
|
|
|
|
const unsigned int day, const unsigned int hour,
|
|
|
|
const unsigned int min, const unsigned int sec)
|
2006-01-10 12:52:22 +08:00
|
|
|
{
|
2006-01-10 12:52:23 +08:00
|
|
|
unsigned int mon = mon0, year = year0;
|
|
|
|
|
|
|
|
/* 1..12 -> 11,12,1..10 */
|
|
|
|
if (0 >= (int) (mon -= 2)) {
|
|
|
|
mon += 12; /* Puts Feb last since it has leap day */
|
2006-01-10 12:52:22 +08:00
|
|
|
year -= 1;
|
|
|
|
}
|
|
|
|
|
2014-11-18 19:15:18 +08:00
|
|
|
return ((((time64_t)
|
2006-01-10 12:52:22 +08:00
|
|
|
(year/4 - year/100 + year/400 + 367*mon/12 + day) +
|
|
|
|
year*365 - 719499
|
2016-02-24 22:37:53 +08:00
|
|
|
)*24 + hour /* now have hours - midnight tomorrow handled here */
|
2006-01-10 12:52:22 +08:00
|
|
|
)*60 + min /* now have minutes */
|
|
|
|
)*60 + sec; /* finally seconds */
|
|
|
|
}
|
2014-11-18 19:15:18 +08:00
|
|
|
EXPORT_SYMBOL(mktime64);
|
2006-01-10 12:52:24 +08:00
|
|
|
|
2006-01-10 12:52:22 +08:00
|
|
|
/**
|
|
|
|
* set_normalized_timespec - set timespec sec and nsec parts and normalize
|
|
|
|
*
|
|
|
|
* @ts: pointer to timespec variable to be set
|
|
|
|
* @sec: seconds to set
|
|
|
|
* @nsec: nanoseconds to set
|
|
|
|
*
|
|
|
|
* Set seconds and nanoseconds field of a timespec variable and
|
|
|
|
* normalize to the timespec storage format
|
|
|
|
*
|
|
|
|
* Note: The tv_nsec part is always in the range of
|
avoid overflows in kernel/time.c
When the conversion factor between jiffies and milli- or microseconds is
not a single multiply or divide, as for the case of HZ == 300, we currently
do a multiply followed by a divide. The intervening result, however, is
subject to overflows, especially since the fraction is not simplified (for
HZ == 300, we multiply by 300 and divide by 1000).
This is exposed to the user when passing a large timeout to poll(), for
example.
This patch replaces the multiply-divide with a reciprocal multiplication on
32-bit platforms. When the input is an unsigned long, there is no portable
way to do this on 64-bit platforms there is no portable way to do this
since it requires a 128-bit intermediate result (which gcc does support on
64-bit platforms but may generate libgcc calls, e.g. on 64-bit s390), but
since the output is a 32-bit integer in the cases affected, just simplify
the multiply-divide (*3/10 instead of *300/1000).
The reciprocal multiply used can have off-by-one errors in the upper half
of the valid output range. This could be avoided at the expense of having
to deal with a potential 65-bit intermediate result. Since the intent is
to avoid overflow problems and most of the other time conversions are only
semiexact, the off-by-one errors were considered an acceptable tradeoff.
At Ralf Baechle's suggestion, this version uses a Perl script to compute
the necessary constants. We already have dependencies on Perl for kernel
compiles. This does, however, require the Perl module Math::BigInt, which
is included in the standard Perl distribution starting with version 5.8.0.
In order to support older versions of Perl, include a table of canned
constants in the script itself, and structure the script so that
Math::BigInt isn't required if pulling values from said table.
Running the script requires that the HZ value is available from the
Makefile. Thus, this patch also adds the Kconfig variable CONFIG_HZ to the
architectures which didn't already have it (alpha, cris, frv, h8300, m32r,
m68k, m68knommu, sparc, v850, and xtensa.) It does *not* touch the sh or
sh64 architectures, since Paul Mundt has dealt with those separately in the
sh tree.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ralf Baechle <ralf@linux-mips.org>,
Cc: Sam Ravnborg <sam@ravnborg.org>,
Cc: Paul Mundt <lethal@linux-sh.org>,
Cc: Richard Henderson <rth@twiddle.net>,
Cc: Michael Starvik <starvik@axis.com>,
Cc: David Howells <dhowells@redhat.com>,
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>,
Cc: Hirokazu Takata <takata@linux-m32r.org>,
Cc: Geert Uytterhoeven <geert@linux-m68k.org>,
Cc: Roman Zippel <zippel@linux-m68k.org>,
Cc: William L. Irwin <sparclinux@vger.kernel.org>,
Cc: Chris Zankel <chris@zankel.net>,
Cc: H. Peter Anvin <hpa@zytor.com>,
Cc: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:26 +08:00
|
|
|
* 0 <= tv_nsec < NSEC_PER_SEC
|
2006-01-10 12:52:22 +08:00
|
|
|
* For negative values only the tv_sec field is negative !
|
|
|
|
*/
|
2009-09-15 05:37:40 +08:00
|
|
|
void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
{
	/* Carry whole seconds out of nsec, one NSEC_PER_SEC step at a time. */
	for (; nsec >= NSEC_PER_SEC; ++sec) {
		/*
		 * The empty asm() keeps the compiler from collapsing this
		 * loop into a modulo operation (compare
		 * __iter_div_u64_rem()).
		 */
		asm("" : "+rm"(nsec));
		nsec -= NSEC_PER_SEC;
	}

	/* Borrow seconds until nsec is no longer negative. */
	for (; nsec < 0; --sec) {
		asm("" : "+rm"(nsec));
		nsec += NSEC_PER_SEC;
	}

	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}
|
2008-04-22 10:45:12 +08:00
|
|
|
EXPORT_SYMBOL(set_normalized_timespec);
|
2006-01-10 12:52:22 +08:00
|
|
|
|
2006-01-10 12:52:30 +08:00
|
|
|
/**
|
|
|
|
* ns_to_timespec - Convert nanoseconds to timespec
|
|
|
|
* @nsec: the nanoseconds value to be converted
|
|
|
|
*
|
|
|
|
* Returns the timespec representation of the nsec parameter.
|
|
|
|
*/
|
2006-03-26 17:38:11 +08:00
|
|
|
struct timespec ns_to_timespec(const s64 nsec)
|
2006-01-10 12:52:30 +08:00
|
|
|
{
|
|
|
|
struct timespec ts;
|
2008-05-01 19:34:31 +08:00
|
|
|
s32 rem;
|
2006-01-10 12:52:30 +08:00
|
|
|
|
2006-02-03 19:04:20 +08:00
|
|
|
if (!nsec)
|
|
|
|
return (struct timespec) {0, 0};
|
|
|
|
|
2008-05-01 19:34:31 +08:00
|
|
|
ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
|
|
|
|
if (unlikely(rem < 0)) {
|
|
|
|
ts.tv_sec--;
|
|
|
|
rem += NSEC_PER_SEC;
|
|
|
|
}
|
|
|
|
ts.tv_nsec = rem;
|
2006-01-10 12:52:30 +08:00
|
|
|
|
|
|
|
return ts;
|
|
|
|
}
|
2007-03-25 12:35:33 +08:00
|
|
|
EXPORT_SYMBOL(ns_to_timespec);
|
2006-01-10 12:52:30 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* ns_to_timeval - Convert nanoseconds to timeval
|
|
|
|
* @nsec: the nanoseconds value to be converted
|
|
|
|
*
|
|
|
|
* Returns the timeval representation of the nsec parameter.
|
|
|
|
*/
|
2006-03-26 17:38:11 +08:00
|
|
|
struct timeval ns_to_timeval(const s64 nsec)
|
2006-01-10 12:52:30 +08:00
|
|
|
{
|
|
|
|
struct timespec ts = ns_to_timespec(nsec);
|
|
|
|
struct timeval tv;
|
|
|
|
|
|
|
|
tv.tv_sec = ts.tv_sec;
|
|
|
|
tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
|
|
|
|
|
|
|
|
return tv;
|
|
|
|
}
|
2007-04-20 07:16:32 +08:00
|
|
|
EXPORT_SYMBOL(ns_to_timeval);
|
2006-01-10 12:52:30 +08:00
|
|
|
|
2014-07-17 05:03:59 +08:00
|
|
|
#if BITS_PER_LONG == 32
|
|
|
|
/**
|
|
|
|
* set_normalized_timespec64 - set timespec64 sec and nsec parts and normalize
|
|
|
|
*
|
|
|
|
* @ts: pointer to timespec64 variable to be set
|
|
|
|
* @sec: seconds to set
|
|
|
|
* @nsec: nanoseconds to set
|
|
|
|
*
|
|
|
|
* Set seconds and nanoseconds field of a timespec64 variable and
|
|
|
|
* normalize to the timespec64 storage format
|
|
|
|
*
|
|
|
|
* Note: The tv_nsec part is always in the range of
|
|
|
|
* 0 <= tv_nsec < NSEC_PER_SEC
|
|
|
|
* For negative values only the tv_sec field is negative !
|
|
|
|
*/
|
|
|
|
void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec)
{
	/* Fold surplus nanoseconds into sec, one second per iteration. */
	for (; nsec >= NSEC_PER_SEC; ++sec) {
		/*
		 * The empty asm() stops the compiler from rewriting this
		 * loop as a modulo operation (compare
		 * __iter_div_u64_rem()).
		 */
		asm("" : "+rm"(nsec));
		nsec -= NSEC_PER_SEC;
	}

	/* Pull seconds back out of sec until nsec is non-negative. */
	for (; nsec < 0; --sec) {
		asm("" : "+rm"(nsec));
		nsec += NSEC_PER_SEC;
	}

	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}
|
|
|
|
EXPORT_SYMBOL(set_normalized_timespec64);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ns_to_timespec64 - Convert nanoseconds to timespec64
|
|
|
|
* @nsec: the nanoseconds value to be converted
|
|
|
|
*
|
|
|
|
* Returns the timespec64 representation of the nsec parameter.
|
|
|
|
*/
|
|
|
|
struct timespec64 ns_to_timespec64(const s64 nsec)
|
|
|
|
{
|
|
|
|
struct timespec64 ts;
|
|
|
|
s32 rem;
|
|
|
|
|
|
|
|
if (!nsec)
|
|
|
|
return (struct timespec64) {0, 0};
|
|
|
|
|
|
|
|
ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
|
|
|
|
if (unlikely(rem < 0)) {
|
|
|
|
ts.tv_sec--;
|
|
|
|
rem += NSEC_PER_SEC;
|
|
|
|
}
|
|
|
|
ts.tv_nsec = rem;
|
|
|
|
|
|
|
|
return ts;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ns_to_timespec64);
|
|
|
|
#endif
|
2015-05-18 20:19:13 +08:00
|
|
|
/**
|
|
|
|
* __msecs_to_jiffies - convert milliseconds to jiffies
|
|
|
|
* @m: time in milliseconds
|
|
|
|
*
|
|
|
|
* conversion is done as follows:
|
2007-02-16 17:27:28 +08:00
|
|
|
*
|
|
|
|
* - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
|
|
|
|
*
|
|
|
|
* - 'too large' values [that would result in larger than
|
|
|
|
* MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
|
|
|
|
*
|
|
|
|
* - all other values are converted to jiffies by either multiplying
|
2015-05-18 20:19:13 +08:00
|
|
|
* the input value by a factor or dividing it with a factor and
|
|
|
|
* handling any 32-bit overflows.
|
|
|
|
* for the details see __msecs_to_jiffies()
|
2007-02-16 17:27:28 +08:00
|
|
|
*
|
2015-05-18 20:19:13 +08:00
|
|
|
* msecs_to_jiffies() checks for the passed in value being a constant
|
|
|
|
* via __builtin_constant_p() allowing gcc to eliminate most of the
|
|
|
|
* code, __msecs_to_jiffies() is called if the value passed does not
|
|
|
|
* allow constant folding and the actual conversion must be done at
|
|
|
|
* runtime.
|
|
|
|
* the _msecs_to_jiffies helpers are the HZ dependent conversion
|
|
|
|
* routines found in include/linux/jiffies.h
|
2007-02-16 17:27:28 +08:00
|
|
|
*/
|
2015-05-18 20:19:13 +08:00
|
|
|
unsigned long __msecs_to_jiffies(const unsigned int m)
|
2007-02-16 17:27:27 +08:00
|
|
|
{
|
2007-02-16 17:27:28 +08:00
|
|
|
/*
|
|
|
|
* Negative value, means infinite timeout:
|
|
|
|
*/
|
|
|
|
if ((int)m < 0)
|
2007-02-16 17:27:27 +08:00
|
|
|
return MAX_JIFFY_OFFSET;
|
2015-05-18 20:19:13 +08:00
|
|
|
return _msecs_to_jiffies(m);
|
2007-02-16 17:27:27 +08:00
|
|
|
}
|
2015-05-18 20:19:13 +08:00
|
|
|
EXPORT_SYMBOL(__msecs_to_jiffies);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
2015-05-29 01:09:55 +08:00
|
|
|
unsigned long __usecs_to_jiffies(const unsigned int u)
|
2007-02-16 17:27:27 +08:00
|
|
|
{
|
|
|
|
if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
|
|
|
|
return MAX_JIFFY_OFFSET;
|
2015-05-29 01:09:55 +08:00
|
|
|
return _usecs_to_jiffies(u);
|
2007-02-16 17:27:27 +08:00
|
|
|
}
|
2015-05-29 01:09:55 +08:00
|
|
|
EXPORT_SYMBOL(__usecs_to_jiffies);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The TICK_NSEC - 1 rounds up the value to the next resolution. Note
|
|
|
|
* that a remainder subtract here would not do the right thing as the
|
|
|
|
* resolution values don't fall on second boundaries. I.e. the line:
|
|
|
|
* nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
* Note that due to the small error in the multiplier here, this
|
|
|
|
* rounding is incorrect for sufficiently large values of tv_nsec, but
|
|
|
|
* well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're
|
|
|
|
* OK.
|
2007-02-16 17:27:27 +08:00
|
|
|
*
|
|
|
|
* Rather, we just shift the bits off the right.
|
|
|
|
*
|
|
|
|
* The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
|
|
|
|
* value to a scaled second value.
|
|
|
|
*/
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
static unsigned long
|
2015-07-29 20:18:31 +08:00
|
|
|
__timespec64_to_jiffies(u64 sec, long nsec)
|
2007-02-16 17:27:27 +08:00
|
|
|
{
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
nsec = nsec + TICK_NSEC - 1;
|
2007-02-16 17:27:27 +08:00
|
|
|
|
|
|
|
if (sec >= MAX_SEC_IN_JIFFIES){
|
|
|
|
sec = MAX_SEC_IN_JIFFIES;
|
|
|
|
nsec = 0;
|
|
|
|
}
|
2015-07-29 20:18:31 +08:00
|
|
|
return ((sec * SEC_CONVERSION) +
|
2007-02-16 17:27:27 +08:00
|
|
|
(((u64)nsec * NSEC_CONVERSION) >>
|
|
|
|
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
|
|
|
|
|
|
|
|
}
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
|
2015-07-29 20:18:31 +08:00
|
|
|
static unsigned long
|
|
|
|
__timespec_to_jiffies(unsigned long sec, long nsec)
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
{
|
2015-07-29 20:18:31 +08:00
|
|
|
return __timespec64_to_jiffies((u64)sec, nsec);
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
}
|
|
|
|
|
2015-07-29 20:18:31 +08:00
|
|
|
unsigned long
|
|
|
|
timespec64_to_jiffies(const struct timespec64 *value)
|
|
|
|
{
|
|
|
|
return __timespec64_to_jiffies(value->tv_sec, value->tv_nsec);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(timespec64_to_jiffies);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
|
|
|
void
|
2015-07-29 20:18:31 +08:00
|
|
|
jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value)
|
2007-02-16 17:27:27 +08:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Convert jiffies to nanoseconds and separate with
|
|
|
|
* one divide.
|
|
|
|
*/
|
2008-05-01 19:34:31 +08:00
|
|
|
u32 rem;
|
|
|
|
value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
|
|
|
|
NSEC_PER_SEC, &rem);
|
|
|
|
value->tv_nsec = rem;
|
2007-02-16 17:27:27 +08:00
|
|
|
}
|
2015-07-29 20:18:31 +08:00
|
|
|
EXPORT_SYMBOL(jiffies_to_timespec64);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
/*
|
|
|
|
* We could use a similar algorithm to timespec_to_jiffies (with a
|
|
|
|
* different multiplier for usec instead of nsec). But this has a
|
|
|
|
* problem with rounding: we can't exactly add TICK_NSEC - 1 to the
|
|
|
|
* usec value, since it's not necessarily integral.
|
|
|
|
*
|
|
|
|
* We could instead round in the intermediate scaled representation
|
|
|
|
* (i.e. in units of 1/2^(large scale) jiffies) but that's also
|
|
|
|
* perilous: the scaling introduces a small positive error, which
|
|
|
|
* combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
|
|
|
|
* units to the intermediate before shifting) leads to accidental
|
|
|
|
* overflow and overestimates.
|
2007-02-16 17:27:27 +08:00
|
|
|
*
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
* At the cost of one additional multiplication by a constant, just
|
|
|
|
* use the timespec implementation.
|
2007-02-16 17:27:27 +08:00
|
|
|
*/
|
|
|
|
unsigned long
|
|
|
|
timeval_to_jiffies(const struct timeval *value)
|
|
|
|
{
|
jiffies: Fix timeval conversion to jiffies
timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:
setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);
would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.) Doing
this repeatedly would cause unbounded growth in val. So fix the math.
Here's what was wrong with the conversion: we essentially computed
(eliding seconds)
jiffies = usec * (NSEC_PER_USEC/TICK_NSEC)
by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:
jiffies = (usec * x) >> USEC_JIFFIE_SC
and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)
In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.
We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies. This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.
Tested: the following program:
int main() {
struct itimerval zero = {{0, 0}, {0, 0}};
/* Initially set to 10 ms. */
struct itimerval initial = zero;
initial.it_interval.tv_usec = 10000;
setitimer(ITIMER_PROF, &initial, NULL);
/* Save and restore several times. */
for (size_t i = 0; i < 10; ++i) {
struct itimerval prev;
setitimer(ITIMER_PROF, &zero, &prev);
/* on old kernels, this goes up by TICK_USEC every iteration */
printf("previous value: %ld %ld %ld %ld\n",
prev.it_interval.tv_sec, prev.it_interval.tv_usec,
prev.it_value.tv_sec, prev.it_value.tv_usec);
setitimer(ITIMER_PROF, &prev, NULL);
}
return 0;
}
Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2014-09-05 05:17:16 +08:00
|
|
|
return __timespec_to_jiffies(value->tv_sec,
|
|
|
|
value->tv_usec * NSEC_PER_USEC);
|
2007-02-16 17:27:27 +08:00
|
|
|
}
|
2007-04-05 04:20:54 +08:00
|
|
|
EXPORT_SYMBOL(timeval_to_jiffies);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
|
|
|
void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Convert jiffies to nanoseconds and separate with
|
|
|
|
* one divide.
|
|
|
|
*/
|
2008-05-01 19:34:31 +08:00
|
|
|
u32 rem;
|
2007-02-16 17:27:27 +08:00
|
|
|
|
2008-05-01 19:34:31 +08:00
|
|
|
value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
|
|
|
|
NSEC_PER_SEC, &rem);
|
|
|
|
value->tv_usec = rem / NSEC_PER_USEC;
|
2007-02-16 17:27:27 +08:00
|
|
|
}
|
2007-04-05 04:20:54 +08:00
|
|
|
EXPORT_SYMBOL(jiffies_to_timeval);
|
2007-02-16 17:27:27 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert jiffies/jiffies_64 to clock_t and back.
|
|
|
|
*/
|
2011-09-21 04:53:39 +08:00
|
|
|
clock_t jiffies_to_clock_t(unsigned long x)
|
2007-02-16 17:27:27 +08:00
|
|
|
{
|
|
|
|
#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
|
2008-02-06 17:38:04 +08:00
|
|
|
# if HZ < USER_HZ
|
|
|
|
return x * (USER_HZ / HZ);
|
|
|
|
# else
|
2007-02-16 17:27:27 +08:00
|
|
|
return x / (HZ / USER_HZ);
|
2008-02-06 17:38:04 +08:00
|
|
|
# endif
|
2007-02-16 17:27:27 +08:00
|
|
|
#else
|
2008-05-01 19:34:26 +08:00
|
|
|
return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
|
2007-02-16 17:27:27 +08:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(jiffies_to_clock_t);
|
|
|
|
|
|
|
|
unsigned long clock_t_to_jiffies(unsigned long x)
|
|
|
|
{
|
|
|
|
#if (HZ % USER_HZ)==0
|
|
|
|
if (x >= ~0UL / (HZ / USER_HZ))
|
|
|
|
return ~0UL;
|
|
|
|
return x * (HZ / USER_HZ);
|
|
|
|
#else
|
|
|
|
/* Don't worry about loss of precision here .. */
|
|
|
|
if (x >= ~0UL / HZ * USER_HZ)
|
|
|
|
return ~0UL;
|
|
|
|
|
|
|
|
/* .. but do try to contain it here */
|
2008-05-01 19:34:26 +08:00
|
|
|
return div_u64((u64)x * HZ, USER_HZ);
|
2007-02-16 17:27:27 +08:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(clock_t_to_jiffies);
|
|
|
|
|
|
|
|
/*
 * 64-bit counterpart of jiffies_to_clock_t(): convert a jiffies_64
 * value (HZ ticks) to USER_HZ ticks, with the variant picked at
 * compile time.
 */
u64 jiffies_64_to_clock_t(u64 x)
{
#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) != 0
	/*
	 * Going through nanoseconds overflows earlier than strictly
	 * necessary, but with 64 bits that still takes hundreds of
	 * years, so the simple form is kept.
	 */
	return div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
#elif HZ < USER_HZ
	return div_u64(x * USER_HZ, HZ);
#elif HZ > USER_HZ
	return div_u64(x, HZ / USER_HZ);
#else
	/* HZ == USER_HZ: the value is already in the right unit. */
	return x;
#endif
}
EXPORT_SYMBOL(jiffies_64_to_clock_t);
|
|
|
|
|
|
|
|
/*
 * nsec_to_clock_t - convert nanoseconds into clock_t (USER_HZ) units
 * @x: time in nanoseconds
 *
 * The divisor is chosen at compile time depending on how USER_HZ relates
 * to NSEC_PER_SEC.
 */
u64 nsec_to_clock_t(u64 x)
{
	u64 ticks;

#if (NSEC_PER_SEC % USER_HZ) == 0
	/* USER_HZ divides a second evenly: one exact division. */
	ticks = div_u64(x, NSEC_PER_SEC / USER_HZ);
#elif (USER_HZ % 512) == 0
	/* Strip the common factor 512 from both sides of the ratio. */
	ticks = div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
#else
	/*
	 * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
	 * overflow after 64.99 years.
	 * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
	 */
	ticks = div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
#endif

	return ticks;
}
|
|
|
|
|
2009-11-26 13:49:27 +08:00
|
|
|
/**
|
2010-12-22 09:09:01 +08:00
|
|
|
* nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
|
2009-11-26 13:49:27 +08:00
|
|
|
*
|
|
|
|
* @n: nsecs in u64
|
|
|
|
*
|
|
|
|
* Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
|
|
|
|
* And this doesn't return MAX_JIFFY_OFFSET since this function is designed
|
|
|
|
* for scheduler, not for use in device drivers to calculate timeout value.
|
|
|
|
*
|
|
|
|
* note:
|
|
|
|
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
|
|
|
|
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
|
|
|
|
*/
|
2010-12-22 09:09:01 +08:00
|
|
|
u64 nsecs_to_jiffies64(u64 n)
|
2009-11-26 13:49:27 +08:00
|
|
|
{
|
|
|
|
#if (NSEC_PER_SEC % HZ) == 0
|
|
|
|
/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
|
|
|
|
return div_u64(n, NSEC_PER_SEC / HZ);
|
|
|
|
#elif (HZ % 512) == 0
|
|
|
|
/* overflow after 292 years if HZ = 1024 */
|
|
|
|
return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
|
|
|
|
#else
|
|
|
|
/*
|
|
|
|
* Generic case - optimized for cases where HZ is a multiple of 3.
|
|
|
|
* overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
|
|
|
|
*/
|
|
|
|
return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
|
|
|
|
#endif
|
|
|
|
}
|
2014-12-04 18:12:54 +08:00
|
|
|
EXPORT_SYMBOL(nsecs_to_jiffies64);
|
2009-11-26 13:49:27 +08:00
|
|
|
|
2010-12-22 09:09:01 +08:00
|
|
|
/**
|
|
|
|
* nsecs_to_jiffies - Convert nsecs in u64 to jiffies
|
|
|
|
*
|
|
|
|
* @n: nsecs in u64
|
|
|
|
*
|
|
|
|
* Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
|
|
|
|
* And this doesn't return MAX_JIFFY_OFFSET since this function is designed
|
|
|
|
* for scheduler, not for use in device drivers to calculate timeout value.
|
|
|
|
*
|
|
|
|
* note:
|
|
|
|
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
|
|
|
|
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
|
|
|
|
*/
|
|
|
|
unsigned long nsecs_to_jiffies(u64 n)
|
|
|
|
{
|
|
|
|
return (unsigned long)nsecs_to_jiffies64(n);
|
|
|
|
}
|
2014-07-17 05:04:31 +08:00
|
|
|
EXPORT_SYMBOL_GPL(nsecs_to_jiffies);
|
2010-12-22 09:09:01 +08:00
|
|
|
|
2008-08-31 23:09:53 +08:00
|
|
|
/*
|
|
|
|
* Add two timespec values and do a safety check for overflow.
|
|
|
|
* It's assumed that both values are valid (>= 0)
|
|
|
|
*/
|
|
|
|
struct timespec timespec_add_safe(const struct timespec lhs,
|
|
|
|
const struct timespec rhs)
|
|
|
|
{
|
|
|
|
struct timespec res;
|
|
|
|
|
|
|
|
set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
|
|
|
|
lhs.tv_nsec + rhs.tv_nsec);
|
|
|
|
|
|
|
|
if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
|
|
|
|
res.tv_sec = TIME_T_MAX;
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|