x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

This patch add the VDSO time support for the IA32 Emulation Layer.

Due the nature of the kernel headers and the LP64 compiler where the
size of a long and a pointer differs against a 32 bit compiler, there
is some type hacking necessary for optimal performance.

The vsyscall_gtod_data struture must be a rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned, this makes the
  vsyscall_gtod_data intedepend of kernel configuration and internal functions.
- All kernel internal structures are replaced by fix size elements
  which works for 32- and 64-bit access
- The inner struct clock was removed to pack the whole struct.

The "unsigned seq" would be handled by functions derivated from seqcount_t.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Link: http://lkml.kernel.org/r/1395094933-14252-11-git-send-email-stefani@seibold.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
Stefani Seibold 2014-03-17 23:22:10 +01:00 committed by H. Peter Anvin
parent 7a59ed415f
commit 7c03156f34
5 changed files with 155 additions and 67 deletions

View File

@ -1,30 +1,73 @@
#ifndef _ASM_X86_VGTOD_H #ifndef _ASM_X86_VGTOD_H
#define _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H
#include <asm/vsyscall.h> #include <linux/compiler.h>
#include <linux/clocksource.h> #include <linux/clocksource.h>
#ifdef BUILD_VDSO32_64
typedef u64 gtod_long_t;
#else
typedef unsigned long gtod_long_t;
#endif
/*
* vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
* so be carefull by modifying this structure.
*/
struct vsyscall_gtod_data { struct vsyscall_gtod_data {
seqcount_t seq; unsigned seq;
struct { /* extract of a clocksource struct */ int vclock_mode;
int vclock_mode; cycle_t cycle_last;
cycle_t cycle_last; cycle_t mask;
cycle_t mask; u32 mult;
u32 mult; u32 shift;
u32 shift;
} clock;
/* open coded 'struct timespec' */ /* open coded 'struct timespec' */
time_t wall_time_sec;
u64 wall_time_snsec; u64 wall_time_snsec;
gtod_long_t wall_time_sec;
gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec; u64 monotonic_time_snsec;
time_t monotonic_time_sec; gtod_long_t wall_time_coarse_sec;
gtod_long_t wall_time_coarse_nsec;
gtod_long_t monotonic_time_coarse_sec;
gtod_long_t monotonic_time_coarse_nsec;
struct timezone sys_tz; int tz_minuteswest;
struct timespec wall_time_coarse; int tz_dsttime;
struct timespec monotonic_time_coarse;
}; };
extern struct vsyscall_gtod_data vsyscall_gtod_data; extern struct vsyscall_gtod_data vsyscall_gtod_data;
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;
repeat:
ret = ACCESS_ONCE(s->seq);
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
smp_rmb();
return ret;
}
static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
unsigned start)
{
smp_rmb();
return unlikely(s->seq != start);
}
static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
{
++s->seq;
smp_wmb();
}
static inline void gtod_write_end(struct vsyscall_gtod_data *s)
{
smp_wmb();
++s->seq;
}
#endif /* _ASM_X86_VGTOD_H */ #endif /* _ASM_X86_VGTOD_H */

View File

@ -16,6 +16,9 @@
* you mess up, the linker will catch it.) * you mess up, the linker will catch it.)
*/ */
#ifndef _ASM_X86_VVAR_H
#define _ASM_X86_VVAR_H
#if defined(__VVAR_KERNEL_LDS) #if defined(__VVAR_KERNEL_LDS)
/* The kernel linker script defines its own magic to put vvars in the /* The kernel linker script defines its own magic to put vvars in the
@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
#undef DECLARE_VVAR #undef DECLARE_VVAR
#endif

View File

@ -4,6 +4,7 @@
* *
* Modified for x86 32 bit architecture by * Modified for x86 32 bit architecture by
* Stefani Seibold <stefani@seibold.net> * Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
* *
* Thanks to hpa@transmeta.com for some useful hint. * Thanks to hpa@transmeta.com for some useful hint.
* Special thanks to Ingo Molnar for his early experience with * Special thanks to Ingo Molnar for his early experience with
@ -13,26 +14,28 @@
#include <linux/timekeeper_internal.h> #include <linux/timekeeper_internal.h>
#include <asm/vgtod.h> #include <asm/vgtod.h>
#include <asm/vvar.h>
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void) void update_vsyscall_tz(void)
{ {
vsyscall_gtod_data.sys_tz = sys_tz; vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
} }
void update_vsyscall(struct timekeeper *tk) void update_vsyscall(struct timekeeper *tk)
{ {
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
write_seqcount_begin(&vdata->seq); gtod_write_begin(vdata);
/* copy vsyscall data */ /* copy vsyscall data */
vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; vdata->vclock_mode = tk->clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->clock->cycle_last; vdata->cycle_last = tk->clock->cycle_last;
vdata->clock.mask = tk->clock->mask; vdata->mask = tk->clock->mask;
vdata->clock.mult = tk->mult; vdata->mult = tk->mult;
vdata->clock.shift = tk->shift; vdata->shift = tk->shift;
vdata->wall_time_sec = tk->xtime_sec; vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->xtime_nsec; vdata->wall_time_snsec = tk->xtime_nsec;
@ -49,11 +52,18 @@ void update_vsyscall(struct timekeeper *tk)
vdata->monotonic_time_sec++; vdata->monotonic_time_sec++;
} }
vdata->wall_time_coarse.tv_sec = tk->xtime_sec; vdata->wall_time_coarse_sec = tk->xtime_sec;
vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift);
vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, vdata->monotonic_time_coarse_sec =
tk->wall_to_monotonic); vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_coarse_nsec =
vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
write_seqcount_end(&vdata->seq); while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
vdata->monotonic_time_coarse_sec++;
}
gtod_write_end(vdata);
} }

View File

@ -14,16 +14,14 @@
/* Disable profiling for userspace code: */ /* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING #define DISABLE_BRANCH_PROFILING
#include <linux/kernel.h>
#include <uapi/linux/time.h> #include <uapi/linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/vgtod.h> #include <asm/vgtod.h>
#include <asm/hpet.h> #include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/io.h> #include <asm/msr.h>
#include <asm/pvclock.h> #include <linux/math64.h>
#include <linux/time.h>
#define gtod (&VVAR(vsyscall_gtod_data)) #define gtod (&VVAR(vsyscall_gtod_data))
@ -31,11 +29,23 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t); extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_HPET_TIMER
static inline u32 read_hpet_counter(const volatile void *addr)
{
return *(const volatile u32 *) (addr + HPET_COUNTER);
}
#endif
#ifndef BUILD_VDSO32 #ifndef BUILD_VDSO32
#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>
static notrace cycle_t vread_hpet(void) static notrace cycle_t vread_hpet(void)
{ {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
} }
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@ -116,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE; *mode = VCLOCK_NONE;
/* refer to tsc.c read_tsc() comment for rationale */ /* refer to tsc.c read_tsc() comment for rationale */
last = gtod->clock.cycle_last; last = gtod->cycle_last;
if (likely(ret >= last)) if (likely(ret >= last))
return ret; return ret;
@ -133,7 +143,7 @@ extern u8 hpet_page
#ifdef CONFIG_HPET_TIMER #ifdef CONFIG_HPET_TIMER
static notrace cycle_t vread_hpet(void) static notrace cycle_t vread_hpet(void)
{ {
return readl((const void __iomem *)(&hpet_page + HPET_COUNTER)); return read_hpet_counter((const void *)(&hpet_page));
} }
#endif #endif
@ -193,7 +203,7 @@ notrace static cycle_t vread_tsc(void)
rdtsc_barrier(); rdtsc_barrier();
ret = (cycle_t)__native_read_tsc(); ret = (cycle_t)__native_read_tsc();
last = gtod->clock.cycle_last; last = gtod->cycle_last;
if (likely(ret >= last)) if (likely(ret >= last))
return ret; return ret;
@ -214,20 +224,21 @@ notrace static inline u64 vgetsns(int *mode)
{ {
u64 v; u64 v;
cycles_t cycles; cycles_t cycles;
if (gtod->clock.vclock_mode == VCLOCK_TSC)
if (gtod->vclock_mode == VCLOCK_TSC)
cycles = vread_tsc(); cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER #ifdef CONFIG_HPET_TIMER
else if (gtod->clock.vclock_mode == VCLOCK_HPET) else if (gtod->vclock_mode == VCLOCK_HPET)
cycles = vread_hpet(); cycles = vread_hpet();
#endif #endif
#ifdef CONFIG_PARAVIRT_CLOCK #ifdef CONFIG_PARAVIRT_CLOCK
else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode); cycles = vread_pvclock(mode);
#endif #endif
else else
return 0; return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; v = (cycles - gtod->cycle_last) & gtod->mask;
return v * gtod->clock.mult; return v * gtod->mult;
} }
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@ -237,17 +248,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
u64 ns; u64 ns;
int mode; int mode;
ts->tv_nsec = 0;
do { do {
seq = raw_read_seqcount_begin(&gtod->seq); seq = gtod_read_begin(gtod);
mode = gtod->clock.vclock_mode; mode = gtod->vclock_mode;
ts->tv_sec = gtod->wall_time_sec; ts->tv_sec = gtod->wall_time_sec;
ns = gtod->wall_time_snsec; ns = gtod->wall_time_snsec;
ns += vgetsns(&mode); ns += vgetsns(&mode);
ns >>= gtod->clock.shift; ns >>= gtod->shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); } while (unlikely(gtod_read_retry(gtod, seq)));
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
timespec_add_ns(ts, ns);
return mode; return mode;
} }
@ -257,16 +269,17 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
u64 ns; u64 ns;
int mode; int mode;
ts->tv_nsec = 0;
do { do {
seq = raw_read_seqcount_begin(&gtod->seq); seq = gtod_read_begin(gtod);
mode = gtod->clock.vclock_mode; mode = gtod->vclock_mode;
ts->tv_sec = gtod->monotonic_time_sec; ts->tv_sec = gtod->monotonic_time_sec;
ns = gtod->monotonic_time_snsec; ns = gtod->monotonic_time_snsec;
ns += vgetsns(&mode); ns += vgetsns(&mode);
ns >>= gtod->clock.shift; ns >>= gtod->shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); } while (unlikely(gtod_read_retry(gtod, seq)));
timespec_add_ns(ts, ns);
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return mode; return mode;
} }
@ -275,20 +288,20 @@ notrace static void do_realtime_coarse(struct timespec *ts)
{ {
unsigned long seq; unsigned long seq;
do { do {
seq = raw_read_seqcount_begin(&gtod->seq); seq = gtod_read_begin(gtod);
ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_sec = gtod->wall_time_coarse_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; ts->tv_nsec = gtod->wall_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); } while (unlikely(gtod_read_retry(gtod, seq)));
} }
notrace static void do_monotonic_coarse(struct timespec *ts) notrace static void do_monotonic_coarse(struct timespec *ts)
{ {
unsigned long seq; unsigned long seq;
do { do {
seq = raw_read_seqcount_begin(&gtod->seq); seq = gtod_read_begin(gtod);
ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; ts->tv_sec = gtod->monotonic_time_coarse_sec;
ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); } while (unlikely(gtod_read_retry(gtod, seq)));
} }
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
@ -322,17 +335,13 @@ int clock_gettime(clockid_t, struct timespec *)
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{ {
if (likely(tv != NULL)) { if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
offsetof(struct timespec, tv_nsec) ||
sizeof(*tv) != sizeof(struct timespec));
if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
return vdso_fallback_gtod(tv, tz); return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000; tv->tv_usec /= 1000;
} }
if (unlikely(tz != NULL)) { if (unlikely(tz != NULL)) {
/* Avoid memcpy. Some old compilers fail to inline it */ tz->tz_minuteswest = gtod->tz_minuteswest;
tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; tz->tz_dsttime = gtod->tz_dsttime;
tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
} }
return 0; return 0;

View File

@ -6,4 +6,25 @@
#undef CONFIG_X86_PPRO_FENCE #undef CONFIG_X86_PPRO_FENCE
#ifdef CONFIG_X86_64
/*
* in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
* configuration
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
#define CONFIG_X86_32 1
#define CONFIG_PAGE_OFFSET 0
#define CONFIG_ILLEGAL_POINTER_VALUE 0
#define CONFIG_NR_CPUS 1
#define BUILD_VDSO32_64
#endif
#include "../vclock_gettime.c" #include "../vclock_gettime.c"