x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO
Newer KVM won't be exposing PVCLOCK_COUNTS_FROM_ZERO anymore. The purpose of that flags was to start counting system time from 0 when the KVM clock has been initialized. We can achieve the same by selecting one read as the initial point. A simple subtraction will work unless the KVM clock count overflows earlier (has smaller width) than scheduler's cycle count. We should be safe till x86_128. Because PVCLOCK_COUNTS_FROM_ZERO was enabled only on new hypervisors, setting sched clock as stable based on PVCLOCK_TSC_STABLE_BIT might regress on older ones. I presume we don't need to change kvm_clock_read instead of introducing kvm_sched_clock_read. A problem could arise in case sched_clock is expected to return the same value as get_cycles, but we should have merged those clocks in that case. Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> Acked-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
1cea0ce68e
commit
72c930dcfc
|
@ -32,6 +32,7 @@
|
|||
static int kvmclock = 1;
|
||||
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
|
||||
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
|
||||
static cycle_t kvm_sched_clock_offset;
|
||||
|
||||
static int parse_no_kvmclock(char *arg)
|
||||
{
|
||||
|
@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
|
|||
return kvm_clock_read();
|
||||
}
|
||||
|
||||
static cycle_t kvm_sched_clock_read(void)
|
||||
{
|
||||
return kvm_clock_read() - kvm_sched_clock_offset;
|
||||
}
|
||||
|
||||
static inline void kvm_sched_clock_init(bool stable)
|
||||
{
|
||||
if (!stable) {
|
||||
pv_time_ops.sched_clock = kvm_clock_read;
|
||||
return;
|
||||
}
|
||||
|
||||
kvm_sched_clock_offset = kvm_clock_read();
|
||||
pv_time_ops.sched_clock = kvm_sched_clock_read;
|
||||
set_sched_clock_stable();
|
||||
|
||||
printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
|
||||
kvm_sched_clock_offset);
|
||||
|
||||
BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
|
||||
sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't do that, there is the possibility that the guest
|
||||
* will calibrate under heavy load - thus, getting a lower lpj -
|
||||
|
@ -248,7 +272,17 @@ void __init kvmclock_init(void)
|
|||
memblock_free(mem, size);
|
||||
return;
|
||||
}
|
||||
pv_time_ops.sched_clock = kvm_clock_read;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
||||
|
||||
cpu = get_cpu();
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
|
||||
kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
|
||||
put_cpu();
|
||||
|
||||
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
|
||||
x86_platform.get_wallclock = kvm_get_wallclock;
|
||||
x86_platform.set_wallclock = kvm_set_wallclock;
|
||||
|
@ -265,16 +299,6 @@ void __init kvmclock_init(void)
|
|||
kvm_get_preset_lpj();
|
||||
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
|
||||
pv_info.name = "KVM";
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(~0);
|
||||
|
||||
cpu = get_cpu();
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
if (flags & PVCLOCK_COUNTS_FROM_ZERO)
|
||||
set_sched_clock_stable();
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
int __init kvm_setup_vsyscall_timeinfo(void)
|
||||
|
|
Loading…
Reference in New Issue