x86/kvm: Pass stable clocksource to guests when running nested on Hyper-V
Currently, KVM is able to work in 'masterclock' mode, passing PVCLOCK_TSC_STABLE_BIT to guests, when the clocksource used on the host is TSC. When running nested on Hyper-V, the guest normally uses a different one: the TSC page, which is resistant to TSC frequency changes on events such as L1 migration. Add support for it in KVM.

The only non-trivial change is in vgettsc(): when updating the gtod copy, both the clock readout and the TSC value have to be updated now.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: kvm@vger.kernel.org
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: "Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: devel@linuxdriverproject.org
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Cathy Avery <cavery@redhat.com>
Cc: Mohammed Gamal <mmorsy@redhat.com>
Link: https://lkml.kernel.org/r/20180124132337.30138-7-vkuznets@redhat.com
This commit is contained in:
parent 51d4e5daa3
commit b0c39dc68e
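For context: the new VCLOCK_HVCLOCK path in vgettsc() below delegates the actual readout to hv_read_tsc_page_tsc(). Below is a minimal sketch of what such a reference-TSC-page readout looks like, assuming the TLFS-defined page layout (struct ms_hyperv_tsc_page with tsc_sequence/tsc_scale/tsc_offset fields); the function name is hypothetical and the real helper in asm/mshyperv.h may differ in detail:

/*
 * Hedged sketch of the reference TSC page readout performed by
 * hv_read_tsc_page_tsc() (asm/mshyperv.h); field names follow
 * struct ms_hyperv_tsc_page, the in-kernel helper may differ.
 */
static u64 tsc_page_read_sketch(const struct ms_hyperv_tsc_page *tsc_pg,
                                u64 *cur_tsc)
{
        u64 scale;
        s64 offset;
        u32 sequence;

        do {
                sequence = READ_ONCE(tsc_pg->tsc_sequence);
                if (!sequence)
                        return U64_MAX; /* page invalid, caller falls back */

                smp_rmb();      /* read scale/offset after the sequence */
                scale = READ_ONCE(tsc_pg->tsc_scale);
                offset = READ_ONCE(tsc_pg->tsc_offset);
                *cur_tsc = rdtsc_ordered();

                smp_rmb();      /* re-check the sequence after all reads */
        } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);

        /* reference time = ((tsc * scale) >> 64) + offset, per the TLFS */
        return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
}

While the hypervisor updates the page it briefly publishes a zero sequence; returning U64_MAX in that window is what lets vgettsc() in the hunks below fall back to VCLOCK_NONE instead of handing the guest a bogus reading.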
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,7 @@
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 #include <asm/irq_remapping.h>
+#include <asm/mshyperv.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -1377,6 +1378,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
         return tsc;
 }
 
+static inline int gtod_is_based_on_tsc(int mode)
+{
+        return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+}
+
 static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -1396,7 +1402,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
          * perform request to enable masterclock.
          */
         if (ka->use_master_clock ||
-            (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+            (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
                 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
         trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1459,6 +1465,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
         vcpu->arch.tsc_offset = offset;
 }
 
+static inline bool kvm_check_tsc_unstable(void)
+{
+#ifdef CONFIG_X86_64
+        /*
+         * TSC is marked unstable when we're running on Hyper-V,
+         * 'TSC page' clocksource is good.
+         */
+        if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+                return false;
+#endif
+        return check_tsc_unstable();
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
         struct kvm *kvm = vcpu->kvm;
@@ -1504,7 +1523,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
          */
         if (synchronizing &&
             vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
-                if (!check_tsc_unstable()) {
+                if (!kvm_check_tsc_unstable()) {
                         offset = kvm->arch.cur_tsc_offset;
                         pr_debug("kvm: matched tsc offset for %llu\n", data);
                 } else {
@@ -1604,18 +1623,43 @@ static u64 read_tsc(void)
         return last;
 }
 
-static inline u64 vgettsc(u64 *cycle_now)
+static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
 {
         long v;
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+        u64 tsc_pg_val;
+
+        switch (gtod->clock.vclock_mode) {
+        case VCLOCK_HVCLOCK:
+                tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
+                                                  tsc_timestamp);
+                if (tsc_pg_val != U64_MAX) {
+                        /* TSC page valid */
+                        *mode = VCLOCK_HVCLOCK;
+                        v = (tsc_pg_val - gtod->clock.cycle_last) &
+                                gtod->clock.mask;
+                } else {
+                        /* TSC page invalid */
+                        *mode = VCLOCK_NONE;
+                }
+                break;
+        case VCLOCK_TSC:
+                *mode = VCLOCK_TSC;
+                *tsc_timestamp = read_tsc();
+                v = (*tsc_timestamp - gtod->clock.cycle_last) &
+                        gtod->clock.mask;
+                break;
+        default:
+                *mode = VCLOCK_NONE;
+        }
 
-        *cycle_now = read_tsc();
+        if (*mode == VCLOCK_NONE)
+                *tsc_timestamp = v = 0;
 
-        v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
         return v * gtod->clock.mult;
 }
 
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
 {
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
         unsigned long seq;
@@ -1624,9 +1668,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
 
         do {
                 seq = read_seqcount_begin(&gtod->seq);
-                mode = gtod->clock.vclock_mode;
                 ns = gtod->nsec_base;
-                ns += vgettsc(cycle_now);
+                ns += vgettsc(tsc_timestamp, &mode);
                 ns >>= gtod->clock.shift;
                 ns += gtod->boot_ns;
         } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1635,7 +1678,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
         return mode;
 }
 
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
 {
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
         unsigned long seq;
@@ -1644,10 +1687,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
 
         do {
                 seq = read_seqcount_begin(&gtod->seq);
-                mode = gtod->clock.vclock_mode;
                 ts->tv_sec = gtod->wall_time_sec;
                 ns = gtod->nsec_base;
-                ns += vgettsc(cycle_now);
+                ns += vgettsc(tsc_timestamp, &mode);
                 ns >>= gtod->clock.shift;
         } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1657,25 +1699,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
         return mode;
 }
 
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
+/* returns true if host is using TSC based clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
 {
         /* checked again under seqlock below */
-        if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+        if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                 return false;
 
-        return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+        return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+                                                      tsc_timestamp));
 }
 
-/* returns true if host is using tsc clocksource */
+/* returns true if host is using TSC based clocksource */
 static bool kvm_get_walltime_and_clockread(struct timespec *ts,
-                                           u64 *cycle_now)
+                                           u64 *tsc_timestamp)
 {
         /* checked again under seqlock below */
-        if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+        if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                 return false;
 
-        return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+        return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
 }
 #endif
 
@@ -2869,13 +2912,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
         }
 
-        if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+        if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
                 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
                                 rdtsc() - vcpu->arch.last_host_tsc;
                 if (tsc_delta < 0)
                         mark_tsc_unstable("KVM discovered backwards TSC");
 
-                if (check_tsc_unstable()) {
+                if (kvm_check_tsc_unstable()) {
                         u64 offset = kvm_compute_tsc_offset(vcpu,
                                                 vcpu->arch.last_guest_tsc);
                         kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -6110,9 +6153,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
         update_pvclock_gtod(tk);
 
         /* disable master clock if host does not trust, or does not
-         * use, TSC clocksource
+         * use, TSC based clocksource.
          */
-        if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+        if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
             atomic_read(&kvm_guest_has_master_clock) != 0)
                 queue_work(system_long_wq, &pvclock_gtod_work);
 
@@ -7767,7 +7810,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 {
         struct kvm_vcpu *vcpu;
 
-        if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+        if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
                 printk_once(KERN_WARNING
                 "kvm: SMP vm created on host with unstable TSC; "
                 "guest TSC will not be reliable\n");
@@ -7924,7 +7967,7 @@ int kvm_arch_hardware_enable(void)
                 return ret;
 
         local_tsc = rdtsc();
-        stable = !check_tsc_unstable();
+        stable = !kvm_check_tsc_unstable();
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 kvm_for_each_vcpu(i, vcpu, kvm) {
                         if (!stable && vcpu->cpu == smp_processor_id())