kvm: vmx: hook preemption timer support
Hook the VMX preemption timer to the "hv timer" functionality added by the previous patch. This includes: checking whether the feature is supported and whether it is broken on the CPU, the hooks to set up and cancel the VMX preemption timer, arming the timer on vmentry, and handling the vmexit. A module parameter controls whether the VMX preemption timer is used. Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com> [Move hv_deadline_tsc to struct vcpu_vmx, use -1 as the "unset" value. Put all VMX bits here. Enable it by default #yolo. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
bc22512bb2
commit
64672c95ea
|
@ -1079,6 +1079,8 @@ extern u32 kvm_max_guest_tsc_khz;
|
|||
extern u8 kvm_tsc_scaling_ratio_frac_bits;
|
||||
/* maximum allowed value of TSC scaling ratio */
|
||||
extern u64 kvm_max_tsc_scaling_ratio;
|
||||
/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
|
||||
extern u64 kvm_default_tsc_scaling_ratio;
|
||||
|
||||
enum emulation_result {
|
||||
EMULATE_DONE, /* no further processing */
|
||||
|
|
|
@ -110,6 +110,13 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
|
|||
|
||||
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
|
||||
|
||||
/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
|
||||
static int __read_mostly cpu_preemption_timer_multi;
|
||||
static bool __read_mostly enable_preemption_timer = 1;
|
||||
#ifdef CONFIG_X86_64
|
||||
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
|
||||
#endif
|
||||
|
||||
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
|
||||
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
|
||||
#define KVM_VM_CR0_ALWAYS_ON \
|
||||
|
@ -597,6 +604,9 @@ struct vcpu_vmx {
|
|||
#define PML_ENTITY_NUM 512
|
||||
struct page *pml_pg;
|
||||
|
||||
/* apic deadline value in host tsc */
|
||||
u64 hv_deadline_tsc;
|
||||
|
||||
u64 current_tsc_ratio;
|
||||
|
||||
bool guest_pkru_valid;
|
||||
|
@ -1056,6 +1066,61 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
|
|||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Format of the comments below: document - erratum name - stepping - processor name.
|
||||
* Reference:
|
||||
* https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
|
||||
*/
|
||||
/*
 * CPUID leaf 1 EAX signatures of the processors covered by the errata
 * listed below.  The table is only ever searched, never written, so it is
 * declared const.
 */
static const u32 vmx_preemption_cpu_tfms[] = {
	/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
	0x000206E6,
	/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
	/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
	/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
	0x00020652,
	/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
	0x00020655,
	/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
	/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
	/*
	 * 320767.pdf - AAP86  - B1 -
	 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
	 */
	0x000106E5,
	/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
	0x000106A0,
	/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
	0x000106A1,
	/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
	0x000106A4,
	/* 321333.pdf - AAM126 - D0 - Xeon 3500 */
	/* 321324.pdf - AAK139 - D0 - Xeon 5500 */
	/* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
	0x000106A5,
};
|
||||
|
||||
static inline bool cpu_has_broken_vmx_preemption_timer(void)
|
||||
{
|
||||
u32 eax = cpuid_eax(0x00000001), i;
|
||||
|
||||
/* Clear the reserved bits */
|
||||
eax &= ~(0x3U << 14 | 0xfU << 28);
|
||||
for (i = 0; i < sizeof(vmx_preemption_cpu_tfms)/sizeof(u32); i++)
|
||||
if (eax == vmx_preemption_cpu_tfms[i])
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_preemption_timer(void)
|
||||
{
|
||||
if (cpu_has_broken_vmx_preemption_timer())
|
||||
return false;
|
||||
|
||||
return vmcs_config.pin_based_exec_ctrl &
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_posted_intr(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
|
||||
|
@ -3308,7 +3373,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|||
return -EIO;
|
||||
|
||||
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
|
||||
opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
|
||||
opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER;
|
||||
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
|
||||
&_pin_based_exec_control) < 0)
|
||||
return -EIO;
|
||||
|
@ -4781,6 +4847,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
|
|||
|
||||
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
|
||||
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
|
||||
/* Enable the preemption timer dynamically */
|
||||
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
|
||||
return pin_based_exec_ctrl;
|
||||
}
|
||||
|
||||
|
@ -4899,6 +4967,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|||
|
||||
/* Control */
|
||||
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
|
||||
vmx->hv_deadline_tsc = -1;
|
||||
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
|
||||
|
||||
|
@ -6389,6 +6458,17 @@ static __init int hardware_setup(void)
|
|||
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
|
||||
}
|
||||
|
||||
if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
|
||||
u64 vmx_msr;
|
||||
|
||||
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
|
||||
cpu_preemption_timer_multi =
|
||||
vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
|
||||
} else {
|
||||
kvm_x86_ops->set_hv_timer = NULL;
|
||||
kvm_x86_ops->cancel_hv_timer = NULL;
|
||||
}
|
||||
|
||||
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
|
||||
|
||||
return alloc_kvm_area();
|
||||
|
@ -7564,6 +7644,12 @@ static int handle_pcommit(struct kvm_vcpu *vcpu)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * VM-exit handler for EXIT_REASON_PREEMPTION_TIMER: forward the expiry to
 * the local APIC code.
 */
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
	kvm_lapic_expired_hv_timer(vcpu);

	/* Exit fully handled, guest execution may resume. */
	return 1;
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
|
@ -7615,6 +7701,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
|||
[EXIT_REASON_XRSTORS] = handle_xrstors,
|
||||
[EXIT_REASON_PML_FULL] = handle_pml_full,
|
||||
[EXIT_REASON_PCOMMIT] = handle_pcommit,
|
||||
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
|
@ -8623,6 +8710,26 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
|
|||
msrs[i].host);
|
||||
}
|
||||
|
||||
void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u64 tscl;
|
||||
u32 delta_tsc;
|
||||
|
||||
if (vmx->hv_deadline_tsc == -1)
|
||||
return;
|
||||
|
||||
tscl = rdtsc();
|
||||
if (vmx->hv_deadline_tsc > tscl)
|
||||
/* sure to be 32 bit only because checked on set_hv_timer */
|
||||
delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
|
||||
cpu_preemption_timer_multi);
|
||||
else
|
||||
delta_tsc = 0;
|
||||
|
||||
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
|
||||
}
|
||||
|
||||
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
@ -8672,6 +8779,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
atomic_switch_perf_msrs(vmx);
|
||||
debugctlmsr = get_debugctlmsr();
|
||||
|
||||
vmx_arm_hv_timer(vcpu);
|
||||
|
||||
vmx->__launched = vmx->loaded_vmcs->launched;
|
||||
asm(
|
||||
/* Store host registers */
|
||||
|
@ -10662,6 +10771,64 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
|
|||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
 * Compute (a << shift) / divisor using a 128-bit dividend.
 *
 * @a:       value to shift and divide
 * @shift:   left shift applied to @a; must be less than 64
 * @divisor: divisor; 0 is reported as overflow rather than dividing
 * @result:  receives the quotient, written only on success
 *
 * Returns 0 on success, or 1 if the quotient does not fit in 64 bits.
 */
static inline int u64_shl_div_u64(u64 a, unsigned int shift,
				  u64 divisor, u64 *result)
{
	u64 low = a << shift;
	/*
	 * The upper half of the dividend is the bits shifted out of @a.
	 * With shift == 0 nothing is shifted out, and "a >> 64" would be
	 * undefined behaviour (x86 masks the count and would yield @a,
	 * falsely reporting overflow), so handle that case explicitly.
	 */
	u64 high = shift ? a >> (64 - shift) : 0;

	/* To avoid the overflow (#DE) on divq; also rejects divisor == 0 */
	if (high >= divisor)
		return 1;

	/* Low holds the result, high holds the remainder which is discarded */
	__asm__("divq %2\n\t" : "=a" (low), "=d" (high) :
		"rm" (divisor), "0" (low), "1" (high));
	*result = low;

	return 0;
}
|
||||
|
||||
/*
 * Record a guest TSC deadline as a host TSC deadline and enable the VMX
 * preemption timer pin-based control.
 *
 * Returns 0 on success, or -ERANGE when the delta cannot be converted to
 * host TSC units or does not fit the timer.
 */
static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 host_now, delta;

	host_now = rdtsc();
	delta = guest_deadline_tsc - kvm_read_l1_tsc(vcpu, host_now);

	/* With TSC scaling active, convert the guest delta to a host delta. */
	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) {
		if (u64_shl_div_u64(delta, kvm_tsc_scaling_ratio_frac_bits,
				    vcpu->arch.tsc_scaling_ratio, &delta))
			return -ERANGE;
	}

	/*
	 * The timer value written on vmentry is 32 bits wide after the rate
	 * shift.  A delta that does not fit now might fit on a later
	 * vmentry, but re-checking on every entry is costly, so reject it
	 * and let the caller fall back to an hrtimer.
	 */
	if ((delta >> (cpu_preemption_timer_multi + 32)) != 0)
		return -ERANGE;

	vmx->hv_deadline_tsc = host_now + delta;
	vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
		      PIN_BASED_VMX_PREEMPTION_TIMER);

	return 0;
}
|
||||
|
||||
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
vmx->hv_deadline_tsc = -1;
|
||||
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
if (ple_gap)
|
||||
|
@ -10777,6 +10944,9 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
|
|||
if (pi_pre_block(vcpu))
|
||||
return 1;
|
||||
|
||||
if (kvm_lapic_hv_timer_in_use(vcpu))
|
||||
kvm_lapic_switch_to_sw_timer(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -10823,6 +10993,9 @@ static void pi_post_block(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void vmx_post_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_x86_ops->set_hv_timer)
|
||||
kvm_lapic_switch_to_hv_timer(vcpu);
|
||||
|
||||
pi_post_block(vcpu);
|
||||
}
|
||||
|
||||
|
@ -11038,6 +11211,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|||
.pmu_ops = &intel_pmu_ops,
|
||||
|
||||
.update_pi_irte = vmx_update_pi_irte,
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
.set_hv_timer = vmx_set_hv_timer,
|
||||
.cancel_hv_timer = vmx_cancel_hv_timer,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int __init vmx_init(void)
|
||||
|
|
|
@ -114,7 +114,8 @@ u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
|
|||
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
|
||||
u64 __read_mostly kvm_max_tsc_scaling_ratio;
|
||||
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
|
||||
static u64 __read_mostly kvm_default_tsc_scaling_ratio;
|
||||
u64 __read_mostly kvm_default_tsc_scaling_ratio;
|
||||
EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
|
||||
|
||||
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
|
||||
static u32 __read_mostly tsc_tolerance_ppm = 250;
|
||||
|
|
Loading…
Reference in New Issue