KVM: VMX: Add PML support in VMX
This patch adds PML support in VMX. A new module parameter 'enable_pml' is added to allow user to enable/disable it manually. Signed-off-by: Kai Huang <kai.huang@linux.intel.com> Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
88178fd4f7
commit
843e433057
|
@ -69,6 +69,7 @@
|
|||
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
|
||||
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
|
||||
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
|
||||
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
|
||||
#define SECONDARY_EXEC_XSAVES 0x00100000
|
||||
|
||||
|
||||
|
@ -121,6 +122,7 @@ enum vmcs_field {
|
|||
GUEST_LDTR_SELECTOR = 0x0000080c,
|
||||
GUEST_TR_SELECTOR = 0x0000080e,
|
||||
GUEST_INTR_STATUS = 0x00000810,
|
||||
GUEST_PML_INDEX = 0x00000812,
|
||||
HOST_ES_SELECTOR = 0x00000c00,
|
||||
HOST_CS_SELECTOR = 0x00000c02,
|
||||
HOST_SS_SELECTOR = 0x00000c04,
|
||||
|
@ -140,6 +142,8 @@ enum vmcs_field {
|
|||
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
|
||||
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
|
||||
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
|
||||
PML_ADDRESS = 0x0000200e,
|
||||
PML_ADDRESS_HIGH = 0x0000200f,
|
||||
TSC_OFFSET = 0x00002010,
|
||||
TSC_OFFSET_HIGH = 0x00002011,
|
||||
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
|
||||
|
|
|
@ -73,6 +73,7 @@
|
|||
#define EXIT_REASON_XSETBV 55
|
||||
#define EXIT_REASON_APIC_WRITE 56
|
||||
#define EXIT_REASON_INVPCID 58
|
||||
#define EXIT_REASON_PML_FULL 62
|
||||
#define EXIT_REASON_XSAVES 63
|
||||
#define EXIT_REASON_XRSTORS 64
|
||||
|
||||
|
|
|
@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
|
|||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* Tracepoint for PML full VMEXIT.
|
||||
*/
|
||||
TRACE_EVENT(kvm_pml_full,
|
||||
TP_PROTO(unsigned int vcpu_id),
|
||||
TP_ARGS(vcpu_id),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
),
|
||||
|
||||
TP_printk("vcpu %d: PML full", __entry->vcpu_id)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_ple_window,
|
||||
TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
|
||||
TP_ARGS(grow, vcpu_id, new, old),
|
||||
|
|
|
@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
|
|||
|
||||
static u64 __read_mostly host_xss;
|
||||
|
||||
static bool __read_mostly enable_pml = 1;
|
||||
module_param_named(pml, enable_pml, bool, S_IRUGO);
|
||||
|
||||
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
|
||||
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
|
||||
#define KVM_VM_CR0_ALWAYS_ON \
|
||||
|
@ -516,6 +519,10 @@ struct vcpu_vmx {
|
|||
/* Dynamic PLE window. */
|
||||
int ple_window;
|
||||
bool ple_window_dirty;
|
||||
|
||||
/* Support for PML */
|
||||
#define PML_ENTITY_NUM 512
|
||||
struct page *pml_pg;
|
||||
};
|
||||
|
||||
enum segment_cache_field {
|
||||
|
@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
|
|||
SECONDARY_EXEC_SHADOW_VMCS;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_pml(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
|
||||
}
|
||||
|
||||
static inline bool report_flexpriority(void)
|
||||
{
|
||||
return flexpriority_enabled;
|
||||
|
@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
||||
SECONDARY_EXEC_SHADOW_VMCS |
|
||||
SECONDARY_EXEC_XSAVES;
|
||||
SECONDARY_EXEC_XSAVES |
|
||||
SECONDARY_EXEC_ENABLE_PML;
|
||||
if (adjust_vmx_controls(min2, opt2,
|
||||
MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
&_cpu_based_2nd_exec_control) < 0)
|
||||
|
@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
|||
a current VMCS12
|
||||
*/
|
||||
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
|
||||
/* PML is enabled/disabled when creating/destroying a vcpu */
|
||||
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
|
||||
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
|
@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
|
|||
|
||||
update_ple_window_actual_max();
|
||||
|
||||
/*
|
||||
* Only enable PML when hardware supports PML feature, and both EPT
|
||||
* and EPT A/D bit features are enabled -- PML depends on them to work.
|
||||
*/
|
||||
if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
|
||||
enable_pml = 0;
|
||||
|
||||
if (!enable_pml) {
|
||||
kvm_x86_ops->slot_enable_log_dirty = NULL;
|
||||
kvm_x86_ops->slot_disable_log_dirty = NULL;
|
||||
kvm_x86_ops->flush_log_dirty = NULL;
|
||||
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
|
||||
}
|
||||
|
||||
return alloc_kvm_area();
|
||||
|
||||
out7:
|
||||
|
@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
|
|||
return pi_test_pir(vector, &vmx->pi_desc);
|
||||
}
|
||||
|
||||
static int handle_pml_full(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long exit_qualification;
|
||||
|
||||
trace_kvm_pml_full(vcpu->vcpu_id);
|
||||
|
||||
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
|
||||
/*
|
||||
* PML buffer FULL happened while executing iret from NMI,
|
||||
* "blocked by NMI" bit has to be set before next VM entry.
|
||||
*/
|
||||
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||||
cpu_has_virtual_nmis() &&
|
||||
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
|
||||
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||||
GUEST_INTR_STATE_NMI);
|
||||
|
||||
/*
|
||||
* PML buffer already flushed at beginning of VMEXIT. Nothing to do
|
||||
* here.., and there's no userspace involvement needed for PML.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
|
@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
|||
[EXIT_REASON_INVVPID] = handle_invvpid,
|
||||
[EXIT_REASON_XSAVES] = handle_xsaves,
|
||||
[EXIT_REASON_XRSTORS] = handle_xrstors,
|
||||
[EXIT_REASON_PML_FULL] = handle_pml_full,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
|
@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
|
|||
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
}
|
||||
|
||||
static int vmx_enable_pml(struct vcpu_vmx *vmx)
|
||||
{
|
||||
struct page *pml_pg;
|
||||
u32 exec_control;
|
||||
|
||||
pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!pml_pg)
|
||||
return -ENOMEM;
|
||||
|
||||
vmx->pml_pg = pml_pg;
|
||||
|
||||
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
|
||||
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
|
||||
|
||||
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
exec_control |= SECONDARY_EXEC_ENABLE_PML;
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vmx_disable_pml(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exec_control;
|
||||
|
||||
ASSERT(vmx->pml_pg);
|
||||
__free_page(vmx->pml_pg);
|
||||
vmx->pml_pg = NULL;
|
||||
|
||||
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
||||
}
|
||||
|
||||
static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
|
||||
{
|
||||
struct kvm *kvm = vmx->vcpu.kvm;
|
||||
u64 *pml_buf;
|
||||
u16 pml_idx;
|
||||
|
||||
pml_idx = vmcs_read16(GUEST_PML_INDEX);
|
||||
|
||||
/* Do nothing if PML buffer is empty */
|
||||
if (pml_idx == (PML_ENTITY_NUM - 1))
|
||||
return;
|
||||
|
||||
/* PML index always points to next available PML buffer entity */
|
||||
if (pml_idx >= PML_ENTITY_NUM)
|
||||
pml_idx = 0;
|
||||
else
|
||||
pml_idx++;
|
||||
|
||||
pml_buf = page_address(vmx->pml_pg);
|
||||
for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
|
||||
u64 gpa;
|
||||
|
||||
gpa = pml_buf[pml_idx];
|
||||
WARN_ON(gpa & (PAGE_SIZE - 1));
|
||||
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/* reset PML index */
|
||||
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
|
||||
* Called before reporting dirty_bitmap to userspace.
|
||||
*/
|
||||
static void kvm_flush_pml_buffers(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
/*
|
||||
* We only need to kick vcpu out of guest mode here, as PML buffer
|
||||
* is flushed at beginning of all VMEXITs, and it's obvious that only
|
||||
* vcpus running in guest are possible to have unflushed GPAs in PML
|
||||
* buffer.
|
||||
*/
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* The guest has exited. See if we can fix it or if we need userspace
|
||||
* assistance.
|
||||
|
@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
|||
u32 exit_reason = vmx->exit_reason;
|
||||
u32 vectoring_info = vmx->idt_vectoring_info;
|
||||
|
||||
/*
|
||||
* Flush the GPAs logged in the PML buffer; this keeps dirty_bitmap more
|
||||
* up to date. Another benefit is that, in kvm_vm_ioctl_get_dirty_log, before
|
||||
* querying dirty_bitmap, we only need to kick all vcpus out of guest
|
||||
* mode: if a vcpu is in root mode, its PML buffer must have been
|
||||
* flushed already.
|
||||
*/
|
||||
if (enable_pml)
|
||||
vmx_flush_pml_buffer(vmx);
|
||||
|
||||
/* If guest state is invalid, start emulating */
|
||||
if (vmx->emulation_required)
|
||||
return handle_invalid_guest_state(vcpu);
|
||||
|
@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (enable_pml)
|
||||
vmx_disable_pml(vmx);
|
||||
free_vpid(vmx);
|
||||
leave_guest_mode(vcpu);
|
||||
vmx_load_vmcs01(vcpu);
|
||||
|
@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
vmx->nested.current_vmptr = -1ull;
|
||||
vmx->nested.current_vmcs12 = NULL;
|
||||
|
||||
/*
|
||||
* If PML is turned on, failure on enabling PML just results in failure
|
||||
* of creating the vcpu, therefore we can simplify PML logic (by
|
||||
* avoiding dealing with cases, such as enabling PML partially on vcpus
|
||||
* for the guest, etc.).
|
||||
*/
|
||||
if (enable_pml) {
|
||||
err = vmx_enable_pml(vmx);
|
||||
if (err)
|
||||
goto free_vmcs;
|
||||
}
|
||||
|
||||
return &vmx->vcpu;
|
||||
|
||||
free_vmcs:
|
||||
|
@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
|
|||
shrink_ple_window(vcpu);
|
||||
}
|
||||
|
||||
/*
 * slot_enable_log_dirty hook: prepare @slot for dirty logging by calling
 * kvm_mmu_slot_leaf_clear_dirty() and then
 * kvm_mmu_slot_largepage_remove_write_access() on it.
 */
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
				      struct kvm_memory_slot *slot)
{
	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);

	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
}
|
||||
|
||||
/*
 * slot_disable_log_dirty hook: hand @slot to kvm_mmu_slot_set_dirty() when
 * dirty logging is turned off for it.
 */
static void vmx_slot_disable_log_dirty(struct kvm *kvm,
				       struct kvm_memory_slot *slot)
{
	kvm_mmu_slot_set_dirty(kvm, slot);
}
|
||||
|
||||
/*
 * flush_log_dirty hook: delegate to kvm_flush_pml_buffers() for @kvm.
 */
static void vmx_flush_log_dirty(struct kvm *kvm)
{
	kvm_flush_pml_buffers(kvm);
}
|
||||
|
||||
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
gfn_t offset, unsigned long mask)
|
||||
{
|
||||
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.cpu_has_kvm_support = cpu_has_kvm_support,
|
||||
.disabled_by_bios = vmx_disabled_by_bios,
|
||||
|
@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|||
.check_nested_events = vmx_check_nested_events,
|
||||
|
||||
.sched_in = vmx_sched_in,
|
||||
|
||||
.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
|
||||
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
|
||||
.flush_log_dirty = vmx_flush_log_dirty,
|
||||
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
|
||||
};
|
||||
|
||||
static int __init vmx_init(void)
|
||||
|
|
|
@ -7880,3 +7880,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
|
|||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
|
||||
|
|
Loading…
Reference in New Issue