KVM: x86: enable dirty log gradually in small chunks
It could take kvm->mmu_lock for an extended period of time when
enabling dirty log for the first time.  The main cost is to clear
all the D-bits of last level SPTEs.  This situation can benefit from
manual dirty log protect as well, which can reduce the mmu_lock
time taken.  The sequence is like this:

1. Initialize all the bits of the dirty bitmap to 1 when enabling
   dirty log for the first time
2. Only write protect the huge pages
3. KVM_GET_DIRTY_LOG returns the dirty bitmap info
4. KVM_CLEAR_DIRTY_LOG will clear D-bit for each of the leaf level
   SPTEs gradually in small chunks

Under the Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz environment,
I did some tests with a 128G Windows VM and measured the time taken
by memory_global_dirty_log_start; here are the numbers:

VM Size        Before    After optimization
128G           460ms     10ms

Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
commit 3c9bd4006b
parent 0be4435207
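To make the new interface concrete, here is a minimal userspace sketch of
opting in to the initially-set mode.  It is illustrative only and not part
of the patch: the helper name is hypothetical, vm_fd is assumed to be an
open KVM VM file descriptor, and the kernel is assumed to include this
change (so KVM_CHECK_EXTENSION reports the supported flag mask instead
of 1).

/*
 * Hypothetical VMM-side helper, shown for illustration only (not part
 * of this patch).  Assumes vm_fd is an open KVM VM file descriptor.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_dirty_log_initially_set(int vm_fd)
{
	struct kvm_enable_cap cap;
	int supported;

	/* After this patch, the extension check returns the flag mask. */
	supported = ioctl(vm_fd, KVM_CHECK_EXTENSION,
			  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	if (supported < 0 || !(supported & KVM_DIRTY_LOG_INITIALLY_SET))
		return -1;	/* kernel lacks the initially-set mode */

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	/* INITIALLY_SET is only valid together with MANUAL_PROTECT_ENABLE. */
	cap.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
		      KVM_DIRTY_LOG_INITIALLY_SET;
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

Passing KVM_DIRTY_LOG_INITIALLY_SET alone would fail: as the enable-cap
hunk below shows, that bit is only accepted in args[0] once
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is also set.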
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5707,8 +5707,13 @@ and injected exceptions.
 :Architectures: x86, arm, arm64, mips
 :Parameters: args[0] whether feature should be enabled or not
 
-With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
-clear and write-protect all pages that are returned as dirty.
+Valid flags are::
+
+  #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE   (1 << 0)
+  #define KVM_DIRTY_LOG_INITIALLY_SET           (1 << 1)
+
+When KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is set, KVM_GET_DIRTY_LOG will not
+automatically clear and write-protect all pages that are returned as dirty.
 Rather, userspace will have to do this operation separately using
 KVM_CLEAR_DIRTY_LOG.
 
@@ -5719,12 +5724,19 @@ than requiring to sync a full memslot; this ensures that KVM does not
 take spinlocks for an extended period of time. Second, in some cases a
 large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
 userspace actually using the data in the page. Pages can be modified
-during this time, which is inefficint for both the guest and userspace:
+during this time, which is inefficient for both the guest and userspace:
 the guest will incur a higher penalty due to write protection faults,
 while userspace can see false reports of dirty pages. Manual reprotection
 helps reducing this time, improving guest performance and reducing the
 number of dirty log false positives.
 
+With KVM_DIRTY_LOG_INITIALLY_SET set, all the bits of the dirty bitmap
+will be initialized to 1 when created.  This also improves performance because
+dirty logging can be enabled gradually in small chunks on the first call
+to KVM_CLEAR_DIRTY_LOG.  KVM_DIRTY_LOG_INITIALLY_SET depends on
+KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
+x86 for now).
+
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
 it hard or impossible to use it correctly.  The availability of
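The gradual enabling that the documentation above describes is driven from
userspace via KVM_CLEAR_DIRTY_LOG.  A sketch of that first-pass loop, again
illustrative rather than part of the patch: CHUNK_PAGES and
clear_slot_in_chunks are made-up names, and ones_bitmap is assumed to hold
at least CHUNK_PAGES set bits (with INITIALLY_SET every page starts out
reported as dirty).

/*
 * Illustrative only: write-protect a slot piecewise on the first
 * migration pass.  With KVM_DIRTY_LOG_INITIALLY_SET every bit starts
 * at 1, so passing an all-ones bitmap chunk by chunk lets KVM clear
 * D-bits (and write-protect huge pages) in small batches.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

#define CHUNK_PAGES 4096ULL	/* illustrative; must be a multiple of 64 */

static int clear_slot_in_chunks(int vm_fd, __u32 slot,
				__u64 slot_pages, __u64 *ones_bitmap)
{
	__u64 first, n;

	for (first = 0; first < slot_pages; first += CHUNK_PAGES) {
		n = slot_pages - first;
		if (n > CHUNK_PAGES)
			n = CHUNK_PAGES;

		struct kvm_clear_dirty_log clr = {
			.slot = slot,
			.first_page = first,
			.num_pages = n,
			/* bit 0 of the bitmap corresponds to first_page */
			.dirty_bitmap = ones_bitmap,
		};
		if (ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clr) < 0)
			return -1;
	}
	return 0;
}

Each call covers only num_pages pages, so the D-bit clearing that used to
happen all at once when logging was enabled is spread across many short
mmu_lock critical sections.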
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -49,6 +49,9 @@
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
+#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+				     KVM_DIRTY_LOG_INITIALLY_SET)
+
 /* x86-specific vcpu->requests bit members */
 #define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
 #define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
@@ -1306,7 +1309,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot);
+				      struct kvm_memory_slot *memslot,
+				      int start_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5864,13 +5864,14 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot)
+				      struct kvm_memory_slot *memslot,
+				      int start_level)
 {
 	bool flush;
 
 	spin_lock(&kvm->mmu_lock);
-	flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
-				      false);
+	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+				  start_level, PT_MAX_HUGEPAGE_LEVEL, false);
 	spin_unlock(&kvm->mmu_lock);
 
 	/*
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7280,7 +7280,8 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 static void vmx_slot_enable_log_dirty(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
 {
-	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
+	if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
+		kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
 	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
 }
 
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9916,7 +9916,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 {
 	/* Still write protect RO slot */
 	if (new->flags & KVM_MEM_READONLY) {
-		kvm_mmu_slot_remove_write_access(kvm, new);
+		kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
 		return;
 	}
 
@@ -9951,10 +9951,23 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	 * See the comments in fast_page_fault().
 	 */
 	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
-		if (kvm_x86_ops->slot_enable_log_dirty)
+		if (kvm_x86_ops->slot_enable_log_dirty) {
 			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
-		else
-			kvm_mmu_slot_remove_write_access(kvm, new);
+		} else {
+			int level =
+				kvm_dirty_log_manual_protect_and_init_set(kvm) ?
+				PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+
+			/*
+			 * If we're with initial-all-set, we don't need
+			 * to write protect any small page because
+			 * they're reported as dirty already.  However
+			 * we still need to write-protect huge pages
+			 * so that the page split can happen lazily on
+			 * the first write to the huge page.
+			 */
+			kvm_mmu_slot_remove_write_access(kvm, new, level);
+		}
 	} else {
 		if (kvm_x86_ops->slot_disable_log_dirty)
 			kvm_x86_ops->slot_disable_log_dirty(kvm, new);
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -360,6 +360,10 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
 }
 
+#ifndef KVM_DIRTY_LOG_MANUAL_CAPS
+#define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
+#endif
+
 struct kvm_s390_adapter_int {
 	u64 ind_addr;
 	u64 summary_addr;
@@ -493,7 +497,7 @@ struct kvm {
 #endif
 	long tlbs_dirty;
 	struct list_head devices;
-	bool manual_dirty_log_protect;
+	u64 manual_dirty_log_protect;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
 	struct srcu_struct srcu;
@@ -527,6 +531,11 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
+{
+	return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
+}
+
 static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
 {
 	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1628,4 +1628,7 @@ struct kvm_hyperv_eventfd {
 #define KVM_HYPERV_CONN_ID_MASK		0x00ffffff
 #define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
 
+#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE	(1 << 0)
+#define KVM_DIRTY_LOG_INITIALLY_SET		(1 << 1)
+
 #endif /* __LINUX_KVM_H */
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -858,7 +858,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  * Allocation size is twice as large as the actual dirty bitmap size.
  * See kvm_vm_ioctl_get_dirty_log() why this is needed.
  */
-static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
+static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
 	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
@@ -1288,9 +1288,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
 		new.dirty_bitmap = NULL;
 	else if (!new.dirty_bitmap) {
-		r = kvm_create_dirty_bitmap(&new);
+		r = kvm_alloc_dirty_bitmap(&new);
 		if (r)
 			return r;
+
+		if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+			bitmap_set(new.dirty_bitmap, 0, new.npages);
 	}
 
 	r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
@@ -3529,9 +3532,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
 	case KVM_CAP_ENABLE_CAP_VM:
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
-#endif
 		return 1;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
@@ -3539,6 +3539,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_COALESCED_PIO:
 		return 1;
 #endif
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
+		return KVM_DIRTY_LOG_MANUAL_CAPS;
+#endif
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING:
 		return KVM_MAX_IRQ_ROUTES;
@@ -3566,11 +3570,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 {
 	switch (cap->cap) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
-		if (cap->flags || (cap->args[0] & ~1))
+	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: {
+		u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE;
+
+		if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE)
+			allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS;
+
+		if (cap->flags || (cap->args[0] & ~allowed_options))
 			return -EINVAL;
 		kvm->manual_dirty_log_protect = cap->args[0];
 		return 0;
+	}
 #endif
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);