KVM: x86: enable dirty log gradually in small chunks
Enabling dirty logging for the first time can hold kvm->mmu_lock for an extended period of time. The main cost is clearing all the D-bits of the last-level SPTEs. This situation can benefit from manual dirty log protection as well, which reduces the time mmu_lock is held. The sequence is as follows: 1. Initialize all the bits of the dirty bitmap to 1 when enabling dirty logging for the first time. 2. Write-protect only the huge pages. 3. KVM_GET_DIRTY_LOG returns the dirty bitmap info. 4. KVM_CLEAR_DIRTY_LOG clears the D-bit of each leaf-level SPTE gradually, in small chunks. On an Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz, I ran some tests with a 128G Windows VM and measured the time taken by memory_global_dirty_log_start; here are the numbers: VM size 128G: 460ms before, 10ms after optimization. Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
0be4435207
commit
3c9bd4006b
|
@ -5707,8 +5707,13 @@ and injected exceptions.
|
|||
:Architectures: x86, arm, arm64, mips
|
||||
:Parameters: args[0] whether feature should be enabled or not
|
||||
|
||||
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
|
||||
clear and write-protect all pages that are returned as dirty.
|
||||
Valid flags are::
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
|
||||
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
|
||||
|
||||
When KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is set, KVM_GET_DIRTY_LOG will not
|
||||
automatically clear and write-protect all pages that are returned as dirty.
|
||||
Rather, userspace will have to do this operation separately using
|
||||
KVM_CLEAR_DIRTY_LOG.
|
||||
|
||||
|
@ -5719,12 +5724,19 @@ than requiring to sync a full memslot; this ensures that KVM does not
|
|||
take spinlocks for an extended period of time. Second, in some cases a
|
||||
large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
|
||||
userspace actually using the data in the page. Pages can be modified
|
||||
during this time, which is inefficint for both the guest and userspace:
|
||||
during this time, which is inefficient for both the guest and userspace:
|
||||
the guest will incur a higher penalty due to write protection faults,
|
||||
while userspace can see false reports of dirty pages. Manual reprotection
|
||||
helps reduce this time, improving guest performance and reducing the
|
||||
number of dirty log false positives.
|
||||
|
||||
With KVM_DIRTY_LOG_INITIALLY_SET set, all the bits of the dirty bitmap
|
||||
will be initialized to 1 when created. This also improves performance because
|
||||
dirty logging can be enabled gradually in small chunks on the first call
|
||||
to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
|
||||
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
|
||||
x86 for now).
|
||||
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
|
||||
it hard or impossible to use it correctly. The availability of
|
||||
|
|
|
@ -49,6 +49,9 @@
|
|||
|
||||
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||
|
||||
/* x86-specific vcpu->requests bit members */
|
||||
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
|
||||
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
|
||||
|
@ -1306,7 +1309,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
|||
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
struct kvm_memory_slot *memslot,
|
||||
int start_level);
|
||||
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
|
|
|
@ -5864,13 +5864,14 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
|
|||
}
|
||||
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
struct kvm_memory_slot *memslot,
|
||||
int start_level)
|
||||
{
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
|
||||
false);
|
||||
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
|
||||
start_level, PT_MAX_HUGEPAGE_LEVEL, false);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
|
|
|
@ -7280,7 +7280,8 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
|
|||
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
|
||||
if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
|
||||
kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
|
||||
kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
|
||||
}
|
||||
|
||||
|
|
|
@ -9916,7 +9916,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
|||
{
|
||||
/* Still write protect RO slot */
|
||||
if (new->flags & KVM_MEM_READONLY) {
|
||||
kvm_mmu_slot_remove_write_access(kvm, new);
|
||||
kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -9951,10 +9951,23 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
|||
* See the comments in fast_page_fault().
|
||||
*/
|
||||
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
if (kvm_x86_ops->slot_enable_log_dirty)
|
||||
if (kvm_x86_ops->slot_enable_log_dirty) {
|
||||
kvm_x86_ops->slot_enable_log_dirty(kvm, new);
|
||||
else
|
||||
kvm_mmu_slot_remove_write_access(kvm, new);
|
||||
} else {
|
||||
int level =
|
||||
kvm_dirty_log_manual_protect_and_init_set(kvm) ?
|
||||
PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
|
||||
|
||||
/*
|
||||
* If initial-all-set is enabled, we don't need
|
||||
* to write protect any small page because
|
||||
* they're reported as dirty already. However
|
||||
* we still need to write-protect huge pages
|
||||
* so that the page split can happen lazily on
|
||||
* the first write to the huge page.
|
||||
*/
|
||||
kvm_mmu_slot_remove_write_access(kvm, new, level);
|
||||
}
|
||||
} else {
|
||||
if (kvm_x86_ops->slot_disable_log_dirty)
|
||||
kvm_x86_ops->slot_disable_log_dirty(kvm, new);
|
||||
|
|
|
@ -360,6 +360,10 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem
|
|||
return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
|
||||
}
|
||||
|
||||
#ifndef KVM_DIRTY_LOG_MANUAL_CAPS
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
|
||||
#endif
|
||||
|
||||
struct kvm_s390_adapter_int {
|
||||
u64 ind_addr;
|
||||
u64 summary_addr;
|
||||
|
@ -493,7 +497,7 @@ struct kvm {
|
|||
#endif
|
||||
long tlbs_dirty;
|
||||
struct list_head devices;
|
||||
bool manual_dirty_log_protect;
|
||||
u64 manual_dirty_log_protect;
|
||||
struct dentry *debugfs_dentry;
|
||||
struct kvm_stat_data **debugfs_stat_data;
|
||||
struct srcu_struct srcu;
|
||||
|
@ -527,6 +531,11 @@ struct kvm {
|
|||
#define vcpu_err(vcpu, fmt, ...) \
|
||||
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
|
||||
|
||||
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
|
||||
{
|
||||
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
|
||||
}
|
||||
|
||||
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
|
||||
{
|
||||
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
|
||||
|
|
|
@ -1628,4 +1628,7 @@ struct kvm_hyperv_eventfd {
|
|||
#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
|
||||
#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
|
||||
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
|
||||
|
||||
#endif /* __LINUX_KVM_H */
|
||||
|
|
|
@ -858,7 +858,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
|
|||
* Allocation size is twice as large as the actual dirty bitmap size.
|
||||
* See kvm_vm_ioctl_get_dirty_log() why this is needed.
|
||||
*/
|
||||
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
|
||||
|
||||
|
@ -1288,9 +1288,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
|||
if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
|
||||
new.dirty_bitmap = NULL;
|
||||
else if (!new.dirty_bitmap) {
|
||||
r = kvm_create_dirty_bitmap(&new);
|
||||
r = kvm_alloc_dirty_bitmap(&new);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (kvm_dirty_log_manual_protect_and_init_set(kvm))
|
||||
bitmap_set(new.dirty_bitmap, 0, new.npages);
|
||||
}
|
||||
|
||||
r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
|
||||
|
@ -3529,9 +3532,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
|||
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
|
||||
case KVM_CAP_CHECK_EXTENSION_VM:
|
||||
case KVM_CAP_ENABLE_CAP_VM:
|
||||
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
|
||||
#endif
|
||||
return 1;
|
||||
#ifdef CONFIG_KVM_MMIO
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
|
@ -3539,6 +3539,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
|||
case KVM_CAP_COALESCED_PIO:
|
||||
return 1;
|
||||
#endif
|
||||
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
|
||||
return KVM_DIRTY_LOG_MANUAL_CAPS;
|
||||
#endif
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
|
||||
case KVM_CAP_IRQ_ROUTING:
|
||||
return KVM_MAX_IRQ_ROUTES;
|
||||
|
@ -3566,11 +3570,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
|
|||
{
|
||||
switch (cap->cap) {
|
||||
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
|
||||
if (cap->flags || (cap->args[0] & ~1))
|
||||
case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: {
|
||||
u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE;
|
||||
|
||||
if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE)
|
||||
allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS;
|
||||
|
||||
if (cap->flags || (cap->args[0] & ~allowed_options))
|
||||
return -EINVAL;
|
||||
kvm->manual_dirty_log_protect = cap->args[0];
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
return kvm_vm_ioctl_enable_cap(kvm, cap);
|
||||
|
|
Loading…
Reference in New Issue