diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index abd7c32126ce..32427ea160df 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8019,8 +8019,8 @@ guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf (0x40000001). Otherwise, a guest may use the paravirtual features regardless of what has actually been exposed through the CPUID leaf. -8.29 KVM_CAP_DIRTY_LOG_RING ---------------------------- +8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL +---------------------------------------------------------- :Architectures: x86 :Parameters: args[0] - size of the dirty log ring @@ -8078,6 +8078,11 @@ on to the next GFN. The userspace should continue to do this until the flags of a GFN have the DIRTY bit cleared, meaning that it has harvested all the dirty GFNs that were available. +Note that on weakly ordered architectures, userspace accesses to the +ring buffer (and more specifically the 'flags' field) must be ordered, +using load-acquire/store-release accessors when available, or any +other memory barrier that will ensure this ordering. + It's not necessary for userspace to harvest the all dirty GFNs at once. However it must collect the dirty GFNs in sequence, i.e., the userspace program cannot skip one dirty GFN to collect the one next to it. @@ -8106,6 +8111,14 @@ KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual machine will switch to ring-buffer dirty page tracking and further KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. +NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that +should be exposed by weakly ordered architecture, in order to indicate +the additional memory ordering requirements imposed on userspace when +reading the state of an entry and mutating it from DIRTY to HARVESTED. +Architecture with TSO-like ordering (such as x86) are allowed to +expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL +to userspace. + 8.30 KVM_CAP_XEN_HVM -------------------- diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index e3cbd7706136..67be7f217e37 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,7 +28,8 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE select HAVE_KVM_IRQFD - select HAVE_KVM_DIRTY_RING + select HAVE_KVM_DIRTY_RING_TSO + select HAVE_KVM_DIRTY_RING_ACQ_REL select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..0d5d4419139a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1177,6 +1177,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 +#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 9c883c94d478..b5234d6efbe1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "kvm_util.h" #include "test_util.h" @@ -264,7 +265,8 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) static bool dirty_ring_supported(void) { - return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING); + return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) || + kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL)); } static void dirty_ring_create_vm_done(struct kvm_vm *vm) @@ -279,12 +281,12 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm) static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) { - return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; + return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY; } static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) { - gfn->flags = KVM_DIRTY_GFN_F_RESET; + smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET); } static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9889fe0d8919..411a4c0bc81c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -82,7 +82,10 @@ unsigned int kvm_check_cap(long cap) void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) { - vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); + if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); + else + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); vm->dirty_ring_size = ring_size; } diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index a8c5c9f06b3c..800f9470e36b 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -19,6 +19,20 @@ config HAVE_KVM_IRQ_ROUTING config HAVE_KVM_DIRTY_RING bool +# Only strongly ordered architectures can select this, as it doesn't +# put any explicit constraint on userspace ordering. They can also +# select the _ACQ_REL version. +config HAVE_KVM_DIRTY_RING_TSO + bool + select HAVE_KVM_DIRTY_RING + depends on X86 + +# Weakly ordered architectures can only select this, advertising +# to userspace the additional ordering requirements. +config HAVE_KVM_DIRTY_RING_ACQ_REL + bool + select HAVE_KVM_DIRTY_RING + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index f4c2a6eb1666..d6fabf238032 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -74,7 +74,7 @@ int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn) { - gfn->flags = 0; + smp_store_release(&gfn->flags, 0); } static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn) @@ -84,7 +84,7 @@ static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn) static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn) { - return gfn->flags & KVM_DIRTY_GFN_F_RESET; + return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET; } int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 584a5bab3af3..5b064dbadaf4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4475,7 +4475,13 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_NR_MEMSLOTS: return KVM_USER_MEM_SLOTS; case KVM_CAP_DIRTY_LOG_RING: -#ifdef CONFIG_HAVE_KVM_DIRTY_RING +#ifdef CONFIG_HAVE_KVM_DIRTY_RING_TSO + return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); +#else + return 0; +#endif + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: +#ifdef CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); #else return 0; @@ -4580,6 +4586,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return 0; } case KVM_CAP_DIRTY_LOG_RING: + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); default: return kvm_vm_ioctl_enable_cap(kvm, cap);