From 1365039d0cb32c0cf96eb9f750f4277c9a90f87d Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Tue, 4 Nov 2014 08:31:16 +0100
Subject: [PATCH 1/4] KVM: s390: Fix ipte locking

ipte_unlock_siif uses cmpxchg to replace the in-memory data of the ipte
lock together with ACCESS_ONCE for the initial read.

union ipte_control {
	unsigned long val;
	struct {
		unsigned long k  : 1;
		unsigned long kh : 31;
		unsigned long kg : 32;
	};
};
[...]
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	ic = &vcpu->kvm->arch.sca->ipte_control;
	do {
		new = old = ACCESS_ONCE(*ic);
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	if (!new.kh)
		wake_up(&vcpu->kvm->arch.ipte_wq);
}

The new value is loaded twice from memory with gcc 4.7.2 of Fedora 18,
despite the ACCESS_ONCE:

--->	l	%r4,0(%r3)		<--- load first 32 bit of lock (k and kh) into r4
	alfi	%r4,2147483647		<--- add -1 to r4
	llgtr	%r4,%r4			<--- zero out the sign bit of r4
	lg	%r1,0(%r3)		<--- load all 64 bit of lock into new
	lgr	%r2,%r1			<--- load the same into old
	risbg	%r1,%r4,1,31,32		<--- shift and insert r4 into the bits 1-31 of new
	llihf	%r4,2147483647
	ngrk	%r4,%r1,%r4
	jne	aa0
	nihh	%r1,32767
	lgr	%r4,%r2
	csg	%r4,%r1,0(%r3)
	cgr	%r2,%r4
	jne	a70

If the memory value changes between the first load (l) and the second
load (lg), we are broken. If that happens, VCPU threads will hang
(unkillable) in handle_ipte_interlock.

Andreas Krebbel analyzed this and tracked it down to a compiler bug in
that version:

"while it is not that obvious the C99 standard basically forbids
duplicating the memory access also in that case. For an argumentation of
a similar case please see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=22278#c43

For the implementation-defined cases regarding volatile there are some
GCC-specific clarifications which can be found here:
https://gcc.gnu.org/onlinedocs/gcc/Volatiles.html#Volatiles

I've tracked down the problem with a reduced testcase. The problem was
that during a tree level optimization (SRA - scalar replacement of
aggregates) the volatile marker is lost. And an RTL level optimizer
(CSE - common subexpression elimination) then propagated the memory read
into its second use, introducing another access to the memory location.
So indeed Christian's suspicion that the union access has something to
do with it is correct (since it triggered the SRA optimization).

This issue has been reported and fixed in the GCC 4.8 development cycle:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145"

This patch replaces the ACCESS_ONCE scheme with a barrier() based scheme
that should work for all supported compilers.
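
For illustration only, here is a minimal user-space sketch of the
barrier()-based pattern the patch switches to. The union mirrors the
layout quoted above, but barrier() and cmpxchg() are mapped to GCC
builtins, the wait-queue handling is omitted, and the names siif_unlock
and ipte_lock are made up, so this is an analogue of the kernel code,
not the kernel code itself:

#include <stdio.h>

/* simplified stand-in for the ipte_control layout quoted above */
union ipte_control {
	unsigned long val;
	struct {
		unsigned long k  : 1;
		unsigned long kh : 31;
		unsigned long kg : 32;
	};
};

/* user-space stand-ins for the kernel primitives (GCC builtins) */
#define barrier()		asm volatile("" ::: "memory")
#define cmpxchg(ptr, o, n)	__sync_val_compare_and_swap(ptr, o, n)

static union ipte_control ipte_lock;	/* hypothetical lock instance */

static void siif_unlock(union ipte_control *ic)
{
	union ipte_control old, new;

	do {
		/* plain load, then a compiler barrier: old is materialized
		 * here and the compiler may not re-read *ic later on to
		 * recompute it */
		old = *ic;
		barrier();
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	/* the kernel would wake the ipte wait queue here when new.kh == 0 */
}

int main(void)
{
	ipte_lock.k = 1;
	ipte_lock.kh = 2;
	siif_unlock(&ipte_lock);	/* kh: 2 -> 1, k stays set */
	siif_unlock(&ipte_lock);	/* kh: 1 -> 0, k is cleared */
	printf("k=%u kh=%u\n", (unsigned)ipte_lock.k, (unsigned)ipte_lock.kh);
	return 0;
}

The plain load followed by barrier() forces the compiler to materialize
old before the barrier and forbids re-reading *ic afterwards to
recompute it, which is exactly the duplicated access gcc 4.7.2 generated
for the ACCESS_ONCE version.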
Signed-off-by: Christian Borntraeger
Cc: stable@vger.kernel.org # v3.16+
---
 arch/s390/kvm/gaccess.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1c64b3..6dc0ad9c7050 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -229,10 +229,12 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
 		while (old.k) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = *ic;
+			barrier();
 		}
 		new = old;
 		new.k = 1;
@@ -251,7 +253,9 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
+		new = old;
 		new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
@@ -265,10 +269,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
 		while (old.kg) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = *ic;
+			barrier();
 		}
 		new = old;
 		new.k = 1;
@@ -282,7 +288,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
+		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;

From 2dca485f8740208604543c3960be31a5dd3ea603 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Fri, 31 Oct 2014 09:24:20 +0100
Subject: [PATCH 2/4] KVM: s390: flush CPU on load control

Some control register changes will flush some aspects of the CPU, e.g.
the POP explicitly mentions that for CR9-CR11 "TLBs may be cleared".
Instead of trying to be clever and only flushing on specific CRs, let's
play safe and flush on every lctl(g), as future machines might define
new bits in CRs. Load control intercepts should not happen that often.

Signed-off-by: Christian Borntraeger
Acked-by: Cornelia Huck
Reviewed-by: David Hildenbrand
Cc: stable@vger.kernel.org
---
 arch/s390/kvm/priv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 72bb2dd8b9cd..9c565b6b4ccb 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -791,7 +791,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -863,7 +863,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }

From fc56eb66c348febef6c7bbd2c3918410cafd6313 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Wed, 29 Oct 2014 10:07:16 +0100
Subject: [PATCH 3/4] KVM: s390: fix handling of lctl[g]/stctl[g]

According to the architecture, all instructions are suppressing if a
memory access is prohibited due to DAT protection, unless stated
otherwise for an instruction.
The lctl[g]/stctl[g] implementations handled this incorrectly, since
control register handling was done piecemeal, which means they had
terminating instead of suppressing semantics.
This patch fixes this.
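
To make the suppressing-versus-terminating distinction concrete, here is
a small, kernel-independent sketch. The guest_read() helper is a made-up
stand-in for read_guest() that can fail partway through, not a real KVM
interface: load_regs_piecemeal() updates the register file as it goes,
so a fault halfway leaves it partially modified (terminating behaviour),
while load_regs_suppressing() stages the whole guest read in a bounce
buffer and commits only on success, which is the structure the patch
gives lctl[g]/stctl[g]:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* made-up stand-in for read_guest(): copy from a fake guest image, but
 * fail (like a DAT protection exception) once the access crosses fault_at */
static size_t fault_at = SIZE_MAX;

static int guest_read(const void *guest, size_t off, void *dst, size_t len)
{
	if (off + len > fault_at)
		return -1;			/* access prohibited */
	memcpy(dst, (const uint8_t *)guest + off, len);
	return 0;
}

/* terminating behaviour: registers copied before the fault stay modified */
static int load_regs_piecemeal(uint32_t *cr, const uint32_t *guest, int nr)
{
	for (int i = 0; i < nr; i++)
		if (guest_read(guest, i * sizeof(uint32_t), &cr[i], sizeof(uint32_t)))
			return -1;		/* cr[0..i-1] already clobbered */
	return 0;
}

/* suppressing behaviour: read everything first, commit only if it all worked */
static int load_regs_suppressing(uint32_t *cr, const uint32_t *guest, int nr)
{
	uint32_t tmp[16];

	if (guest_read(guest, 0, tmp, nr * sizeof(uint32_t)))
		return -1;			/* cr[] left untouched */
	memcpy(cr, tmp, nr * sizeof(uint32_t));
	return 0;
}

int main(void)
{
	const uint32_t guest[4] = { 1, 2, 3, 4 };
	uint32_t cr[4] = { 0, 0, 0, 0 };

	fault_at = 2 * sizeof(uint32_t);	/* fault after two registers */

	load_regs_piecemeal(cr, guest, 4);
	printf("piecemeal:   cr0=%u cr1=%u cr2=%u cr3=%u\n",
	       (unsigned)cr[0], (unsigned)cr[1], (unsigned)cr[2], (unsigned)cr[3]);

	memset(cr, 0, sizeof(cr));
	load_regs_suppressing(cr, guest, 4);
	printf("suppressing: cr0=%u cr1=%u cr2=%u cr3=%u\n",
	       (unsigned)cr[0], (unsigned)cr[1], (unsigned)cr[2], (unsigned)cr[3]);
	return 0;
}

Running it shows cr0/cr1 already clobbered in the piecemeal variant but
left untouched in the suppressing one.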
Signed-off-by: Heiko Carstens
Reviewed-by: Thomas Huth
Reviewed-by: David Hildenbrand
Signed-off-by: Christian Borntraeger
---
 arch/s390/kvm/priv.c | 68 +++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9c565b6b4ccb..9bde32f66eb5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -762,8 +762,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 val = 0;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
 
 	vcpu->stat.instruction_lctl++;
@@ -779,14 +779,15 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= val;
-		ga += 4;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
@@ -799,9 +800,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
-	u32 val;
-	int reg, rc;
 
 	vcpu->stat.instruction_stctl++;
 
@@ -817,26 +818,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 4;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_lctlg++;
 
@@ -848,17 +847,17 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		vcpu->arch.sie_block->gcr[reg] = val;
-		ga += 8;
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
@@ -871,8 +870,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_stctg++;
 
@@ -884,23 +884,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg];
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 8;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static const intercept_handler_t eb_handlers[256] = {

From 365dc1633521a32d55d839f56b41bb9a531d957a Mon Sep 17 00:00:00 2001
From: Dominik Dingel
Date: Fri, 31 Oct 2014 16:44:10 +0100
Subject: [PATCH 4/4] KVM: fix vm device attribute documentation

The documentation used incorrect attribute names for some vm device
attributes; fix them.

Signed-off-by: Dominik Dingel
Signed-off-by: Christian Borntraeger
---
 Documentation/virtual/kvm/devices/vm.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 0d16f96c0eac..d426fc87fe93 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -12,14 +12,14 @@ specific.
 1. GROUP: KVM_S390_VM_MEM_CTRL
 Architectures: s390
 
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
 Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
 
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
 
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
 Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked