2009-10-30 13:47:12 +08:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
* Copyright SUSE Linux Products GmbH 2009
|
|
|
|
*
|
|
|
|
* Authors: Alexander Graf <agraf@suse.de>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <linux/highmem.h>
|
|
|
|
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <asm/kvm_ppc.h>
|
|
|
|
#include <asm/kvm_book3s.h>
|
2013-06-22 15:16:32 +08:00
|
|
|
#include <asm/mmu-hash64.h>
|
2009-10-30 13:47:12 +08:00
|
|
|
|
|
|
|
/*
 * Debug output for this file.  Enable the define below to route dprintk()
 * to printk(KERN_INFO ...); without it dprintk() expands to an empty
 * statement.
 */
/* #define DEBUG_MMU */

#ifndef DEBUG_MMU
#define dprintk(X...) do { } while(0)
#else
#define dprintk(X...) printk(KERN_INFO X)
#endif
|
|
|
|
|
|
|
|
static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
kvmppc_set_msr(vcpu, MSR_SF);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
|
2011-06-29 08:17:33 +08:00
|
|
|
struct kvm_vcpu *vcpu,
|
2009-10-30 13:47:12 +08:00
|
|
|
gva_t eaddr)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
u64 esid = GET_ESID(eaddr);
|
|
|
|
u64 esid_1t = GET_ESID_1T(eaddr);
|
|
|
|
|
2011-06-29 08:17:33 +08:00
|
|
|
for (i = 0; i < vcpu->arch.slb_nr; i++) {
|
2009-10-30 13:47:12 +08:00
|
|
|
u64 cmp_esid = esid;
|
|
|
|
|
2011-06-29 08:17:33 +08:00
|
|
|
if (!vcpu->arch.slb[i].valid)
|
2009-10-30 13:47:12 +08:00
|
|
|
continue;
|
|
|
|
|
2011-06-29 08:17:33 +08:00
|
|
|
if (vcpu->arch.slb[i].tb)
|
2009-10-30 13:47:12 +08:00
|
|
|
cmp_esid = esid_1t;
|
|
|
|
|
2011-06-29 08:17:33 +08:00
|
|
|
if (vcpu->arch.slb[i].esid == cmp_esid)
|
|
|
|
return &vcpu->arch.slb[i];
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
|
|
|
|
eaddr, esid, esid_1t);
|
2011-06-29 08:17:33 +08:00
|
|
|
for (i = 0; i < vcpu->arch.slb_nr; i++) {
|
|
|
|
if (vcpu->arch.slb[i].vsid)
|
2010-01-10 10:27:47 +08:00
|
|
|
dprintk(" %d: %c%c%c %llx %llx\n", i,
|
2011-06-29 08:17:33 +08:00
|
|
|
vcpu->arch.slb[i].valid ? 'v' : ' ',
|
|
|
|
vcpu->arch.slb[i].large ? 'l' : ' ',
|
|
|
|
vcpu->arch.slb[i].tb ? 't' : ' ',
|
|
|
|
vcpu->arch.slb[i].esid,
|
|
|
|
vcpu->arch.slb[i].vsid);
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2013-06-22 15:16:32 +08:00
|
|
|
static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
|
|
|
|
{
|
|
|
|
return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return a mask with all bits below the entry's segment-id shift set,
 * i.e. the bits of an address that select an offset inside the segment.
 */
static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
{
	int sid_shift = kvmppc_slb_sid_shift(slbe);

	return (1ul << sid_shift) - 1;
}
|
|
|
|
|
|
|
|
/*
 * Build the virtual page number for @eaddr within segment @slb: the
 * in-segment offset shifted down by VPN_SHIFT, combined with the entry's
 * vsid placed above it.
 */
static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
{
	int vsid_shift = kvmppc_slb_sid_shift(slb) - VPN_SHIFT;
	gva_t seg_offset = eaddr & kvmppc_slb_offset_mask(slb);

	return (seg_offset >> VPN_SHIFT) | ((slb->vsid) << vsid_shift);
}
|
|
|
|
|
2009-10-30 13:47:12 +08:00
|
|
|
static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|
|
|
bool data)
|
|
|
|
{
|
|
|
|
struct kvmppc_slb *slb;
|
|
|
|
|
2011-06-29 08:17:33 +08:00
|
|
|
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
|
2009-10-30 13:47:12 +08:00
|
|
|
if (!slb)
|
|
|
|
return 0;
|
|
|
|
|
2013-06-22 15:16:32 +08:00
|
|
|
return kvmppc_slb_calc_vpn(slb, eaddr);
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
2013-09-20 12:52:44 +08:00
|
|
|
static int mmu_pagesize(int mmu_pg)
|
|
|
|
{
|
|
|
|
switch (mmu_pg) {
|
|
|
|
case MMU_PAGE_64K:
|
|
|
|
return 16;
|
|
|
|
case MMU_PAGE_16M:
|
|
|
|
return 24;
|
|
|
|
}
|
|
|
|
return 12;
|
|
|
|
}
|
|
|
|
|
2009-10-30 13:47:12 +08:00
|
|
|
static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
|
|
|
|
{
|
2013-09-20 12:52:44 +08:00
|
|
|
return mmu_pagesize(slbe->base_page_size);
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return the index of the page containing @eaddr within its segment:
 * the in-segment offset shifted down by the entry's page shift.
 */
static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
{
	int pg_shift = kvmppc_mmu_book3s_64_get_pagesize(slbe);
	u64 seg_offset = eaddr & kvmppc_slb_offset_mask(slbe);

	return (seg_offset >> pg_shift);
}
|
|
|
|
|
2013-09-20 12:52:49 +08:00
|
|
|
static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
|
2009-10-30 13:47:12 +08:00
|
|
|
struct kvmppc_slb *slbe, gva_t eaddr,
|
|
|
|
bool second)
|
|
|
|
{
|
2013-09-20 12:52:49 +08:00
|
|
|
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
2009-10-30 13:47:12 +08:00
|
|
|
u64 hash, pteg, htabsize;
|
2013-06-22 15:16:32 +08:00
|
|
|
u32 ssize;
|
2009-10-30 13:47:12 +08:00
|
|
|
hva_t r;
|
2013-06-22 15:16:32 +08:00
|
|
|
u64 vpn;
|
2009-10-30 13:47:12 +08:00
|
|
|
|
|
|
|
htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
|
|
|
|
|
2013-06-22 15:16:32 +08:00
|
|
|
vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
|
|
|
|
ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
|
|
|
|
hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
|
2009-10-30 13:47:12 +08:00
|
|
|
if (second)
|
|
|
|
hash = ~hash;
|
|
|
|
hash &= ((1ULL << 39ULL) - 1ULL);
|
|
|
|
hash &= htabsize;
|
|
|
|
hash <<= 7ULL;
|
|
|
|
|
|
|
|
pteg = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
|
|
|
|
pteg |= hash;
|
|
|
|
|
|
|
|
dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
|
|
|
|
page, vcpu_book3s->sdr1, pteg, slbe->vsid);
|
|
|
|
|
2011-08-08 21:06:55 +08:00
|
|
|
/* When running a PAPR guest, SDR1 contains a HVA address instead
|
|
|
|
of a GPA */
|
2013-09-20 12:52:49 +08:00
|
|
|
if (vcpu->arch.papr_enabled)
|
2011-08-08 21:06:55 +08:00
|
|
|
r = pteg;
|
|
|
|
else
|
2013-09-20 12:52:49 +08:00
|
|
|
r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
|
2011-08-08 21:06:55 +08:00
|
|
|
|
2009-10-30 13:47:12 +08:00
|
|
|
if (kvm_is_error_hva(r))
|
|
|
|
return r;
|
|
|
|
return r | (pteg & ~PAGE_MASK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Build the value compared against the AVPN field of a guest HPTE.
 *
 * The in-segment page index is combined with the entry's vsid, then the
 * result is rescaled so that it is expressed relative to a page shift of
 * 16: shifted right by (16 - p) for smaller page shifts, left by (p - 16)
 * otherwise.
 */
static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
{
	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
	int vsid_shift = kvmppc_slb_sid_shift(slbe) - p;
	u64 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);

	avpn |= slbe->vsid << vsid_shift;

	/* ((80 - p) - 56) - 8 in the original form is simply 16 - p. */
	return (p < 16) ? (avpn >> (16 - p)) : (avpn << (p - 16));
}
|
|
|
|
|
2013-09-20 12:52:44 +08:00
|
|
|
/*
|
|
|
|
* Return page size encoded in the second word of a HPTE, or
|
|
|
|
* -1 for an invalid encoding for the base page size indicated by
|
|
|
|
* the SLB entry. This doesn't handle mixed pagesize segments yet.
|
|
|
|
*/
|
|
|
|
static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
|
|
|
|
{
|
|
|
|
switch (slbe->base_page_size) {
|
|
|
|
case MMU_PAGE_64K:
|
|
|
|
if ((r & 0xf000) == 0x1000)
|
|
|
|
return MMU_PAGE_64K;
|
|
|
|
break;
|
|
|
|
case MMU_PAGE_16M:
|
|
|
|
if ((r & 0xff000) == 0)
|
|
|
|
return MMU_PAGE_16M;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2009-10-30 13:47:12 +08:00
|
|
|
/*
 * Translate guest effective address @eaddr by walking the guest SLB and
 * hashed page table (HPT), and describe the result in @gpte.
 *
 * @vcpu:    vcpu whose SLB, MSR and HPT are consulted
 * @eaddr:   guest effective address to translate
 * @gpte:    filled in with eaddr, vpage, raddr, page_size and the
 *           may_read/may_write/may_execute permissions
 * @data:    passed through to kvmppc_mmu_book3s_64_ea_to_vp()
 * @iswrite: true if the access being translated is a store
 *
 * The primary PTEG is searched first, then the secondary one.  On a hit
 * the R bit (and, for permitted stores, the C bit) is set in the guest's
 * HPTE with single-byte writes.  The HPT is read and updated under
 * kvm->arch.hpt_mutex.
 *
 * Returns 0 on success, -EINVAL if no SLB entry covers @eaddr, -ENOENT if
 * the PTEG cannot be read or holds no matching HPTE, and -EPERM if the
 * HPTE does not permit the access.
 */
static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
				      struct kvmppc_pte *gpte, bool data,
				      bool iswrite)
{
	struct kvmppc_slb *slbe;
	hva_t ptegp;
	u64 pteg[16];
	u64 avpn = 0;
	u64 v, r;
	u64 v_val, v_mask;
	u64 eaddr_mask;
	int i;
	u8 pp, key = 0;
	bool found = false;
	bool second = false;
	int pgsize;
	ulong mp_ea = vcpu->arch.magic_page_ea;

	/* Magic page override */
	if (unlikely(mp_ea) &&
	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
	    !(vcpu->arch.shared->msr & MSR_PR)) {
		gpte->eaddr = eaddr;
		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
		/*
		 * NOTE(review): gpte->raddr is read here before this function
		 * has assigned it; presumably the page offset of eaddr is what
		 * is intended -- confirm against the callers.
		 */
		gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
		gpte->raddr &= KVM_PAM;
		gpte->may_execute = true;
		gpte->may_read = true;
		gpte->may_write = true;
		gpte->page_size = MMU_PAGE_4K;

		return 0;
	}

	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
	if (!slbe)
		goto no_seg_found;

	/* Build the first-doubleword value a matching HPTE must have. */
	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
	v_val = avpn & HPTE_V_AVPN;

	if (slbe->tb)
		v_val |= SLB_VSID_B_1T;
	if (slbe->large)
		v_val |= HPTE_V_LARGE;
	v_val |= HPTE_V_VALID;

	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
		HPTE_V_SECONDARY;

	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;

	mutex_lock(&vcpu->kvm->arch.hpt_mutex);

do_second:
	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
	if (kvm_is_error_hva(ptegp))
		goto no_page_found;

	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
		printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
		goto no_page_found;
	}

	/* Select the storage key from the SLB entry for the current mode. */
	if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
		key = 4;
	else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
		key = 4;

	/* Each HPTE is two doublewords: pteg[i] and pteg[i+1]. */
	for (i=0; i<16; i+=2) {
		/* Check all relevant fields of 1st dword */
		if ((pteg[i] & v_mask) == v_val) {
			/* If large page bit is set, check pgsize encoding */
			if (slbe->large &&
			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
				pgsize = decode_pagesize(slbe, pteg[i+1]);
				if (pgsize < 0)
					continue;
			}
			found = true;
			break;
		}
	}

	if (!found) {
		if (second)
			goto no_page_found;
		/* Retry once in the secondary PTEG. */
		v_val |= HPTE_V_SECONDARY;
		second = true;
		goto do_second;
	}

	v = pteg[i];
	r = pteg[i+1];
	pp = (r & HPTE_R_PP) | key;
	if (r & HPTE_R_PP0)
		pp |= 8;

	gpte->eaddr = eaddr;
	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);

	eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
	gpte->page_size = pgsize;
	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
	gpte->may_read = false;
	gpte->may_write = false;

	switch (pp) {
	case 0:
	case 1:
	case 2:
	case 6:
		gpte->may_write = true;
		/* fall through */
	case 3:
	case 5:
	case 7:
	case 10:
		gpte->may_read = true;
		break;
	}

	dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
		"-> 0x%lx\n",
		eaddr, avpn, gpte->vpage, gpte->raddr);

	/* Update PTE R and C bits, so the guest's swapper knows we used the
	 * page */
	if (gpte->may_read && !(r & HPTE_R_R)) {
		/*
		 * Set the accessed flag.
		 * We have to write this back with a single byte write
		 * because another vcpu may be accessing this on
		 * non-PAPR platforms such as mac99, and this is
		 * what real hardware does.
		 *
		 * The write must go to the guest's HPTE at ptegp, not to
		 * the local pteg[] copy, or the guest never sees it.
		 */
		char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
		r |= HPTE_R_R;
		put_user(r >> 8, addr + 6);
	}
	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
		/* Set the dirty flag */
		/* Use a single byte write, again into the guest's HPTE */
		char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
		r |= HPTE_R_C;
		put_user(r, addr + 7);
	}

	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);

	if (!gpte->may_read || (iswrite && !gpte->may_write))
		return -EPERM;
	return 0;

no_page_found:
	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
	return -ENOENT;

no_seg_found:
	dprintk("KVM MMU: Trigger segment fault\n");
	return -EINVAL;
}
|
|
|
|
|
|
|
|
/*
 * Emulate slbmte: decode @rs and @rb into the SLB entry selected by the
 * low 12 bits of @rb, then map the new segment.
 *
 * @rs supplies the VSID and the L, B, Ks, Kp, N, C and LP fields; @rb
 * supplies the ESID, the valid bit and the entry index.  A request whose
 * index is outside the vcpu's SLB is silently ignored.
 *
 * The base page size defaults to MMU_PAGE_4K.  For large segments it is
 * MMU_PAGE_16M, unless BOOK3S_HFLAG_MULTI_PGSIZE is set, in which case it
 * is chosen from the LP field (unrecognised LP values keep MMU_PAGE_4K).
 */
static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
{
	u64 esid, esid_1t;
	int slb_nr;
	struct kvmppc_slb *slbe;

	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);

	esid = GET_ESID(rb);
	esid_1t = GET_ESID_1T(rb);
	slb_nr = rb & 0xfff;

	/*
	 * arch.slb_nr is the number of entries, so valid indices are
	 * 0 .. slb_nr - 1; an index equal to it is already out of range.
	 */
	if (slb_nr >= vcpu->arch.slb_nr)
		return;

	slbe = &vcpu->arch.slb[slb_nr];

	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
	slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;
	slbe->esid  = slbe->tb ? esid_1t : esid;
	slbe->vsid  = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
	slbe->Kp    = (rs & SLB_VSID_KP) ? 1 : 0;
	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;

	slbe->base_page_size = MMU_PAGE_4K;
	if (slbe->large) {
		if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
			switch (rs & SLB_VSID_LP) {
			case SLB_VSID_LP_00:
				slbe->base_page_size = MMU_PAGE_16M;
				break;
			case SLB_VSID_LP_01:
				slbe->base_page_size = MMU_PAGE_64K;
				break;
			}
		} else
			slbe->base_page_size = MMU_PAGE_16M;
	}

	/* Keep the raw register images for slbmfee/slbmfev. */
	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
	slbe->origv = rs;

	/* Map the new segment */
	kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
}
|
|
|
|
|
|
|
|
/*
 * Read back the saved "orige" word of SLB slot @slb_nr (slbmfee hook).
 *
 * Returns 0 when @slb_nr does not name a slot inside the vcpu's SLB.
 */
static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
{
	/*
	 * Valid slots are 0 .. slb_nr - 1, matching the "i < slb_nr" loops
	 * elsewhere in this file; the index equal to slb_nr is out of range.
	 */
	if (slb_nr >= vcpu->arch.slb_nr)
		return 0;

	return vcpu->arch.slb[slb_nr].orige;
}
|
|
|
|
|
|
|
|
/*
 * Read back the saved "origv" word of SLB slot @slb_nr (slbmfev hook).
 *
 * Returns 0 when @slb_nr does not name a slot inside the vcpu's SLB.
 */
static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
{
	/*
	 * Valid slots are 0 .. slb_nr - 1, matching the "i < slb_nr" loops
	 * elsewhere in this file; the index equal to slb_nr is out of range.
	 */
	if (slb_nr >= vcpu->arch.slb_nr)
		return 0;

	return vcpu->arch.slb[slb_nr].origv;
}
|
|
|
|
|
|
|
|
/*
 * Invalidate the SLB entry that covers effective address @ea (slbie hook).
 *
 * Nothing happens when no entry matches.  Otherwise the entry is marked
 * invalid, its saved register images are cleared, and the segment that
 * contains @ea is flushed.
 */
static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
{
	struct kvmppc_slb *entry;
	u64 span;

	dprintk("KVM MMU: slbie(0x%llx)\n", ea);

	entry = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
	if (entry == NULL)
		return;

	dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, entry->esid);

	entry->valid = false;
	entry->orige = 0;
	entry->origv = 0;

	/* Flush the naturally aligned segment-sized window around ea. */
	span = 1ull << kvmppc_slb_sid_shift(entry);
	kvmppc_mmu_flush_segment(vcpu, ea & ~(span - 1), span);
}
|
|
|
|
|
|
|
|
static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
dprintk("KVM MMU: slbia()\n");
|
|
|
|
|
2013-06-22 15:15:24 +08:00
|
|
|
for (i = 1; i < vcpu->arch.slb_nr; i++) {
|
2011-06-29 08:17:33 +08:00
|
|
|
vcpu->arch.slb[i].valid = false;
|
2013-06-22 15:15:24 +08:00
|
|
|
vcpu->arch.slb[i].orige = 0;
|
|
|
|
vcpu->arch.slb[i].origv = 0;
|
|
|
|
}
|
2009-10-30 13:47:12 +08:00
|
|
|
|
2010-07-29 20:47:43 +08:00
|
|
|
if (vcpu->arch.shared->msr & MSR_IR) {
|
2009-10-30 13:47:12 +08:00
|
|
|
kvmppc_mmu_flush_segments(vcpu);
|
2010-04-16 06:11:40 +08:00
|
|
|
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
|
|
|
|
ulong value)
|
|
|
|
{
|
|
|
|
u64 rb = 0, rs = 0;
|
|
|
|
|
2009-12-20 01:07:39 +08:00
|
|
|
/*
|
|
|
|
* According to Book3 2.01 mtsrin is implemented as:
|
|
|
|
*
|
|
|
|
* The SLB entry specified by (RB)32:35 is loaded from register
|
|
|
|
* RS, as follows.
|
|
|
|
*
|
|
|
|
* SLBE Bit Source SLB Field
|
|
|
|
*
|
|
|
|
* 0:31 0x0000_0000 ESID-0:31
|
|
|
|
* 32:35 (RB)32:35 ESID-32:35
|
|
|
|
* 36 0b1 V
|
|
|
|
* 37:61 0x00_0000|| 0b0 VSID-0:24
|
|
|
|
* 62:88 (RS)37:63 VSID-25:51
|
|
|
|
* 89:91 (RS)33:35 Ks Kp N
|
|
|
|
* 92 (RS)36 L ((RS)36 must be 0b0)
|
|
|
|
* 93 0b0 C
|
|
|
|
*/
|
|
|
|
|
|
|
|
dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value);
|
|
|
|
|
2009-10-30 13:47:12 +08:00
|
|
|
/* ESID = srnum */
|
|
|
|
rb |= (srnum & 0xf) << 28;
|
|
|
|
/* Set the valid bit */
|
|
|
|
rb |= 1 << 27;
|
|
|
|
/* Index = ESID */
|
|
|
|
rb |= srnum;
|
|
|
|
|
|
|
|
/* VSID = VSID */
|
|
|
|
rs |= (value & 0xfffffff) << 12;
|
|
|
|
/* flags = flags */
|
2009-12-20 01:07:39 +08:00
|
|
|
rs |= ((value >> 28) & 0x7) << 9;
|
2009-10-30 13:47:12 +08:00
|
|
|
|
|
|
|
kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
|
|
|
|
bool large)
|
|
|
|
{
|
|
|
|
u64 mask = 0xFFFFFFFFFULL;
|
KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe
This adds a per-VM mutex to provide mutual exclusion between vcpus
for accesses to and updates of the guest hashed page table (HPT).
This also makes the code use single-byte writes to the HPT entry
when updating of the reference (R) and change (C) bits. The reason
for doing this, rather than writing back the whole HPTE, is that on
non-PAPR virtual machines, the guest OS might be writing to the HPTE
concurrently, and writing back the whole HPTE might conflict with
that. Also, real hardware does single-byte writes to update R and C.
The new mutex is taken in kvmppc_mmu_book3s_64_xlate() when reading
the HPT and updating R and/or C, and in the PAPR HPT update hcalls
(H_ENTER, H_REMOVE, etc.). Having the mutex means that we don't need
to use a hypervisor lock bit in the HPT update hcalls, and we don't
need to be careful about the order in which the bytes of the HPTE are
updated by those hcalls.
The other change here is to make emulated TLB invalidations (tlbie)
effective across all vcpus. To do this we call kvmppc_mmu_pte_vflush
for all vcpus in kvmppc_ppc_book3s_64_tlbie().
For 32-bit, this makes the setting of the accessed and dirty bits use
single-byte writes, and makes tlbie invalidate shadow HPTEs for all
vcpus.
With this, PR KVM can successfully run SMP guests.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:48 +08:00
|
|
|
long i;
|
|
|
|
struct kvm_vcpu *v;
|
2009-10-30 13:47:12 +08:00
|
|
|
|
|
|
|
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
|
|
|
|
|
2013-09-20 12:52:44 +08:00
|
|
|
/*
|
|
|
|
* The tlbie instruction changed behaviour starting with
|
|
|
|
* POWER6. POWER6 and later don't have the large page flag
|
|
|
|
* in the instruction but in the RB value, along with bits
|
|
|
|
* indicating page and segment sizes.
|
|
|
|
*/
|
|
|
|
if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
|
|
|
|
/* POWER6 or later */
|
|
|
|
if (va & 1) { /* L bit */
|
|
|
|
if ((va & 0xf000) == 0x1000)
|
|
|
|
mask = 0xFFFFFFFF0ULL; /* 64k page */
|
|
|
|
else
|
|
|
|
mask = 0xFFFFFF000ULL; /* 16M page */
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* older processors, e.g. PPC970 */
|
|
|
|
if (large)
|
|
|
|
mask = 0xFFFFFF000ULL;
|
|
|
|
}
|
KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe
This adds a per-VM mutex to provide mutual exclusion between vcpus
for accesses to and updates of the guest hashed page table (HPT).
This also makes the code use single-byte writes to the HPT entry
when updating of the reference (R) and change (C) bits. The reason
for doing this, rather than writing back the whole HPTE, is that on
non-PAPR virtual machines, the guest OS might be writing to the HPTE
concurrently, and writing back the whole HPTE might conflict with
that. Also, real hardware does single-byte writes to update R and C.
The new mutex is taken in kvmppc_mmu_book3s_64_xlate() when reading
the HPT and updating R and/or C, and in the PAPR HPT update hcalls
(H_ENTER, H_REMOVE, etc.). Having the mutex means that we don't need
to use a hypervisor lock bit in the HPT update hcalls, and we don't
need to be careful about the order in which the bytes of the HPTE are
updated by those hcalls.
The other change here is to make emulated TLB invalidations (tlbie)
effective across all vcpus. To do this we call kvmppc_mmu_pte_vflush
for all vcpus in kvmppc_ppc_book3s_64_tlbie().
For 32-bit, this makes the setting of the accessed and dirty bits use
single-byte writes, and makes tlbie invalidate shadow HPTEs for all
vcpus.
With this, PR KVM can successfully run SMP guests.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:48 +08:00
|
|
|
/* flush this VA on all vcpus */
|
|
|
|
kvm_for_each_vcpu(i, v, vcpu->kvm)
|
|
|
|
kvmppc_mmu_pte_vflush(v, va >> 12, mask);
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
2013-09-20 12:52:45 +08:00
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
|
|
/*
 * Return 1 when a magic page is configured, the vcpu is not in problem
 * state (MSR_PR clear) and the magic page lies in segment @esid;
 * return 0 otherwise.
 */
static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
{
	ulong magic_ea = vcpu->arch.magic_page_ea;

	if (!magic_ea)
		return 0;

	if (vcpu->arch.shared->msr & MSR_PR)
		return 0;

	return (magic_ea >> SID_SHIFT) == esid;
}
|
|
|
|
#endif
|
|
|
|
|
2010-04-20 08:49:46 +08:00
|
|
|
static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
2009-10-30 13:47:12 +08:00
|
|
|
u64 *vsid)
|
|
|
|
{
|
2010-04-20 08:49:48 +08:00
|
|
|
ulong ea = esid << SID_SHIFT;
|
|
|
|
struct kvmppc_slb *slb;
|
|
|
|
u64 gvsid = esid;
|
2010-07-29 20:47:54 +08:00
|
|
|
ulong mp_ea = vcpu->arch.magic_page_ea;
|
2013-09-20 12:52:45 +08:00
|
|
|
int pagesize = MMU_PAGE_64K;
|
2010-04-20 08:49:48 +08:00
|
|
|
|
2010-07-29 20:47:43 +08:00
|
|
|
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
2011-06-29 08:17:33 +08:00
|
|
|
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
|
2013-06-22 15:16:32 +08:00
|
|
|
if (slb) {
|
2010-04-20 08:49:48 +08:00
|
|
|
gvsid = slb->vsid;
|
2013-09-20 12:52:45 +08:00
|
|
|
pagesize = slb->base_page_size;
|
2013-06-22 15:16:32 +08:00
|
|
|
if (slb->tb) {
|
|
|
|
gvsid <<= SID_SHIFT_1T - SID_SHIFT;
|
|
|
|
gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
|
|
|
|
gvsid |= VSID_1T;
|
|
|
|
}
|
|
|
|
}
|
2010-04-20 08:49:48 +08:00
|
|
|
}
|
|
|
|
|
2010-07-29 20:47:43 +08:00
|
|
|
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
2009-10-30 13:47:12 +08:00
|
|
|
case 0:
|
2013-09-20 12:52:45 +08:00
|
|
|
gvsid = VSID_REAL | esid;
|
2009-10-30 13:47:12 +08:00
|
|
|
break;
|
|
|
|
case MSR_IR:
|
2013-09-20 12:52:45 +08:00
|
|
|
gvsid |= VSID_REAL_IR;
|
2009-10-30 13:47:12 +08:00
|
|
|
break;
|
|
|
|
case MSR_DR:
|
2013-09-20 12:52:45 +08:00
|
|
|
gvsid |= VSID_REAL_DR;
|
2009-10-30 13:47:12 +08:00
|
|
|
break;
|
|
|
|
case MSR_DR|MSR_IR:
|
2010-04-20 08:49:48 +08:00
|
|
|
if (!slb)
|
2010-07-29 20:47:54 +08:00
|
|
|
goto no_slb;
|
2009-10-30 13:47:12 +08:00
|
|
|
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-09-20 12:52:45 +08:00
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
|
|
/*
|
|
|
|
* Mark this as a 64k segment if the host is using
|
|
|
|
* 64k pages, the host MMU supports 64k pages and
|
|
|
|
* the guest segment page size is >= 64k,
|
|
|
|
* but not if this segment contains the magic page.
|
|
|
|
*/
|
|
|
|
if (pagesize >= MMU_PAGE_64K &&
|
|
|
|
mmu_psize_defs[MMU_PAGE_64K].shift &&
|
|
|
|
!segment_contains_magic_page(vcpu, esid))
|
|
|
|
gvsid |= VSID_64K;
|
|
|
|
#endif
|
|
|
|
|
2010-07-29 20:47:43 +08:00
|
|
|
if (vcpu->arch.shared->msr & MSR_PR)
|
2013-09-20 12:52:45 +08:00
|
|
|
gvsid |= VSID_PR;
|
2010-04-20 08:49:51 +08:00
|
|
|
|
2013-09-20 12:52:45 +08:00
|
|
|
*vsid = gvsid;
|
2009-10-30 13:47:12 +08:00
|
|
|
return 0;
|
2010-07-29 20:47:54 +08:00
|
|
|
|
|
|
|
no_slb:
|
|
|
|
/* Catch magic page case */
|
|
|
|
if (unlikely(mp_ea) &&
|
|
|
|
unlikely(esid == (mp_ea >> SID_SHIFT)) &&
|
|
|
|
!(vcpu->arch.shared->msr & MSR_PR)) {
|
|
|
|
*vsid = VSID_REAL | esid;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -EINVAL;
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
return (to_book3s(vcpu)->hid[5] & 0x80);
|
|
|
|
}
|
|
|
|
|
|
|
|
void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
|
|
|
|
|
|
|
|
mmu->mfsrin = NULL;
|
|
|
|
mmu->mtsrin = kvmppc_mmu_book3s_64_mtsrin;
|
|
|
|
mmu->slbmte = kvmppc_mmu_book3s_64_slbmte;
|
|
|
|
mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
|
|
|
|
mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
|
|
|
|
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
|
|
|
|
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
|
|
|
|
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
|
|
|
|
mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
|
|
|
|
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
|
|
|
|
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
|
|
|
|
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
|
|
|
|
mmu->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32;
|
2009-11-30 11:02:02 +08:00
|
|
|
|
|
|
|
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
|
2009-10-30 13:47:12 +08:00
|
|
|
}
|