KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits
This allows both the guest and the host to use the referenced (R) and changed (C) bits in the guest hashed page table. The guest has a view of R and C that is maintained in the guest_rpte field of the revmap entry for the HPTE, and the host has a view that is maintained in the rmap entry for the associated gfn. Both views are updated from the guest HPT. If a bit (R or C) is zero in either view, it will be initially set to zero in the HPTE (or HPTEs), until set to 1 by hardware. When an HPTE is removed for any reason, the R and C bits from the HPTE are ORed into both views. We have to be careful to read the R and C bits from the HPTE after invalidating it, but before unlocking it, in case of any late updates by the hardware.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
commit bad3b5075e
parent a92bce95f0
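The rules described in the commit message reduce to two small operations. The sketch below is illustrative only: the HPTE_R_R/HPTE_R_C constants match the Book3S HPTE low-word bits, but the helper names and the bare-pointer "views" are invented for this example, not taken from the kernel. On insertion, R/C are preset in the real HPTE only where both views already have them; on removal, the final hardware R/C are ORed into both views.

/*
 * Illustrative sketch of the two-view R/C protocol (not kernel code).
 */
#define HPTE_R_R	0x100ul			/* referenced */
#define HPTE_R_C	0x080ul			/* changed */
#define RMAP_RC_SHIFT	32			/* host view lives at bits 32+ of the rmap word */

/* Insertion: preset R/C in the real HPTE only where both the guest
 * view (ptel, derived from guest_rpte) and the host view (rmap)
 * already have them; a zero in either view leaves the bit 0 so that
 * hardware must set it again. */
static unsigned long rc_mask_on_insert(unsigned long ptel, unsigned long rmap)
{
	unsigned long host_rc = rmap >> RMAP_RC_SHIFT;

	return ptel & (host_rc | ~(HPTE_R_R | HPTE_R_C));
}

/* Removal: after invalidating the HPTE but before unlocking it, read
 * its low word and OR the final R/C into both views so that late
 * hardware updates are not lost. */
static void rc_harvest_on_remove(unsigned long hpte_r,
				 unsigned long *rmap, unsigned long *guest_rpte)
{
	unsigned long rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);

	*rmap |= rcbits << RMAP_RC_SHIFT;
	*guest_rpte |= rcbits;
}

These two helpers correspond to the masking added in the page-fault and H_ENTER paths and to the harvesting added in kvm_unmap_rmapp and remove_revmap_chain in the diff below.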
@@ -200,8 +200,9 @@ struct revmap_entry {
  * index in the guest HPT of a HPTE that points to the page.
  */
 #define KVMPPC_RMAP_LOCK_BIT	63
-#define KVMPPC_RMAP_REF_BIT	33
-#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
+#define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
@@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long is_io;
 	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
+	unsigned long rcbits;
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	}
 
+	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
 	if (hptep[0] & HPTE_V_VALID) {
 		/* HPTE was previously valid, so we need to invalidate it */
 		unlock_rmap(rmap);
 		hptep[0] |= HPTE_V_ABSENT;
 		kvmppc_invalidate_hpte(kvm, hptep, index);
+		/* don't lose previous R and C bits */
+		r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
 	} else {
 		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
 	}
@@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long h, i, j;
 	unsigned long *hptep;
-	unsigned long ptel, psize;
+	unsigned long ptel, psize, rcbits;
 
 	for (;;) {
-		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
-			cpu_relax();
+		lock_rmap(rmapp);
 		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
-			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+			unlock_rmap(rmapp);
 			break;
 		}
 
 		/*
 		 * To avoid an ABBA deadlock with the HPTE lock bit,
-		 * we have to unlock the rmap chain before locking the HPTE.
-		 * Thus we remove the first entry, unlock the rmap chain,
-		 * lock the HPTE and then check that it is for the
-		 * page we're unmapping before changing it to non-present.
+		 * we can't spin on the HPTE lock while holding the
+		 * rmap chain lock.
 		 */
 		i = *rmapp & KVMPPC_RMAP_INDEX;
+		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+			/* unlock rmap before spinning on the HPTE lock */
+			unlock_rmap(rmapp);
+			while (hptep[0] & HPTE_V_HVLOCK)
+				cpu_relax();
+			continue;
+		}
 		j = rev[i].forw;
 		if (j == i) {
 			/* chain is now empty */
-			j = 0;
+			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
 		} else {
 			/* remove i from chain */
 			h = rev[i].back;
 			rev[h].forw = j;
 			rev[j].back = h;
 			rev[i].forw = rev[i].back = i;
-			j |= KVMPPC_RMAP_PRESENT;
+			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
 		}
-		smp_wmb();
-		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
 
-		/* Now lock, check and modify the HPTE */
-		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
-		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
-			cpu_relax();
+		/* Now check and modify the HPTE */
 		ptel = rev[i].guest_rpte;
 		psize = hpte_page_size(hptep[0], ptel);
 		if ((hptep[0] & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			kvmppc_invalidate_hpte(kvm, hptep, i);
 			hptep[0] |= HPTE_V_ABSENT;
+			kvmppc_invalidate_hpte(kvm, hptep, i);
+			/* Harvest R and C */
+			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+			rev[i].guest_rpte = ptel | rcbits;
 		}
+		unlock_rmap(rmapp);
 		hptep[0] &= ~HPTE_V_HVLOCK;
 	}
 	return 0;
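The locking dance in kvm_unmap_rmapp is the classic trylock-and-back-off answer to lock-order inversion: other paths take the HPTE lock first and the rmap lock second, so this path may only try the HPTE lock while holding the rmap lock, and must drop the rmap lock before waiting. Below is a minimal stand-alone sketch of that pattern, using C11 atomic_flag as a hypothetical stand-in for the two kernel bit locks; it is the structure, not the kernel's lock implementation.

#include <stdatomic.h>

static atomic_flag rmap_lock = ATOMIC_FLAG_INIT;	/* lock A: taken first here */
static atomic_flag hpte_lock = ATOMIC_FLAG_INIT;	/* lock B: trylocked only   */

static void modify_hpte(void) { /* stand-in for the real HPTE update */ }

static void unmap_one(void)
{
	for (;;) {
		while (atomic_flag_test_and_set(&rmap_lock))
			;				/* spin for A */
		if (atomic_flag_test_and_set(&hpte_lock)) {
			/* B is contended: drop A before waiting, so a
			 * holder of B who wants A can finish (no ABBA). */
			atomic_flag_clear(&rmap_lock);
			continue;			/* retry from the top */
		}
		modify_hpte();				/* both A and B held */
		atomic_flag_clear(&hpte_lock);
		atomic_flag_clear(&rmap_lock);
		break;
	}
}

The kernel version additionally spins reading the HPTE word until the lock bit clears before retrying (atomic_flag cannot express a read-only wait); the essential part is backing off A rather than spinning on B while holding A.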
@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	kvm_unmap_rmapp(kvm, rmapp, gfn);
 	while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
 		cpu_relax();
-	__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
+	*rmapp &= ~KVMPPC_RMAP_REFERENCED;
 	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
 	return 1;
 }
@@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
-				unsigned long hpte_v)
+				struct revmap_entry *rev,
+				unsigned long hpte_v, unsigned long hpte_r)
 {
-	struct revmap_entry *rev, *next, *prev;
+	struct revmap_entry *next, *prev;
 	unsigned long gfn, ptel, head;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
+	unsigned long rcbits;
 
-	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-	ptel = rev->guest_rpte;
+	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+	ptel = rev->guest_rpte |= rcbits;
 	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
 	memslot = builtin_gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
@@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 		else
 			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 	}
+	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 	unlock_rmap(rmap);
 }
 
@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	pte_t pte;
 	unsigned int writing;
 	unsigned long mmu_seq;
+	unsigned long rcbits;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		} else {
 			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
 						realmode);
+			/* Only set R/C in real HPTE if already set in *rmap */
+			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
 		}
 	}
 
@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 			asm volatile("tlbiel %0" : : "r" (rb));
 			asm volatile("ptesync" : : : "memory");
 		}
-		remove_revmap_chain(kvm, pte_index, v);
+		/* Read PTE low word after tlbie to get final R/C values */
+		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
 	}
 	r = rev->guest_rpte;
 	unlock_hpte(hpte, 0);
@@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 			args[j] = ((0x80 | flags) << 56) + pte_index;
 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-			/* insert R and C bits from guest PTE */
-			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
-			args[j] |= rcbits << (56 - 5);
 
-			if (!(hp[0] & HPTE_V_VALID))
+			if (!(hp[0] & HPTE_V_VALID)) {
+				/* insert R and C bits from PTE */
+				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+				args[j] |= rcbits << (56 - 5);
 				continue;
+			}
 
 			hp[0] &= ~HPTE_V_VALID;		/* leave it locked */
 			tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
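The rcbits << (56 - 5) in this hunk and the next packs the harvested R and C bits into the high status byte of the per-entry H_BULK_REMOVE return word, whose top byte was set from 0x80 | flags. A small worked example of the arithmetic (the HPTE_R_* values match the Book3S HPTE low-word layout; the program itself is just an illustration):

#include <stdio.h>

#define HPTE_R_R	0x100ul		/* referenced: bit 8 */
#define HPTE_R_C	0x080ul		/* changed:    bit 7 */

int main(void)
{
	unsigned long guest_rpte = HPTE_R_R | HPTE_R_C;	/* both bits set */
	unsigned long rcbits = guest_rpte & (HPTE_R_R | HPTE_R_C);

	/* Shift by 56 - 5 = 51: R (bit 8) lands at bit 59 and C (bit 7)
	 * at bit 58 of the returned doubleword, inside the high byte. */
	printf("%#lx\n", rcbits << (56 - 5));	/* prints 0xc00000000000000 */
	return 0;
}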
@@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			asm volatile("ptesync" : : : "memory");
 		}
 
+		/* Read PTE low words after tlbie to get final R/C values */
 		for (k = 0; k < n; ++k) {
 			j = indexes[k];
 			pte_index = args[j] & ((1ul << 56) - 1);
 			hp = hptes[k];
 			rev = revs[k];
-			remove_revmap_chain(kvm, pte_index, hp[0]);
-			unlock_hpte(hp, 0);
+			remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+			args[j] |= rcbits << (56 - 5);
+			hp[0] = 0;
 		}
 	}
 
@@ -595,8 +607,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		pte_index &= ~3;
 		n = 4;
 	}
-	if (flags & H_R_XLATE)
-		rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	for (i = 0; i < n; ++i, ++pte_index) {
 		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 		v = hpte[0] & ~HPTE_V_HVLOCK;
@@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
 		}
-		if (v & HPTE_V_VALID) {
-			if (rev)
-				r = rev[i].guest_rpte;
-			else
-				r = hpte[1] | HPTE_R_RPN;
-		}
+		if (v & HPTE_V_VALID)
+			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
 		vcpu->arch.gpr[4 + i * 2] = v;
 		vcpu->arch.gpr[5 + i * 2] = r;
 	}
|
|
Loading…
Reference in New Issue