KVM: PPC: Book3S HV: Keep HPTE locked when invalidating
This reworks the implementations of the H_REMOVE and H_BULK_REMOVE hcalls to make sure that we keep the HPTE locked and in the reverse-mapping chain until we have finished invalidating it. Previously we would remove it from the chain and unlock it before invalidating it, leaving a tiny window when the guest could access the page even though we believe we have removed it from the guest (e.g., kvm_unmap_hva() has been called for the page and has found no HPTEs in the chain). In addition, we'll need this for future patches where we will need to read the R and C bits in the HPTE after invalidating it.

Doing this required restructuring kvmppc_h_bulk_remove() substantially. Since we want to batch up the tlbies, we now need to keep several HPTEs locked simultaneously. In order to avoid possible deadlocks, we don't spin on the HPTE bitlock for any except the first HPTE in a batch. If we can't acquire the HPTE bitlock for the second or subsequent HPTE, we terminate the batch at that point, do the tlbies that we have accumulated so far, unlock those HPTEs, and then start a new batch to do the remaining invalidations.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent b5434032fc
commit a92bce95f0
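The deadlock-avoidance scheme described in the message above reduces to a simple pattern: spin for the first lock of a batch, but on any later entry give up at the first failed try-lock, flush what has been accumulated so far, drop those locks, and resume where you left off. A minimal user-space sketch of that pattern in plain C follows; all names here are invented for illustration, and C11 atomics stand in for the HPTE bitlock:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NENTRIES  8
#define BATCH_MAX 4

static atomic_bool entry_lock[NENTRIES];   /* zero-initialized = unlocked */

static bool try_lock_entry(int i)
{
        return !atomic_exchange_explicit(&entry_lock[i], true,
                                         memory_order_acquire);
}

static void unlock_entry(int i)
{
        atomic_store_explicit(&entry_lock[i], false, memory_order_release);
}

static void flush_batch(int *batch, int n)
{
        /* stand-in for the batched tlbies; then drop every lock held */
        printf("invalidating batch of %d\n", n);
        for (int k = 0; k < n; ++k)
                unlock_entry(batch[k]);
}

int main(void)
{
        int batch[BATCH_MAX], n = 0;

        for (int i = 0; i < NENTRIES; ) {
                if (!try_lock_entry(i)) {
                        if (n) {
                                /* already holding locks: don't spin, flush
                                   the batch, then come back to entry i */
                                flush_batch(batch, n);
                                n = 0;
                                continue;
                        }
                        /* first entry of a batch: safe to spin */
                        while (!try_lock_entry(i))
                                ;
                }
                batch[n++] = i++;
                if (n == BATCH_MAX) {
                        flush_batch(batch, n);
                        n = 0;
                }
        }
        if (n)
                flush_batch(batch, n);
        return 0;
}

Spinning is only safe when no other lock is held, which is exactly why the batch is flushed and its locks released before retrying.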
arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -140,6 +140,12 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
         return kvmppc_read_update_linux_pte(ptep, writing);
 }
 
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+        asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+        hpte[0] = hpte_v;
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                     long pte_index, unsigned long pteh, unsigned long ptel)
 {
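The unlock_hpte() helper added here is a release-store unlock: PPC_RELEASE_BARRIER (typically an lwsync) orders every prior store to the HPTE before the store that rewrites the word carrying HPTE_V_HVLOCK. The same idea expressed with portable C11 atomics, as a sketch rather than the kernel's actual implementation:

#include <stdatomic.h>

static inline void unlock_hpte_sketch(_Atomic unsigned long *hpte,
                                      unsigned long hpte_v)
{
        /* hpte_v must have the lock bit clear; the release store makes
           every earlier HPTE update visible before the lock is seen free */
        atomic_store_explicit(hpte, hpte_v, memory_order_release);
}

int main(void)
{
        _Atomic unsigned long hpte = 0x2UL;     /* pretend "locked" value */
        unlock_hpte_sketch(&hpte, 0);           /* publish unlocked value */
        return (int)atomic_load_explicit(&hpte, memory_order_relaxed);
}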
@@ -356,6 +362,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
         struct kvm *kvm = vcpu->kvm;
         unsigned long *hpte;
         unsigned long v, r, rb;
+        struct revmap_entry *rev;
 
         if (pte_index >= HPT_NPTE)
                 return H_PARAMETER;
@@ -368,30 +375,32 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                 hpte[0] &= ~HPTE_V_HVLOCK;
                 return H_NOT_FOUND;
         }
-        if (atomic_read(&kvm->online_vcpus) == 1)
-                flags |= H_LOCAL;
-        vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
-        vcpu->arch.gpr[5] = r = hpte[1];
-        rb = compute_tlbie_rb(v, r, pte_index);
-        if (v & HPTE_V_VALID)
+
+        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+        v = hpte[0] & ~HPTE_V_HVLOCK;
+        if (v & HPTE_V_VALID) {
+                hpte[0] &= ~HPTE_V_VALID;
+                rb = compute_tlbie_rb(v, hpte[1], pte_index);
+                if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+                                cpu_relax();
+                        asm volatile("ptesync" : : : "memory");
+                        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+                                     : : "r" (rb), "r" (kvm->arch.lpid));
+                        asm volatile("ptesync" : : : "memory");
+                        kvm->arch.tlbie_lock = 0;
+                } else {
+                        asm volatile("ptesync" : : : "memory");
+                        asm volatile("tlbiel %0" : : "r" (rb));
+                        asm volatile("ptesync" : : : "memory");
+                }
                 remove_revmap_chain(kvm, pte_index, v);
-        smp_wmb();
-        hpte[0] = 0;
-        if (!(v & HPTE_V_VALID))
-                return H_SUCCESS;
-        if (!(flags & H_LOCAL)) {
-                while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                        cpu_relax();
-                asm volatile("ptesync" : : : "memory");
-                asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-                             : : "r" (rb), "r" (kvm->arch.lpid));
-                asm volatile("ptesync" : : : "memory");
-                kvm->arch.tlbie_lock = 0;
-        } else {
-                asm volatile("ptesync" : : : "memory");
-                asm volatile("tlbiel %0" : : "r" (rb));
-                asm volatile("ptesync" : : : "memory");
-        }
+        }
+        r = rev->guest_rpte;
+        unlock_hpte(hpte, 0);
+
+        vcpu->arch.gpr[4] = v;
+        vcpu->arch.gpr[5] = r;
         return H_SUCCESS;
 }
 
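The net effect of the hunk above is a strict ordering in kvmppc_h_remove(): the valid bit is cleared and the TLB flushed while the HPTE is still locked and still on the reverse-mapping chain, and only then is the entry unhooked and the lock dropped, so a concurrent kvm_unmap_hva() that finds the chain empty can trust that the translation is really gone. A compilable toy model of that ordering follows; every helper in it is a stub invented for illustration, with real locking and tlbies elided:

#include <stdio.h>

#define HPTE_V_VALID  0x1UL
#define HPTE_V_HVLOCK 0x2UL

static void tlb_invalidate(unsigned long v) { printf("tlbie for %#lx\n", v); }
static void revmap_unhook(unsigned long v)  { printf("unhook %#lx\n", v); }

static void h_remove_model(unsigned long *hpte)
{
        unsigned long v = *hpte;

        *hpte = v | HPTE_V_HVLOCK;      /* 1. lock (models try_lock_hpte)     */
        if (v & HPTE_V_VALID) {
                *hpte &= ~HPTE_V_VALID; /* 2. guest can no longer use it      */
                tlb_invalidate(v);      /* 3. flush while still on the chain  */
                revmap_unhook(v);       /* 4. only now leave the revmap chain */
        }
        *hpte = 0;                      /* 5. unlock: clear the whole word    */
}

int main(void)
{
        unsigned long hpte = HPTE_V_VALID;
        h_remove_model(&hpte);
        return 0;
}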
@@ -399,82 +408,113 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
         struct kvm *kvm = vcpu->kvm;
         unsigned long *args = &vcpu->arch.gpr[4];
-        unsigned long *hp, tlbrb[4];
-        long int i, found;
-        long int n_inval = 0;
-        unsigned long flags, req, pte_index;
+        unsigned long *hp, *hptes[4], tlbrb[4];
+        long int i, j, k, n, found, indexes[4];
+        unsigned long flags, req, pte_index, rcbits;
         long int local = 0;
         long int ret = H_SUCCESS;
+        struct revmap_entry *rev, *revs[4];
 
         if (atomic_read(&kvm->online_vcpus) == 1)
                 local = 1;
-        for (i = 0; i < 4; ++i) {
-                pte_index = args[i * 2];
-                flags = pte_index >> 56;
-                pte_index &= ((1ul << 56) - 1);
-                req = flags >> 6;
-                flags &= 3;
-                if (req == 3)
-                        break;
-                if (req != 1 || flags == 3 ||
-                    pte_index >= HPT_NPTE) {
-                        /* parameter error */
-                        args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
-                        ret = H_PARAMETER;
-                        break;
-                }
-                hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
-                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
-                        cpu_relax();
-                found = 0;
-                if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
-                        switch (flags & 3) {
-                        case 0:         /* absolute */
-                                found = 1;
-                                break;
-                        case 1:         /* andcond */
-                                if (!(hp[0] & args[i * 2 + 1]))
-                                        found = 1;
-                                break;
-                        case 2:         /* AVPN */
-                                if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
-                                        found = 1;
-                                break;
-                        }
-                }
-                if (!found) {
-                        hp[0] &= ~HPTE_V_HVLOCK;
-                        args[i * 2] = ((0x90 | flags) << 56) + pte_index;
-                        continue;
-                }
-                /* insert R and C bits from PTE */
-                flags |= (hp[1] >> 5) & 0x0c;
-                args[i * 2] = ((0x80 | flags) << 56) + pte_index;
-                if (hp[0] & HPTE_V_VALID) {
-                        tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
-                        remove_revmap_chain(kvm, pte_index, hp[0]);
-                        smp_wmb();
-                        hp[0] = 0;
-                }
-        }
-        if (n_inval == 0)
-                return ret;
-
-        if (!local) {
-                while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                        cpu_relax();
-                asm volatile("ptesync" : : : "memory");
-                for (i = 0; i < n_inval; ++i)
-                        asm volatile(PPC_TLBIE(%1,%0)
-                                     : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
-                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-                kvm->arch.tlbie_lock = 0;
-        } else {
-                asm volatile("ptesync" : : : "memory");
-                for (i = 0; i < n_inval; ++i)
-                        asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
-                asm volatile("ptesync" : : : "memory");
+        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+                n = 0;
+                for (; i < 4; ++i) {
+                        j = i * 2;
+                        pte_index = args[j];
+                        flags = pte_index >> 56;
+                        pte_index &= ((1ul << 56) - 1);
+                        req = flags >> 6;
+                        flags &= 3;
+                        if (req == 3) {         /* no more requests */
+                                i = 4;
+                                break;
+                        }
+                        if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+                                /* parameter error */
+                                args[j] = ((0xa0 | flags) << 56) + pte_index;
+                                ret = H_PARAMETER;
+                                break;
+                        }
+                        hp = (unsigned long *)
+                                (kvm->arch.hpt_virt + (pte_index << 4));
+                        /* to avoid deadlock, don't spin except for first */
+                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+                                if (n)
+                                        break;
+                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+                                        cpu_relax();
+                        }
+                        found = 0;
+                        if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+                                switch (flags & 3) {
+                                case 0:         /* absolute */
+                                        found = 1;
+                                        break;
+                                case 1:         /* andcond */
+                                        if (!(hp[0] & args[j + 1]))
+                                                found = 1;
+                                        break;
+                                case 2:         /* AVPN */
+                                        if ((hp[0] & ~0x7fUL) == args[j + 1])
+                                                found = 1;
+                                        break;
+                                }
+                        }
+                        if (!found) {
+                                hp[0] &= ~HPTE_V_HVLOCK;
+                                args[j] = ((0x90 | flags) << 56) + pte_index;
+                                continue;
+                        }
+
+                        args[j] = ((0x80 | flags) << 56) + pte_index;
+                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+                        /* insert R and C bits from guest PTE */
+                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+                        args[j] |= rcbits << (56 - 5);
+
+                        if (!(hp[0] & HPTE_V_VALID))
+                                continue;
+
+                        hp[0] &= ~HPTE_V_VALID;         /* leave it locked */
+                        tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+                        indexes[n] = j;
+                        hptes[n] = hp;
+                        revs[n] = rev;
+                        ++n;
+                }
+
+                if (!n)
+                        break;
+
+                /* Now that we've collected a batch, do the tlbies */
+                if (!local) {
+                        while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+                                cpu_relax();
+                        asm volatile("ptesync" : : : "memory");
+                        for (k = 0; k < n; ++k)
+                                asm volatile(PPC_TLBIE(%1,%0) : :
+                                             "r" (tlbrb[k]),
+                                             "r" (kvm->arch.lpid));
+                        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+                        kvm->arch.tlbie_lock = 0;
+                } else {
+                        asm volatile("ptesync" : : : "memory");
+                        for (k = 0; k < n; ++k)
+                                asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+                        asm volatile("ptesync" : : : "memory");
+                }
+
+                for (k = 0; k < n; ++k) {
+                        j = indexes[k];
+                        pte_index = args[j] & ((1ul << 56) - 1);
+                        hp = hptes[k];
+                        rev = revs[k];
+                        remove_revmap_chain(kvm, pte_index, hp[0]);
+                        unlock_hpte(hp, 0);
+                }
         }
+
         return ret;
 }
 
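A detail worth pulling out of the hunk above is how each request's return word is rebuilt in args[j]: the top byte carries a completion code (0x80 processed, 0x90 not found, 0xa0 parameter error) combined with the two request flag bits, and on success the R and C bits from the guest view of the PTE are folded in by rcbits << (56 - 5), landing in bits 59:58. A standalone rendering of that encoding (the HPTE_R_R/HPTE_R_C values match the PowerPC definitions; the helper name is invented):

#include <stdio.h>

#define HPTE_R_R 0x100UL        /* referenced */
#define HPTE_R_C 0x080UL        /* changed */

static unsigned long bulk_remove_retval(unsigned long flags,
                                        unsigned long pte_index,
                                        unsigned long guest_rpte)
{
        /* success code 0x80 plus the low two flag bits, in bits 63:56 */
        unsigned long ret = ((0x80 | flags) << 56) + pte_index;
        unsigned long rcbits = guest_rpte & (HPTE_R_R | HPTE_R_C);

        /* R (bit 8) -> bit 59, C (bit 7) -> bit 58 */
        return ret | (rcbits << (56 - 5));
}

int main(void)
{
        /* absolute-match request (flags = 0), entry referenced and changed:
           prints 0x8c00000000000123 */
        printf("%#lx\n", bulk_remove_retval(0, 0x123,
                                            HPTE_R_R | HPTE_R_C));
        return 0;
}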
@@ -720,9 +760,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
         rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
         gr = rev->guest_rpte;
 
-        /* Unlock the HPTE */
-        asm volatile("lwsync" : : : "memory");
-        hpte[0] = v;
+        unlock_hpte(hpte, v);
 
         /* For not found, if the HPTE is valid by now, retry the instruction */
         if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))