2018-10-08 13:31:03 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* Copyright IBM Corporation, 2018
|
|
|
|
* Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
|
|
|
|
* Paul Mackerras <paulus@ozlabs.org>
|
|
|
|
*
|
|
|
|
* Description: KVM functions specific to running nested KVM-HV guests
|
|
|
|
* on Book3S processors (specifically POWER9 and later).
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
|
|
|
|
#include <asm/kvm_ppc.h>
|
2018-10-08 13:31:07 +08:00
|
|
|
#include <asm/kvm_book3s.h>
|
2018-10-08 13:31:03 +08:00
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/pgalloc.h>
|
2018-10-08 13:31:07 +08:00
|
|
|
#include <asm/pte-walk.h>
|
|
|
|
#include <asm/reg.h>
|
2018-10-08 13:31:03 +08:00
|
|
|
|
|
|
|
static struct patb_entry *pseries_partition_tb;
|
|
|
|
|
|
|
|
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
|
|
|
|
|
2018-10-08 13:31:04 +08:00
|
|
|
/*
 * Snapshot the hypervisor-privileged register state of a vcpu into
 * @hr, so the L1 state can be restored after running a nested (L2)
 * guest.  Counterpart of restore_hv_regs().
 */
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	/* Per-vcore state (shared by the threads of the virtual core) */
	hr->pcr = vc->pcr;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	/* Debug registers (DAWR/DAWRX/CIABR) */
	hr->dawr0 = vcpu->arch.dawr;
	hr->dawrx0 = vcpu->arch.dawrx;
	hr->ciabr = vcpu->arch.ciabr;
	/* Accounting/time-base related SPRs */
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	/* Interrupt save/restore and scratch SPRs */
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
}
|
|
|
|
|
|
|
|
/*
 * Copy the vcpu state that is to be returned to the L1 hypervisor
 * after an L2 exit into @hr.  Unlike kvmhv_save_hv_regs(), this also
 * captures the trap-specific fault information (HDAR/HDSISR/ASDR/HEIR)
 * that L1 needs in order to handle the interrupt @trap.
 */
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		/* HDSI: give L1 the faulting address/reason */
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		/* Emulation assist: pass the failing instruction image */
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}
|
|
|
|
|
|
|
|
/*
 * Load the hypervisor-privileged register state in @hr into the vcpu
 * (and its virtual core).  Used both to switch the vcpu to L2 state on
 * H_ENTER_NESTED and to restore the saved L1 state afterwards.
 * Counterpart of kvmhv_save_hv_regs().
 */
static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr = hr->dawr0;
	vcpu->arch.dawrx = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}
|
|
|
|
|
|
|
|
/*
 * Apply the return state in @hr (as produced by an L2 exit) to the
 * vcpu, including the fault-describing registers, so that the vcpu
 * reflects the state the nested hypervisor should see on return from
 * H_ENTER_NESTED.
 */
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	/* Fault information captured by save_hv_return_state() */
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}
|
|
|
|
|
|
|
|
/*
 * Handle the H_ENTER_NESTED hcall from an L1 hypervisor.
 *
 * r4 = L1 guest real address of a struct hv_guest_state describing L2
 * r5 = L1 guest real address of a struct pt_regs with L2's GPRs etc.
 *
 * Switches the vcpu from L1 to L2 state, runs the L2 guest until it
 * exits (or the requested HDEC expires), then restores L1 state and
 * writes the final L2 state back to the L1-supplied buffers.
 * Returns an hcall status or, on a normal exit, the trap number that
 * caused the L2 exit (for L1 to handle).
 */
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
	u64 mask;
	unsigned long lpcr;

	/* L1 must have registered a partition table first */
	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
				  sizeof(struct hv_guest_state));
	if (err)
		return H_PARAMETER;
	if (l2_hv.version != HV_GUEST_STATE_VERSION)
		return H_P2;

	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
				  sizeof(struct pt_regs));
	if (err)
		return H_PARAMETER;

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		/* First entry for this lpid: pull in L1's ptbl entry */
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.regs = l2_regs;
	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
	/* Only let L1 control these LPCR bits; the rest stay as L0 set them */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;
	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
	restore_hv_regs(vcpu, &l2_hv);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			/* L2's hypervisor decrementer has expired */
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
					  lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	/*
	 * Remember how much PURR/SPURR/IC/VTB advanced while L2 ran, so
	 * the same amount can be credited to L1's values below.
	 */
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
				   sizeof(struct hv_guest_state));
	if (err)
		return H_AUTHORITY;
	err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
				   sizeof(struct pt_regs));
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	return vcpu->arch.trap;
}
|
|
|
|
|
2018-10-08 13:31:03 +08:00
|
|
|
/*
 * One-time setup for nested HV support when we ourselves are running
 * as an L1 guest under pseries: allocate our partition table and
 * register it with the real (L0) hypervisor via H_SET_PARTITION_TABLE.
 * Returns 0 on success (or when nothing needs doing), negative errno
 * on failure.
 */
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	/* Nothing to do if we are the real hypervisor (bare metal) */
	if (!kvmhv_on_pseries())
		return 0;
	/* Nested HV requires the radix MMU */
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;	/* hardware minimum: 256 entries */
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocated nested partition table\n");
		return -ENOMEM;
	}

	/* PTCR format: table real address | (log2(size) - 12) */
	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Tear down nested HV support: deregister our partition table from the
 * parent hypervisor and free it.  Safe to call when kvmhv_nested_init()
 * never succeeded (pseries_partition_tb is then NULL).
 */
void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}
|
|
|
|
|
|
|
|
/*
 * Write a partition-table entry for @lpid.  In HV mode we own the
 * hardware partition table and update it directly; when running nested
 * under pseries we instead update our in-memory table, stored
 * big-endian as the hardware (and the parent hypervisor) expects.
 */
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		struct patb_entry *entry = &pseries_partition_tb[lpid];

		entry->patb0 = cpu_to_be64(dw0);
		entry->patb1 = cpu_to_be64(dw1);
		return;
	}

	mmu_partition_table_set_entry(lpid, dw0, dw1);
}
|
|
|
|
|
|
|
|
/*
 * Install the partition-table entry for a nested guest's shadow lpid:
 * dw0 points at our shadow page table for the guest, dw1 is the
 * process table address that L1 registered for it.
 */
static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}
|
|
|
|
|
|
|
|
/* Per-VM init of nested state: no nested guests exist yet. */
void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}
|
|
|
|
|
|
|
|
/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 * Records the L1 guest's partition table location so that nested-guest
 * entries can be looked up later; returns H_SUCCESS or H_PARAMETER.
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	/* srcu protects the memslot lookup in kvm_is_visible_gfn() */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 *
 * Reads the nested guest's entry from L1's partition table and caches
 * the two doublewords (radix tree root, process table) in @gp.  On any
 * failure the cached values are zeroed, which marks the entry invalid.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	/* Each partition table entry is 16 bytes, hence the << 4 */
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
	if (ret) {
		/* lpid out of range or read failed: invalidate the cache */
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}
|
|
|
|
|
|
|
|
/*
 * Allocate and initialize a kvm_nested_guest for the L1 lpid @lpid:
 * a shadow page table plus a host (shadow) lpid of our own.
 * Returns NULL on any allocation failure.  The returned struct is not
 * yet visible in kvm->arch.nested_guests; the caller installs it.
 */
struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;

	return gp;

 out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
 out_free:
	kfree(gp);
	return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Free up any resources allocated for a nested guest.
 * Called once the refcount has dropped to zero and the struct is no
 * longer reachable via kvm->arch.nested_guests.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	/* Clear the hardware/parent-visible partition table entry */
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}
|
|
|
|
|
|
|
|
/*
 * Remove a nested guest from the l1_lpid -> guest table, dropping the
 * table's reference, and free it if that was the last reference.
 * Also shrinks max_nested_lpid when the highest-numbered entry goes.
 */
static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	/* Only unlink if the table still points at this very struct */
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			/* Scan down for the next-highest live entry */
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	/* Free outside the lock; nobody can find gp any more */
	if (ref == 0)
		kvmhv_release_nested(gp);
}
|
|
|
|
|
|
|
|
/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;

	/*
	 * Unlink everything under the lock, collecting guests whose last
	 * reference we dropped onto a local list; release them afterwards
	 * without holding mmu_lock (kvmhv_release_nested may sleep).
	 */
	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}
}
|
|
|
|
|
|
|
|
/* caller must hold gp->tlb_lock */
/*
 * Throw away all shadow translations for a nested guest and re-read
 * its partition table entry from L1.  If L1 has invalidated the entry
 * (patb0 reads back as 0), drop the guest entirely.
 */
void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	/* Flush any stale hardware translations for the shadow lpid */
	radix__flush_tlb_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}
|
|
|
|
|
|
|
|
/*
 * Look up the nested guest for L1 lpid @l1_lpid, taking a reference.
 * If it doesn't exist and @create is true, allocate and install one.
 * Returns NULL for an out-of-range lpid, a failed allocation, or a
 * miss with !create.  The caller must balance with kvmhv_put_nested().
 */
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	/* Reject lpids beyond our table or beyond L1's partition table */
	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	/* Allocate outside the lock, then try to install */
	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;	/* reference held by the table */
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;	/* reference for the caller */
	spin_unlock(&kvm->mmu_lock);

	/* Lost the race: discard our unused allocation */
	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}
|
|
|
|
|
|
|
|
/*
 * Drop a reference obtained from kvmhv_get_nested(), freeing the
 * nested guest when the last reference goes away.
 */
void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}
|
2018-10-08 13:31:04 +08:00
|
|
|
|
2018-10-08 13:31:07 +08:00
|
|
|
/*
 * Remove the shadow pte (if any) covering nested-guest real address
 * @gpa from @gp's shadow page table.  Returns true if a present pte
 * was found and unmapped.  If @shift_ret is non-NULL it receives the
 * page shift of the mapping (PAGE_SHIFT when no pte level was found).
 */
static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}
|
|
|
|
|
|
|
|
/* Used to convert a nested guest real address to a L1 guest real address */
/*
 * Walks L1's partition-scoped radix tree for @n_gpa and fills in
 * @gpte_p with the translation.  On success, also checks that the pte
 * permits the access described by @dsisr / the current trap.  If the
 * walk or the permission check fails, fault information is set up in
 * the vcpu so the interrupt can be reflected to L1, and RESUME_HOST is
 * returned.  Returns 0 on success, or a negative errno for unexpected
 * walk failures.
 */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);

	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_OR_G;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	/* For an HISI the reason bits go in SRR1/MSR rather than DSISR */
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= ~0x783f0000ul;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}
|
2018-10-08 13:31:07 +08:00
|
|
|
|
|
|
|
/*
 * Handle a fault caused by missing reference/change (R/C) bits.
 * If L1's pte already grants the needed R/C bits, set them in both our
 * L1-scoped pgtable and the nested guest's shadow pgtable.  Returns 0
 * on success, RESUME_HOST when L1 must set its own R/C bits first, or
 * -EINVAL when the expected pte was not found (caller invalidates).
 */
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	bool ret;

	/* Are the rc bits set in the L1 partition scoped pte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
				      gpte.raddr, kvm->arch.lpid);
	spin_unlock(&kvm->mmu_lock);
	if (!ret)
		return -EINVAL;

	/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
	ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
				      gp->shadow_lpid);
	if (!ret)
		return -EINVAL;
	return 0;
}
|
|
|
|
|
|
|
|
static inline int kvmppc_radix_level_to_shift(int level)
|
|
|
|
{
|
|
|
|
switch (level) {
|
|
|
|
case 2:
|
|
|
|
return PUD_SHIFT;
|
|
|
|
case 1:
|
|
|
|
return PMD_SHIFT;
|
|
|
|
default:
|
|
|
|
return PAGE_SHIFT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int kvmppc_radix_shift_to_level(int shift)
|
|
|
|
{
|
|
|
|
if (shift == PUD_SHIFT)
|
|
|
|
return 2;
|
|
|
|
if (shift == PMD_SHIFT)
|
|
|
|
return 1;
|
|
|
|
if (shift == PAGE_SHIFT)
|
|
|
|
return 0;
|
|
|
|
WARN_ON_ONCE(1);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* called with gp->tlb_lock held */
/*
 * Handle an HDSI/HISI taken while a nested (L2) guest was running:
 * translate the nested guest real address through L1's tables, find or
 * create the host mapping, and insert a combined pte into the nested
 * guest's shadow page table.  Returns RESUME_GUEST/RESUME_HOST or a
 * negative errno.
 */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		/* Partition table cache is stale; refresh it first */
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */

	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		/* If R/C was the only problem, we are done */
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}
		/* passthrough of emulated MMIO case... */
		pr_err("emulated MMIO passthrough?\n");
		return -EINVAL;
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions is the combination of the host and l1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;
		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		/* Fold the sub-page offset of the host page into the pte */
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST;	/* Let the guest try again */

	return ret;

 inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}
|
|
|
|
|
|
|
|
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm_nested_guest *gp = vcpu->arch.nested;
|
|
|
|
long int ret;
|
|
|
|
|
|
|
|
mutex_lock(&gp->tlb_lock);
|
|
|
|
ret = __kvmhv_nested_page_fault(vcpu, gp);
|
|
|
|
mutex_unlock(&gp->tlb_lock);
|
|
|
|
return ret;
|
|
|
|
}
|