KVM: PPC: Book3S HV: Handle page fault for a nested guest
Consider a normal (L1) guest running under the main hypervisor (L0), and then a nested guest (L2) running under the L1 guest which is acting as a nested hypervisor. L0 has page tables to map the address space for L1 providing the translation from L1 real address -> L0 real address;

        L1
        |
        | (L1 -> L0)
        |
        ----> L0

There are also page tables in L1 used to map the address space for L2 providing the translation from L2 real address -> L1 real address. Since the hardware can only walk a single level of page table, we need to maintain in L0 a "shadow_pgtable" for L2 which provides the translation from L2 real address -> L0 real address. Which looks like;

        L2                              L2
        |                               |
        | (L2 -> L1)                    |
        |                               |
        ----> L1                        | (L2 -> L0)
              |                         |
              | (L1 -> L0)              |
              |                         |
              ----> L0                  --------> L0

When a page fault occurs while running a nested (L2) guest we need to insert a pte into this "shadow_pgtable" for the L2 -> L0 mapping. To do this we need to:

1. Walk the pgtable in L1 memory to find the L2 -> L1 mapping, and provide a page fault to L1 if this mapping doesn't exist.
2. Use our L1 -> L0 pgtable to convert this L1 address to an L0 address, or try to insert a pte for that mapping if it doesn't exist.
3. Now we have a L2 -> L0 mapping, insert this into our shadow_pgtable.

Once this mapping exists we can take rc faults when hardware is unable to automatically set the reference and change bits in the pte. On these we need to:

1. Check the rc bits on the L2 -> L1 pte match, and otherwise reflect the fault down to L1.
2. Set the rc bits in the L1 -> L0 pte which corresponds to the same host page.
3. Set the rc bits in the L2 -> L0 pte.

As we reuse a large number of functions in book3s_64_mmu_radix.c for this we also needed to refactor a number of these functions to take an lpid parameter so that the correct lpid is used for tlb invalidations. The functionality however has remained the same.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
4bad77799f
commit
fd10be2573
|
@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
|
|||
unsigned long addr,
|
||||
unsigned long page_size);
|
||||
extern void radix__flush_pwc_lpid(unsigned int lpid);
|
||||
extern void radix__flush_tlb_lpid(unsigned int lpid);
|
||||
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
|
||||
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
|
||||
|
||||
|
|
|
@ -188,17 +188,34 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
|
|||
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu,
|
||||
unsigned long ea, unsigned long dsisr);
|
||||
extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 root,
|
||||
u64 *pte_ret_p);
|
||||
extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 table,
|
||||
int table_index, u64 *pte_ret_p);
|
||||
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, bool data, bool iswrite);
|
||||
extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
|
||||
bool writing, unsigned long gpa,
|
||||
unsigned int lpid);
|
||||
extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa,
|
||||
struct kvm_memory_slot *memslot,
|
||||
bool writing, bool kvm_ro,
|
||||
pte_t *inserted_pte, unsigned int *levelp);
|
||||
extern int kvmppc_init_vm_radix(struct kvm *kvm);
|
||||
extern void kvmppc_free_radix(struct kvm *kvm);
|
||||
extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
|
||||
unsigned int lpid);
|
||||
extern int kvmppc_radix_init(void);
|
||||
extern void kvmppc_radix_exit(void);
|
||||
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
|
||||
unsigned long gpa, unsigned int shift,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned int lpid);
|
||||
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
|
|
|
@ -549,6 +549,10 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
|
||||
|
||||
extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
||||
unsigned long gpa, unsigned int level,
|
||||
unsigned long mmu_seq, unsigned int lpid);
|
||||
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
|
||||
#endif /* __ASM_KVM_BOOK3S_64_H__ */
|
||||
|
|
|
@ -367,7 +367,9 @@ struct kvmppc_pte {
|
|||
bool may_write : 1;
|
||||
bool may_execute : 1;
|
||||
unsigned long wimg;
|
||||
unsigned long rc;
|
||||
u8 page_size; /* MMU_PAGE_xxx */
|
||||
u8 page_shift;
|
||||
};
|
||||
|
||||
struct kvmppc_mmu {
|
||||
|
|
|
@ -29,43 +29,16 @@
|
|||
*/
|
||||
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
|
||||
|
||||
/*
|
||||
* Used to walk a partition or process table radix tree in guest memory
|
||||
* Note: We exploit the fact that a partition table and a process
|
||||
* table have the same layout, a partition-scoped page table and a
|
||||
* process-scoped page table have the same layout, and the 2nd
|
||||
* doubleword of a partition table entry has the same layout as
|
||||
* the PTCR register.
|
||||
*/
|
||||
int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 table,
|
||||
int table_index, u64 *pte_ret_p)
|
||||
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 root,
|
||||
u64 *pte_ret_p)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int ret, level, ps;
|
||||
unsigned long ptbl, root;
|
||||
unsigned long rts, bits, offset;
|
||||
unsigned long size, index;
|
||||
struct prtb_entry entry;
|
||||
unsigned long rts, bits, offset, index;
|
||||
u64 pte, base, gpa;
|
||||
__be64 rpte;
|
||||
|
||||
if ((table & PRTS_MASK) > 24)
|
||||
return -EINVAL;
|
||||
size = 1ul << ((table & PRTS_MASK) + 12);
|
||||
|
||||
/* Is the table big enough to contain this entry? */
|
||||
if ((table_index * sizeof(entry)) >= size)
|
||||
return -EINVAL;
|
||||
|
||||
/* Read the table to find the root of the radix tree */
|
||||
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
|
||||
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Root is stored in the first double word */
|
||||
root = be64_to_cpu(entry.prtb0);
|
||||
rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
|
||||
((root & RTS2_MASK) >> RTS2_SHIFT);
|
||||
bits = root & RPDS_MASK;
|
||||
|
@ -79,6 +52,7 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
|
||||
/* Walk each level of the radix tree */
|
||||
for (level = 3; level >= 0; --level) {
|
||||
u64 addr;
|
||||
/* Check a valid size */
|
||||
if (level && bits != p9_supported_radix_bits[level])
|
||||
return -EINVAL;
|
||||
|
@ -90,10 +64,13 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
if (base & ((1UL << (bits + 3)) - 1))
|
||||
return -EINVAL;
|
||||
/* Read the entry from guest memory */
|
||||
ret = kvm_read_guest(kvm, base + (index * sizeof(rpte)),
|
||||
&rpte, sizeof(rpte));
|
||||
if (ret)
|
||||
addr = base + (index * sizeof(rpte));
|
||||
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
|
||||
if (ret) {
|
||||
if (pte_ret_p)
|
||||
*pte_ret_p = addr;
|
||||
return ret;
|
||||
}
|
||||
pte = __be64_to_cpu(rpte);
|
||||
if (!(pte & _PAGE_PRESENT))
|
||||
return -ENOENT;
|
||||
|
@ -119,6 +96,7 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
if (offset == mmu_psize_defs[ps].shift)
|
||||
break;
|
||||
gpte->page_size = ps;
|
||||
gpte->page_shift = offset;
|
||||
|
||||
gpte->eaddr = eaddr;
|
||||
gpte->raddr = gpa;
|
||||
|
@ -128,12 +106,51 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
gpte->may_write = !!(pte & _PAGE_WRITE);
|
||||
gpte->may_execute = !!(pte & _PAGE_EXEC);
|
||||
|
||||
gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
|
||||
|
||||
if (pte_ret_p)
|
||||
*pte_ret_p = pte;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used to walk a partition or process table radix tree in guest memory
|
||||
* Note: We exploit the fact that a partition table and a process
|
||||
* table have the same layout, a partition-scoped page table and a
|
||||
* process-scoped page table have the same layout, and the 2nd
|
||||
* doubleword of a partition table entry has the same layout as
|
||||
* the PTCR register.
|
||||
*/
|
||||
int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 table,
|
||||
int table_index, u64 *pte_ret_p)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int ret;
|
||||
unsigned long size, ptbl, root;
|
||||
struct prtb_entry entry;
|
||||
|
||||
if ((table & PRTS_MASK) > 24)
|
||||
return -EINVAL;
|
||||
size = 1ul << ((table & PRTS_MASK) + 12);
|
||||
|
||||
/* Is the table big enough to contain this entry? */
|
||||
if ((table_index * sizeof(entry)) >= size)
|
||||
return -EINVAL;
|
||||
|
||||
/* Read the table to find the root of the radix tree */
|
||||
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
|
||||
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Root is stored in the first double word */
|
||||
root = be64_to_cpu(entry.prtb0);
|
||||
|
||||
return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
|
||||
}
|
||||
|
||||
int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, bool data, bool iswrite)
|
||||
{
|
||||
|
@ -181,7 +198,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
}
|
||||
|
||||
static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned int pshift)
|
||||
unsigned int pshift, unsigned int lpid)
|
||||
{
|
||||
unsigned long psize = PAGE_SIZE;
|
||||
|
||||
|
@ -189,12 +206,12 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
|||
psize = 1UL << pshift;
|
||||
|
||||
addr &= ~(psize - 1);
|
||||
radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize);
|
||||
radix__flush_tlb_lpid_page(lpid, addr, psize);
|
||||
}
|
||||
|
||||
static void kvmppc_radix_flush_pwc(struct kvm *kvm)
|
||||
static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
|
||||
{
|
||||
radix__flush_pwc_lpid(kvm->arch.lpid);
|
||||
radix__flush_pwc_lpid(lpid);
|
||||
}
|
||||
|
||||
static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
|
||||
|
@ -239,16 +256,17 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
|
|||
kmem_cache_free(kvm_pmd_cache, pmdp);
|
||||
}
|
||||
|
||||
static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
|
||||
unsigned long gpa, unsigned int shift,
|
||||
struct kvm_memory_slot *memslot)
|
||||
void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
|
||||
unsigned long gpa, unsigned int shift,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned int lpid)
|
||||
|
||||
{
|
||||
unsigned long old;
|
||||
|
||||
old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift);
|
||||
if (old & _PAGE_DIRTY) {
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
|
||||
if ((old & _PAGE_DIRTY) && (lpid == kvm->arch.lpid)) {
|
||||
unsigned long gfn = gpa >> PAGE_SHIFT;
|
||||
unsigned long page_size = PAGE_SIZE;
|
||||
|
||||
|
@ -271,7 +289,8 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
|
|||
* and emit a warning if encountered, but there may already be data
|
||||
* corruption due to the unexpected mappings.
|
||||
*/
|
||||
static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
|
||||
static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
|
||||
unsigned int lpid)
|
||||
{
|
||||
if (full) {
|
||||
memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
|
||||
|
@ -285,14 +304,15 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
|
|||
WARN_ON_ONCE(1);
|
||||
kvmppc_unmap_pte(kvm, p,
|
||||
pte_pfn(*p) << PAGE_SHIFT,
|
||||
PAGE_SHIFT, NULL);
|
||||
PAGE_SHIFT, NULL, lpid);
|
||||
}
|
||||
}
|
||||
|
||||
kvmppc_pte_free(pte);
|
||||
}
|
||||
|
||||
static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
|
||||
static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
|
||||
unsigned int lpid)
|
||||
{
|
||||
unsigned long im;
|
||||
pmd_t *p = pmd;
|
||||
|
@ -307,20 +327,21 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
|
|||
WARN_ON_ONCE(1);
|
||||
kvmppc_unmap_pte(kvm, (pte_t *)p,
|
||||
pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
|
||||
PMD_SHIFT, NULL);
|
||||
PMD_SHIFT, NULL, lpid);
|
||||
}
|
||||
} else {
|
||||
pte_t *pte;
|
||||
|
||||
pte = pte_offset_map(p, 0);
|
||||
kvmppc_unmap_free_pte(kvm, pte, full);
|
||||
kvmppc_unmap_free_pte(kvm, pte, full, lpid);
|
||||
pmd_clear(p);
|
||||
}
|
||||
}
|
||||
kvmppc_pmd_free(pmd);
|
||||
}
|
||||
|
||||
static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
|
||||
static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
|
||||
unsigned int lpid)
|
||||
{
|
||||
unsigned long iu;
|
||||
pud_t *p = pud;
|
||||
|
@ -334,36 +355,40 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
|
|||
pmd_t *pmd;
|
||||
|
||||
pmd = pmd_offset(p, 0);
|
||||
kvmppc_unmap_free_pmd(kvm, pmd, true);
|
||||
kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
|
||||
pud_clear(p);
|
||||
}
|
||||
}
|
||||
pud_free(kvm->mm, pud);
|
||||
}
|
||||
|
||||
void kvmppc_free_radix(struct kvm *kvm)
|
||||
void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
|
||||
{
|
||||
unsigned long ig;
|
||||
pgd_t *pgd;
|
||||
|
||||
if (!kvm->arch.pgtable)
|
||||
return;
|
||||
pgd = kvm->arch.pgtable;
|
||||
for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
|
||||
pud_t *pud;
|
||||
|
||||
if (!pgd_present(*pgd))
|
||||
continue;
|
||||
pud = pud_offset(pgd, 0);
|
||||
kvmppc_unmap_free_pud(kvm, pud);
|
||||
kvmppc_unmap_free_pud(kvm, pud, lpid);
|
||||
pgd_clear(pgd);
|
||||
}
|
||||
pgd_free(kvm->mm, kvm->arch.pgtable);
|
||||
kvm->arch.pgtable = NULL;
|
||||
}
|
||||
|
||||
void kvmppc_free_radix(struct kvm *kvm)
|
||||
{
|
||||
if (kvm->arch.pgtable) {
|
||||
kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
|
||||
kvm->arch.lpid);
|
||||
pgd_free(kvm->mm, kvm->arch.pgtable);
|
||||
kvm->arch.pgtable = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
|
||||
unsigned long gpa)
|
||||
unsigned long gpa, unsigned int lpid)
|
||||
{
|
||||
pte_t *pte = pte_offset_kernel(pmd, 0);
|
||||
|
||||
|
@ -373,13 +398,13 @@ static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
|
|||
* flushing the PWC again.
|
||||
*/
|
||||
pmd_clear(pmd);
|
||||
kvmppc_radix_flush_pwc(kvm);
|
||||
kvmppc_radix_flush_pwc(kvm, lpid);
|
||||
|
||||
kvmppc_unmap_free_pte(kvm, pte, false);
|
||||
kvmppc_unmap_free_pte(kvm, pte, false, lpid);
|
||||
}
|
||||
|
||||
static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
|
||||
unsigned long gpa)
|
||||
unsigned long gpa, unsigned int lpid)
|
||||
{
|
||||
pmd_t *pmd = pmd_offset(pud, 0);
|
||||
|
||||
|
@ -389,9 +414,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
|
|||
* so can be freed without flushing the PWC again.
|
||||
*/
|
||||
pud_clear(pud);
|
||||
kvmppc_radix_flush_pwc(kvm);
|
||||
kvmppc_radix_flush_pwc(kvm, lpid);
|
||||
|
||||
kvmppc_unmap_free_pmd(kvm, pmd, false);
|
||||
kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -403,9 +428,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
|
|||
*/
|
||||
#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
|
||||
|
||||
static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
||||
unsigned long gpa, unsigned int level,
|
||||
unsigned long mmu_seq)
|
||||
int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
||||
unsigned long gpa, unsigned int level,
|
||||
unsigned long mmu_seq, unsigned int lpid)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud, *new_pud = NULL;
|
||||
|
@ -471,7 +496,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
goto out_unlock;
|
||||
}
|
||||
/* Valid 1GB page here already, remove it */
|
||||
kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL);
|
||||
kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
|
||||
lpid);
|
||||
}
|
||||
if (level == 2) {
|
||||
if (!pud_none(*pud)) {
|
||||
|
@ -480,7 +506,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
* install a large page, so remove and free the page
|
||||
* table page.
|
||||
*/
|
||||
kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
|
||||
kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
|
||||
ret = 0;
|
||||
|
@ -506,7 +532,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
|
||||
PTE_BITS_MUST_MATCH);
|
||||
kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
|
||||
0, pte_val(pte), lgpa, PMD_SHIFT);
|
||||
0, pte_val(pte), lgpa, PMD_SHIFT);
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@ -520,7 +546,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
goto out_unlock;
|
||||
}
|
||||
/* Valid 2MB page here already, remove it */
|
||||
kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL);
|
||||
kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
|
||||
lpid);
|
||||
}
|
||||
if (level == 1) {
|
||||
if (!pmd_none(*pmd)) {
|
||||
|
@ -529,7 +556,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
* install a large page, so remove and free the page
|
||||
* table page.
|
||||
*/
|
||||
kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
|
||||
kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
|
||||
ret = 0;
|
||||
|
@ -569,8 +596,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
|
||||
bool writing, unsigned long gpa)
|
||||
bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
|
||||
unsigned long gpa, unsigned int lpid)
|
||||
{
|
||||
unsigned long pgflags;
|
||||
unsigned int shift;
|
||||
|
@ -597,11 +624,11 @@ static bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
|
|||
return false;
|
||||
}
|
||||
|
||||
static int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa,
|
||||
struct kvm_memory_slot *memslot,
|
||||
bool writing, bool kvm_ro,
|
||||
pte_t *inserted_pte, unsigned int *levelp)
|
||||
int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa,
|
||||
struct kvm_memory_slot *memslot,
|
||||
bool writing, bool kvm_ro,
|
||||
pte_t *inserted_pte, unsigned int *levelp)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct page *page = NULL;
|
||||
|
@ -683,7 +710,7 @@ static int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
|||
|
||||
/* Allocate space in the tree and write the PTE */
|
||||
ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
|
||||
mmu_seq);
|
||||
mmu_seq, kvm->arch.lpid);
|
||||
if (inserted_pte)
|
||||
*inserted_pte = pte;
|
||||
if (levelp)
|
||||
|
@ -758,7 +785,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
if (dsisr & DSISR_SET_RC) {
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
|
||||
writing, gpa))
|
||||
writing, gpa, kvm->arch.lpid))
|
||||
dsisr &= ~DSISR_SET_RC;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
|
@ -786,7 +813,8 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep))
|
||||
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot);
|
||||
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
|
||||
kvm->arch.lpid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -841,7 +869,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
|
|||
ret = 1 << (shift - PAGE_SHIFT);
|
||||
kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
|
||||
gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -12,9 +12,12 @@
|
|||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pte-walk.h>
|
||||
#include <asm/reg.h>
|
||||
|
||||
static struct patb_entry *pseries_partition_tb;
|
||||
|
||||
|
@ -403,10 +406,20 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
|
|||
*/
|
||||
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
|
||||
{
|
||||
struct kvm *kvm = gp->l1_host;
|
||||
|
||||
if (gp->shadow_pgtable) {
|
||||
/*
|
||||
* No vcpu is using this struct and no call to
|
||||
* kvmhv_get_nested can find this struct,
|
||||
* so we don't need to hold kvm->mmu_lock.
|
||||
*/
|
||||
kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
|
||||
gp->shadow_lpid);
|
||||
pgd_free(kvm->mm, gp->shadow_pgtable);
|
||||
}
|
||||
kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
|
||||
kvmppc_free_lpid(gp->shadow_lpid);
|
||||
if (gp->shadow_pgtable)
|
||||
pgd_free(gp->l1_host->mm, gp->shadow_pgtable);
|
||||
kfree(gp);
|
||||
}
|
||||
|
||||
|
@ -466,6 +479,12 @@ void kvmhv_release_all_nested(struct kvm *kvm)
|
|||
/* caller must hold gp->tlb_lock */
|
||||
void kvmhv_flush_nested(struct kvm_nested_guest *gp)
|
||||
{
|
||||
struct kvm *kvm = gp->l1_host;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
radix__flush_tlb_lpid(gp->shadow_lpid);
|
||||
kvmhv_update_ptbl_cache(gp);
|
||||
if (gp->l1_gr_to_hr == 0)
|
||||
kvmhv_remove_nested(gp);
|
||||
|
@ -525,7 +544,314 @@ void kvmhv_put_nested(struct kvm_nested_guest *gp)
|
|||
kvmhv_release_nested(gp);
|
||||
}
|
||||
|
||||
long kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
|
||||
static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_guest *gp,
|
||||
long gpa, int *shift_ret)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
bool ret = false;
|
||||
pte_t *ptep;
|
||||
int shift;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
|
||||
if (!shift)
|
||||
shift = PAGE_SHIFT;
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
|
||||
ret = true;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (shift_ret)
|
||||
*shift_ret = shift;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Used to convert a nested guest real address to a L1 guest real address */
|
||||
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_guest *gp,
|
||||
unsigned long n_gpa, unsigned long dsisr,
|
||||
struct kvmppc_pte *gpte_p)
|
||||
{
|
||||
u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
|
||||
int ret;
|
||||
|
||||
ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
|
||||
&fault_addr);
|
||||
|
||||
if (ret) {
|
||||
/* We didn't find a pte */
|
||||
if (ret == -EINVAL) {
|
||||
/* Unsupported mmu config */
|
||||
flags |= DSISR_UNSUPP_MMU;
|
||||
} else if (ret == -ENOENT) {
|
||||
/* No translation found */
|
||||
flags |= DSISR_NOHPTE;
|
||||
} else if (ret == -EFAULT) {
|
||||
/* Couldn't access L1 real address */
|
||||
flags |= DSISR_PRTABLE_FAULT;
|
||||
vcpu->arch.fault_gpa = fault_addr;
|
||||
} else {
|
||||
/* Unknown error */
|
||||
return ret;
|
||||
}
|
||||
goto forward_to_l1;
|
||||
} else {
|
||||
/* We found a pte -> check permissions */
|
||||
if (dsisr & DSISR_ISSTORE) {
|
||||
/* Can we write? */
|
||||
if (!gpte_p->may_write) {
|
||||
flags |= DSISR_PROTFAULT;
|
||||
goto forward_to_l1;
|
||||
}
|
||||
} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
|
||||
/* Can we execute? */
|
||||
if (!gpte_p->may_execute) {
|
||||
flags |= SRR1_ISI_N_OR_G;
|
||||
goto forward_to_l1;
|
||||
}
|
||||
} else {
|
||||
/* Can we read? */
|
||||
if (!gpte_p->may_read && !gpte_p->may_write) {
|
||||
flags |= DSISR_PROTFAULT;
|
||||
goto forward_to_l1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
forward_to_l1:
|
||||
vcpu->arch.fault_dsisr = flags;
|
||||
if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
|
||||
vcpu->arch.shregs.msr &= ~0x783f0000ul;
|
||||
vcpu->arch.shregs.msr |= flags;
|
||||
}
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_guest *gp,
|
||||
unsigned long n_gpa,
|
||||
struct kvmppc_pte gpte,
|
||||
unsigned long dsisr)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
bool writing = !!(dsisr & DSISR_ISSTORE);
|
||||
u64 pgflags;
|
||||
bool ret;
|
||||
|
||||
/* Are the rc bits set in the L1 partition scoped pte? */
|
||||
pgflags = _PAGE_ACCESSED;
|
||||
if (writing)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
if (pgflags & ~gpte.rc)
|
||||
return RESUME_HOST;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
|
||||
ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
|
||||
gpte.raddr, kvm->arch.lpid);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
if (!ret)
|
||||
return -EINVAL;
|
||||
|
||||
/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
|
||||
ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
|
||||
gp->shadow_lpid);
|
||||
if (!ret)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int kvmppc_radix_level_to_shift(int level)
|
||||
{
|
||||
switch (level) {
|
||||
case 2:
|
||||
return PUD_SHIFT;
|
||||
case 1:
|
||||
return PMD_SHIFT;
|
||||
default:
|
||||
return PAGE_SHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int kvmppc_radix_shift_to_level(int shift)
|
||||
{
|
||||
if (shift == PUD_SHIFT)
|
||||
return 2;
|
||||
if (shift == PMD_SHIFT)
|
||||
return 1;
|
||||
if (shift == PAGE_SHIFT)
|
||||
return 0;
|
||||
WARN_ON_ONCE(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* called with gp->tlb_lock held */
|
||||
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_guest *gp)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvmppc_pte gpte;
|
||||
pte_t pte, *pte_p;
|
||||
unsigned long mmu_seq;
|
||||
unsigned long dsisr = vcpu->arch.fault_dsisr;
|
||||
unsigned long ea = vcpu->arch.fault_dar;
|
||||
unsigned long n_gpa, gpa, gfn, perm = 0UL;
|
||||
unsigned int shift, l1_shift, level;
|
||||
bool writing = !!(dsisr & DSISR_ISSTORE);
|
||||
bool kvm_ro = false;
|
||||
long int ret;
|
||||
|
||||
if (!gp->l1_gr_to_hr) {
|
||||
kvmhv_update_ptbl_cache(gp);
|
||||
if (!gp->l1_gr_to_hr)
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
/* Convert the nested guest real address into a L1 guest real address */
|
||||
|
||||
n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
|
||||
if (!(dsisr & DSISR_PRTABLE_FAULT))
|
||||
n_gpa |= ea & 0xFFF;
|
||||
ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
|
||||
|
||||
/*
|
||||
* If the hardware found a translation but we don't now have a usable
|
||||
* translation in the l1 partition-scoped tree, remove the shadow pte
|
||||
* and let the guest retry.
|
||||
*/
|
||||
if (ret == RESUME_HOST &&
|
||||
(dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
|
||||
DSISR_BAD_COPYPASTE)))
|
||||
goto inval;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Failed to set the reference/change bits */
|
||||
if (dsisr & DSISR_SET_RC) {
|
||||
ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
|
||||
if (ret == RESUME_HOST)
|
||||
return ret;
|
||||
if (ret)
|
||||
goto inval;
|
||||
dsisr &= ~DSISR_SET_RC;
|
||||
if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
|
||||
DSISR_PROTFAULT)))
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/*
|
||||
* We took an HISI or HDSI while we were running a nested guest which
|
||||
* means we have no partition scoped translation for that. This means
|
||||
* we need to insert a pte for the mapping into our shadow_pgtable.
|
||||
*/
|
||||
|
||||
l1_shift = gpte.page_shift;
|
||||
if (l1_shift < PAGE_SHIFT) {
|
||||
/* We don't support l1 using a page size smaller than our own */
|
||||
pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
|
||||
l1_shift, PAGE_SHIFT);
|
||||
return -EINVAL;
|
||||
}
|
||||
gpa = gpte.raddr;
|
||||
gfn = gpa >> PAGE_SHIFT;
|
||||
|
||||
/* 1. Get the corresponding host memslot */
|
||||
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
|
||||
if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
|
||||
/* unusual error -> reflect to the guest as a DSI */
|
||||
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
/* passthrough of emulated MMIO case... */
|
||||
pr_err("emulated MMIO passthrough?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (memslot->flags & KVM_MEM_READONLY) {
|
||||
if (writing) {
|
||||
/* Give the guest a DSI */
|
||||
kvmppc_core_queue_data_storage(vcpu, ea,
|
||||
DSISR_ISSTORE | DSISR_PROTFAULT);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
kvm_ro = true;
|
||||
}
|
||||
|
||||
/* 2. Find the host pte for this L1 guest real address */
|
||||
|
||||
/* Used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
/* See if can find translation in our partition scoped tables for L1 */
|
||||
pte = __pte(0);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (!shift)
|
||||
shift = PAGE_SHIFT;
|
||||
if (pte_p)
|
||||
pte = *pte_p;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
|
||||
/* No suitable pte found -> try to insert a mapping */
|
||||
ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
|
||||
writing, kvm_ro, &pte, &level);
|
||||
if (ret == -EAGAIN)
|
||||
return RESUME_GUEST;
|
||||
else if (ret)
|
||||
return ret;
|
||||
shift = kvmppc_radix_level_to_shift(level);
|
||||
}
|
||||
|
||||
/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
|
||||
|
||||
/* The permissions is the combination of the host and l1 guest ptes */
|
||||
perm |= gpte.may_read ? 0UL : _PAGE_READ;
|
||||
perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
|
||||
perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
|
||||
pte = __pte(pte_val(pte) & ~perm);
|
||||
|
||||
/* What size pte can we insert? */
|
||||
if (shift > l1_shift) {
|
||||
u64 mask;
|
||||
unsigned int actual_shift = PAGE_SHIFT;
|
||||
if (PMD_SHIFT < l1_shift)
|
||||
actual_shift = PMD_SHIFT;
|
||||
mask = (1UL << shift) - (1UL << actual_shift);
|
||||
pte = __pte(pte_val(pte) | (gpa & mask));
|
||||
shift = actual_shift;
|
||||
}
|
||||
level = kvmppc_radix_shift_to_level(shift);
|
||||
n_gpa &= ~((1UL << shift) - 1);
|
||||
|
||||
/* 4. Insert the pte into our shadow_pgtable */
|
||||
|
||||
ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
|
||||
mmu_seq, gp->shadow_lpid);
|
||||
if (ret == -EAGAIN)
|
||||
ret = RESUME_GUEST; /* Let the guest try again */
|
||||
|
||||
return ret;
|
||||
|
||||
inval:
|
||||
kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_nested_guest *gp = vcpu->arch.nested;
|
||||
long int ret;
|
||||
|
||||
mutex_lock(&gp->tlb_lock);
|
||||
ret = __kvmhv_nested_page_fault(vcpu, gp);
|
||||
mutex_unlock(&gp->tlb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -830,6 +830,15 @@ void radix__flush_pwc_lpid(unsigned int lpid)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
|
||||
|
||||
/*
|
||||
* Flush partition scoped translations from LPID (=LPIDR)
|
||||
*/
|
||||
void radix__flush_tlb_lpid(unsigned int lpid)
|
||||
{
|
||||
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
|
||||
|
||||
/*
|
||||
* Flush partition scoped translations from LPID (=LPIDR)
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue