@@ -17,7 +17,6 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
-#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008
@@ -325,10 +324,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 #endif
 
 	/* First step: get the top-level Guest page table entry. */
-	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
-	/* Toplevel not present? We can't map it in. */
-	if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpgd = __pgd(CHECK_GPGD_MASK);
+	} else {
+		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+		/* Toplevel not present? We can't map it in. */
+		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -353,10 +357,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	}
 
 #ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	/* Middle level not present? We can't map it in. */
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpmd = __pmd(_PAGE_TABLE);
+	} else {
+		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+		/* Middle level not present? We can't map it in. */
+		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spmd = spmd_addr(cpu, *spgd, vaddr);
@@ -397,8 +406,13 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 #endif
 
-	/* Read the actual PTE value. */
-	gpte = lgread(cpu, gpte_ptr, pte_t);
+	if (unlikely(cpu->linear_pages)) {
+		/* Linear? Make up a PTE which points to same page. */
+		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+	} else {
+		/* Read the actual PTE value. */
+		gpte = lgread(cpu, gpte_ptr, pte_t);
+	}
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -454,7 +468,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	 * Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
 	 */
-	lgwrite(cpu, gpte_ptr, pte_t, gpte);
+	if (likely(!cpu->linear_pages))
+		lgwrite(cpu, gpte_ptr, pte_t, gpte);
 
 	/*
 	 * The fault is fixed, the page table is populated, the mapping
@@ -612,6 +627,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 #ifdef CONFIG_X86_PAE
 	pmd_t gpmd;
 #endif
+
+	/* Still not set up? Just map 1:1. */
+	if (unlikely(cpu->linear_pages))
+		return vaddr;
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present? We can't map it in. */
@@ -708,32 +728,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 	return next;
 }
 
-/*H:430
- * (iv) Switching page tables
- *
- * Now we've seen all the page table setting and manipulation, let's see
- * what happens when the Guest changes page tables (ie. changes the top-level
- * pgdir). This occurs on almost every context switch.
- */
-void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
-{
-	int newpgdir, repin = 0;
-
-	/* Look to see if we have this one already. */
-	newpgdir = find_pgdir(cpu->lg, pgtable);
-	/*
-	 * If not, we allocate or mug an existing one: if it's a fresh one,
-	 * repin gets set to 1.
-	 */
-	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
-		newpgdir = new_pgdir(cpu, pgtable, &repin);
-	/* Change the current pgd index to the new one. */
-	cpu->cpu_pgd = newpgdir;
-	/* If it was completely blank, we map in the Guest kernel stack */
-	if (repin)
-		pin_stack_pages(cpu);
-}
-
 /*H:470
  * Finally, a routine which throws away everything: all PGD entries in all
  * the shadow page tables, including the Guest's kernel mappings. This is used
@@ -780,6 +774,44 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
 	/* We need the Guest kernel stack mapped again. */
 	pin_stack_pages(cpu);
 }
+
+/*H:430
+ * (iv) Switching page tables
+ *
+ * Now we've seen all the page table setting and manipulation, let's see
+ * what happens when the Guest changes page tables (ie. changes the top-level
+ * pgdir). This occurs on almost every context switch.
+ */
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
+{
+	int newpgdir, repin = 0;
+
+	/*
+	 * The very first time they call this, we're actually running without
+	 * any page tables; we've been making it up. Throw them away now.
+	 */
+	if (unlikely(cpu->linear_pages)) {
+		release_all_pagetables(cpu->lg);
+		cpu->linear_pages = false;
+		/* Force allocation of a new pgdir. */
+		newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
+	} else {
+		/* Look to see if we have this one already. */
+		newpgdir = find_pgdir(cpu->lg, pgtable);
+	}
+
+	/*
+	 * If not, we allocate or mug an existing one: if it's a fresh one,
+	 * repin gets set to 1.
+	 */
+	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
+		newpgdir = new_pgdir(cpu, pgtable, &repin);
+	/* Change the current pgd index to the new one. */
+	cpu->cpu_pgd = newpgdir;
+	/* If it was completely blank, we map in the Guest kernel stack */
+	if (repin)
+		pin_stack_pages(cpu);
+}
+/*:*/
 
 /*M:009
@@ -919,168 +951,26 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
 }
 #endif
 
-/*H:505
- * To get through boot, we construct simple identity page mappings (which
- * set virtual == physical) and linear mappings which will get the Guest far
- * enough into the boot to create its own. The linear mapping means we
- * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET,
- * as you'll see.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here).
- */
-static unsigned long setup_pagetables(struct lguest *lg,
-				      unsigned long mem,
-				      unsigned long initrd_size)
-{
-	pgd_t __user *pgdir;
-	pte_t __user *linear;
-	unsigned long mem_base = (unsigned long)lg->mem_base;
-	unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
-	pmd_t __user *pmds;
-	unsigned int j;
-	pgd_t pgd;
-	pmd_t pmd;
-#else
-	unsigned int phys_linear;
-#endif
-
-	/*
-	 * We have mapped_pages frames to map, so we need linear_pages page
-	 * tables to map them.
-	 */
-	mapped_pages = mem / PAGE_SIZE;
-	linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
-
-	/* We put the toplevel page directory page at the top of memory. */
-	pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
-
-	/* Now we use the next linear_pages pages as pte pages */
-	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * And the single mid page goes below that. We only use one, but
-	 * that's enough to map 1G, which definitely gets us through boot.
-	 */
-	pmds = (void *)linear - PAGE_SIZE;
-#endif
-	/*
-	 * Linear mapping is easy: put every page's address into the
-	 * mapping in order.
-	 */
-	for (i = 0; i < mapped_pages; i++) {
-		pte_t pte;
-		pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
-		if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
-			return -EFAULT;
-	}
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Make the Guest PMD entries point to the corresponding place in the
-	 * linear mapping (up to one page worth of PMD).
-	 */
-	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
-	     i += PTRS_PER_PTE, j++) {
-		pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
-			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
-			return -EFAULT;
-	}
-
-	/* One PGD entry, pointing to that PMD page. */
-	pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
-	/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
-	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
-		return -EFAULT;
-	/*
-	 * And the other PGD entry to make the linear mapping at PAGE_OFFSET
-	 */
-	if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
-		return -EFAULT;
-#else
-	/*
-	 * The top level points to the linear page table pages above.
-	 * We setup the identity and linear mappings here.
-	 */
-	phys_linear = (unsigned long)linear - mem_base;
-	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
-		pgd_t pgd;
-		/*
-		 * Create a PGD entry which points to the right part of the
-		 * linear PTE pages.
-		 */
-		pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
-			    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		/*
-		 * Copy it into the PGD page at 0 and PAGE_OFFSET.
-		 */
-		if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
-		    || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
-					   + i / PTRS_PER_PTE],
-				    &pgd, sizeof(pgd)))
-			return -EFAULT;
-	}
-#endif
-
-	/*
-	 * We return the top level (guest-physical) address: we remember where
-	 * this is to write it into lguest_data when the Guest initializes.
-	 */
-	return (unsigned long)pgdir - mem_base;
-}
-
 /*H:500
  * (vii) Setting up the page tables initially.
  *
- * When a Guest is first created, the Launcher tells us where the toplevel of
- * its first page table is. We set some things up here:
+ * When a Guest is first created, we initialize a shadow page table which
+ * we will populate on future faults. The Guest doesn't have any actual
+ * pagetables yet, so we set linear_pages to tell demand_page() to fake it
+ * for the moment.
  */
 int init_guest_pagetable(struct lguest *lg)
 {
-	u64 mem;
-	u32 initrd_size;
-	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
-	pgd_t *pgd;
-	pmd_t *pmd_table;
-#endif
-	/*
-	 * Get the Guest memory size and the ramdisk size from the boot header
-	 * located at lg->mem_base (Guest address 0).
-	 */
-	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
-	    || get_user(initrd_size, &boot->hdr.ramdisk_size))
-		return -EFAULT;
+	struct lg_cpu *cpu = &lg->cpus[0];
+	int allocated = 0;
 
-	/*
-	 * We start on the first shadow page table, and give it a blank PGD
-	 * page.
-	 */
-	lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
-	if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
-		return lg->pgdirs[0].gpgdir;
-	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-	if (!lg->pgdirs[0].pgdir)
+	/* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
+	cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
+	if (!allocated)
 		return -ENOMEM;
 
-#ifdef CONFIG_X86_PAE
-	/* For PAE, we also create the initial mid-level. */
-	pgd = lg->pgdirs[0].pgdir;
-	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
-	if (!pmd_table)
-		return -ENOMEM;
-
-	set_pgd(pgd + SWITCHER_PGD_INDEX,
-		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
-
-	/* This is the current page table. */
-	lg->cpus[0].cpu_pgd = 0;
+	/* We start with a linear mapping until the Guest initializes. */
+	cpu->linear_pages = true;
 	return 0;
 }
 
@@ -1095,10 +985,10 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 		 * of virtual addresses used by the Switcher.
 		 */
 	    || put_user(RESERVE_MEM * 1024 * 1024,
-			&cpu->lg->lguest_data->reserve_mem)
-	    || put_user(cpu->lg->pgdirs[0].gpgdir,
-			&cpu->lg->lguest_data->pgdir))
+			&cpu->lg->lguest_data->reserve_mem)) {
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
+		return;
+	}
 
 	/*
 	 * In flush_user_mappings() we loop from 0 to