ARM: KVM: rework HYP page table freeing

There is no point in freeing HYP page tables differently from Stage-2.
They now have the same requirements, and should be dealt with the same way.

Promote unmap_stage2_range to be The One True Way, and get rid of a number
of nasty bugs in the process (good thing we never actually called free_hyp_pmds
before...).

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
This commit is contained in:
Marc Zyngier 2013-04-12 19:12:05 +01:00 committed by Christoffer Dall
parent 0394e1f605
commit 4f728276fb
3 changed files with 82 additions and 103 deletions

View File

@ -32,7 +32,7 @@
int create_hyp_mappings(void *from, void *to); int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t); int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_hyp_pmds(void); void free_hyp_pgds(void);
int kvm_alloc_stage2_pgd(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm);

View File

@ -928,7 +928,7 @@ static int init_hyp_mode(void)
out_free_vfp: out_free_vfp:
free_percpu(kvm_host_vfp_state); free_percpu(kvm_host_vfp_state);
out_free_mappings: out_free_mappings:
free_hyp_pmds(); free_hyp_pgds();
out_free_stack_pages: out_free_stack_pages:
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));

View File

@ -72,56 +72,104 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p; return p;
} }
static void free_ptes(pmd_t *pmd, unsigned long addr) static void clear_pud_entry(pud_t *pud)
{ {
pte_t *pte; pmd_t *pmd_table = pmd_offset(pud, 0);
unsigned int i; pud_clear(pud);
pmd_free(NULL, pmd_table);
put_page(virt_to_page(pud));
}
for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { static void clear_pmd_entry(pmd_t *pmd)
if (!pmd_none(*pmd) && pmd_table(*pmd)) { {
pte = pte_offset_kernel(pmd, addr); pte_t *pte_table = pte_offset_kernel(pmd, 0);
pte_free_kernel(NULL, pte); pmd_clear(pmd);
} pte_free_kernel(NULL, pte_table);
pmd++; put_page(virt_to_page(pmd));
}
static bool pmd_empty(pmd_t *pmd)
{
struct page *pmd_page = virt_to_page(pmd);
return page_count(pmd_page) == 1;
}
static void clear_pte_entry(pte_t *pte)
{
if (pte_present(*pte)) {
kvm_set_pte(pte, __pte(0));
put_page(virt_to_page(pte));
} }
} }
static void free_hyp_pgd_entry(unsigned long addr) static bool pte_empty(pte_t *pte)
{
struct page *pte_page = virt_to_page(pte);
return page_count(pte_page) == 1;
}
static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
unsigned long hyp_addr = KERN_TO_HYP(addr); pte_t *pte;
unsigned long long addr = start, end = start + size;
u64 range;
pgd = hyp_pgd + pgd_index(hyp_addr); while (addr < end) {
pud = pud_offset(pgd, hyp_addr); pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
addr += PUD_SIZE;
continue;
}
if (pud_none(*pud)) pmd = pmd_offset(pud, addr);
return; if (pmd_none(*pmd)) {
BUG_ON(pud_bad(*pud)); addr += PMD_SIZE;
continue;
}
pmd = pmd_offset(pud, hyp_addr); pte = pte_offset_kernel(pmd, addr);
free_ptes(pmd, addr); clear_pte_entry(pte);
pmd_free(NULL, pmd); range = PAGE_SIZE;
pud_clear(pud);
/* If we emptied the pte, walk back up the ladder */
if (pte_empty(pte)) {
clear_pmd_entry(pmd);
range = PMD_SIZE;
if (pmd_empty(pmd)) {
clear_pud_entry(pud);
range = PUD_SIZE;
}
}
addr += range;
}
} }
/** /**
* free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables * free_hyp_pgds - free Hyp-mode page tables
* *
* Assumes this is a page table used strictly in Hyp-mode and therefore contains * Assumes hyp_pgd is a page table used strictly in Hyp-mode and therefore contains
* either mappings in the kernel memory area (above PAGE_OFFSET), or * either mappings in the kernel memory area (above PAGE_OFFSET), or
* device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
*/ */
void free_hyp_pmds(void) void free_hyp_pgds(void)
{ {
unsigned long addr; unsigned long addr;
mutex_lock(&kvm_hyp_pgd_mutex); mutex_lock(&kvm_hyp_pgd_mutex);
for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
free_hyp_pgd_entry(addr); if (hyp_pgd) {
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
free_hyp_pgd_entry(addr); unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
kfree(hyp_pgd);
}
mutex_unlock(&kvm_hyp_pgd_mutex); mutex_unlock(&kvm_hyp_pgd_mutex);
} }
@ -136,6 +184,7 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
do { do {
pte = pte_offset_kernel(pmd, addr); pte = pte_offset_kernel(pmd, addr);
kvm_set_pte(pte, pfn_pte(pfn, prot)); kvm_set_pte(pte, pfn_pte(pfn, prot));
get_page(virt_to_page(pte));
pfn++; pfn++;
} while (addr += PAGE_SIZE, addr != end); } while (addr += PAGE_SIZE, addr != end);
} }
@ -161,6 +210,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return -ENOMEM; return -ENOMEM;
} }
pmd_populate_kernel(NULL, pmd, pte); pmd_populate_kernel(NULL, pmd, pte);
get_page(virt_to_page(pmd));
} }
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
@ -197,6 +247,7 @@ static int __create_hyp_mappings(pgd_t *pgdp,
goto out; goto out;
} }
pud_populate(NULL, pud, pmd); pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
} }
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
@ -289,42 +340,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return 0; return 0;
} }
static void clear_pud_entry(pud_t *pud)
{
pmd_t *pmd_table = pmd_offset(pud, 0);
pud_clear(pud);
pmd_free(NULL, pmd_table);
put_page(virt_to_page(pud));
}
static void clear_pmd_entry(pmd_t *pmd)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
pmd_clear(pmd);
pte_free_kernel(NULL, pte_table);
put_page(virt_to_page(pmd));
}
static bool pmd_empty(pmd_t *pmd)
{
struct page *pmd_page = virt_to_page(pmd);
return page_count(pmd_page) == 1;
}
static void clear_pte_entry(pte_t *pte)
{
if (pte_present(*pte)) {
kvm_set_pte(pte, __pte(0));
put_page(virt_to_page(pte));
}
}
static bool pte_empty(pte_t *pte)
{
struct page *pte_page = virt_to_page(pte);
return page_count(pte_page) == 1;
}
/** /**
* unmap_stage2_range -- Clear stage2 page table entries to unmap a range * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
* @kvm: The VM pointer * @kvm: The VM pointer
@ -338,43 +353,7 @@ static bool pte_empty(pte_t *pte)
*/ */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{ {
pgd_t *pgd; unmap_range(kvm->arch.pgd, start, size);
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
phys_addr_t addr = start, end = start + size;
u64 range;
while (addr < end) {
pgd = kvm->arch.pgd + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
addr += PUD_SIZE;
continue;
}
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
addr += PMD_SIZE;
continue;
}
pte = pte_offset_kernel(pmd, addr);
clear_pte_entry(pte);
range = PAGE_SIZE;
/* If we emptied the pte, walk back up the ladder */
if (pte_empty(pte)) {
clear_pmd_entry(pmd);
range = PMD_SIZE;
if (pmd_empty(pmd)) {
clear_pud_entry(pud);
range = PUD_SIZE;
}
}
addr += range;
}
} }
/** /**
@ -741,7 +720,7 @@ int kvm_mmu_init(void)
return 0; return 0;
out: out:
kfree(hyp_pgd); free_hyp_pgds();
return err; return err;
} }