x86, 64-bit: adjust mapping of physical pagetables to work with Xen
This makes a few changes to the construction of the initial pagetables to work better with paravirt_ops/Xen. The main areas are: 1. Support non-PSE mapping of memory, since Xen doesn't currently allow 2M pages to be mapped in guests. 2. Make sure that the ioremap aliases of all pages are dropped before attaching the new page to the pagetable. This avoids having writable aliases of pagetable pages. 3. Preserve existing pagetable entries, rather than overwriting. It's possible that a fair amount of pagetable has already been constructed, so reuse what's already in place rather than ignoring and overwriting it. The algorithm relies on the invariant that any page which is part of the kernel pagetable is itself mapped in the linear memory area. This way, it can avoid using ioremap on a pagetable page. The invariant holds because it maps memory from low to high addresses, and also allocates memory from low to high. Each allocated page can map at least 2M of address space, so the mapped area will always progress much faster than the allocated area. It relies on the early boot code mapping enough pages to get started. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: xen-devel <xen-devel@lists.xensource.com> Cc: Stephen Tweedie <sct@redhat.com> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Mark McLoughlin <markmc@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
f97013fd8f
commit
4f9c11dd49
|
@ -253,6 +253,43 @@ static __meminit void unmap_low_page(void *adr)
|
||||||
early_iounmap(adr, PAGE_SIZE);
|
early_iounmap(adr, PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Fill one PTE page with 4K mappings for the physical range [addr, end).
 *
 * pte_page: base of the PTE page to populate.
 * addr:     first physical address to map; its pte_index() selects the
 *           starting slot, and addr >> PAGE_SHIFT is used as the pfn.
 * end:      physical address at which mapping stops (exclusive).
 *
 * Entries that are already non-zero are left untouched, so an existing
 * mapping is preserved rather than overwritten.  The number of entries
 * newly written is passed to update_page_count() under PG_LEVEL_4K.
 */
static void __meminit
|
||||||

phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
|
||||||

{
|
||||||

unsigned pages = 0;
|
||||||

int i;
|
||||||

pte_t *pte = pte_page + pte_index(addr);
|
||||||

|
||||||

/* Walk the slots from pte_index(addr) to the end of this PTE page. */
for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
|
||||||

|
||||||

if (addr >= end) {
|
||||||

/* Past the requested range: before bootmem is done, zero the rest. */
if (!after_bootmem) {
|
||||||

for(; i < PTRS_PER_PTE; i++, pte++)
|
||||||

set_pte(pte, __pte(0));
|
||||||

}
|
||||||

break;
|
||||||

}
|
||||||

|
||||||

/* Slot already populated: keep the existing entry. */
if (pte_val(*pte))
|
||||||

continue;
|
||||||

|
||||||

/* Debug trace, compiled out by the constant-false condition. */
if (0)
|
||||||

printk(" pte=%p addr=%lx pte=%016lx\n",
|
||||||

pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
|
||||||

set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
|
||||||

pages++;
|
||||||

}
|
||||||

update_page_count(PG_LEVEL_4K, pages);
|
||||||

}
|
||||||
|
|
||||||
|
/*
 * Extend the mappings in the PTE page that an existing PMD entry refers to.
 *
 * Takes the PTE page address from pmd_page_vaddr(*pmd) and hands it to
 * phys_pte_init() with the same [address, end) range.
 */
static void __meminit
|
||||||

phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
|
||||||

{
|
||||||

pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
|
||||||

|
||||||

phys_pte_init(pte, address, end);
|
||||||

}
|
||||||
|
|
||||||
static unsigned long __meminit
|
static unsigned long __meminit
|
||||||
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
||||||
{
|
{
|
||||||
|
@ -261,7 +298,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
||||||
int i = pmd_index(address);
|
int i = pmd_index(address);
|
||||||
|
|
||||||
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
|
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
|
||||||
|
unsigned long pte_phys;
|
||||||
pmd_t *pmd = pmd_page + pmd_index(address);
|
pmd_t *pmd = pmd_page + pmd_index(address);
|
||||||
|
pte_t *pte;
|
||||||
|
|
||||||
if (address >= end) {
|
if (address >= end) {
|
||||||
if (!after_bootmem) {
|
if (!after_bootmem) {
|
||||||
|
@ -271,12 +310,23 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pmd_val(*pmd))
|
if (pmd_val(*pmd)) {
|
||||||
|
phys_pte_update(pmd, address, end);
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
pages++;
|
if (cpu_has_pse) {
|
||||||
set_pte((pte_t *)pmd,
|
pages++;
|
||||||
pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
|
set_pte((pte_t *)pmd,
|
||||||
|
pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
pte = alloc_low_page(&pte_phys);
|
||||||
|
phys_pte_init(pte, address, end);
|
||||||
|
unmap_low_page(pte);
|
||||||
|
|
||||||
|
pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
|
||||||
}
|
}
|
||||||
update_page_count(PG_LEVEL_2M, pages);
|
update_page_count(PG_LEVEL_2M, pages);
|
||||||
return address;
|
return address;
|
||||||
|
@ -333,11 +383,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
|
||||||
pmd = alloc_low_page(&pmd_phys);
|
pmd = alloc_low_page(&pmd_phys);
|
||||||
|
|
||||||
spin_lock(&init_mm.page_table_lock);
|
spin_lock(&init_mm.page_table_lock);
|
||||||
pud_populate(&init_mm, pud, __va(pmd_phys));
|
|
||||||
last_map_addr = phys_pmd_init(pmd, addr, end);
|
last_map_addr = phys_pmd_init(pmd, addr, end);
|
||||||
|
unmap_low_page(pmd);
|
||||||
|
pud_populate(&init_mm, pud, __va(pmd_phys));
|
||||||
spin_unlock(&init_mm.page_table_lock);
|
spin_unlock(&init_mm.page_table_lock);
|
||||||
|
|
||||||
unmap_low_page(pmd);
|
|
||||||
}
|
}
|
||||||
__flush_tlb_all();
|
__flush_tlb_all();
|
||||||
update_page_count(PG_LEVEL_1G, pages);
|
update_page_count(PG_LEVEL_1G, pages);
|
||||||
|
@ -345,16 +395,30 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
|
||||||
return last_map_addr;
|
return last_map_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Extend the mappings in the PUD page that an existing PGD entry refers to.
 *
 * Takes the PUD page address from pgd_page_vaddr(*pgd), calls
 * phys_pud_init() on it for [addr, end), and returns that call's result.
 */
static unsigned long __meminit
|
||||||

phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
|
||||||

{
|
||||||

pud_t *pud;
|
||||||

|
||||||

pud = (pud_t *)pgd_page_vaddr(*pgd);
|
||||||

|
||||||

return phys_pud_init(pud, addr, end);
|
||||||

}
|
||||||
|
|
||||||
static void __init find_early_table_space(unsigned long end)
|
static void __init find_early_table_space(unsigned long end)
|
||||||
{
|
{
|
||||||
unsigned long puds, pmds, tables, start;
|
unsigned long puds, tables, start;
|
||||||
|
|
||||||
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
||||||
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
|
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
|
||||||
if (!direct_gbpages) {
|
if (!direct_gbpages) {
|
||||||
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
||||||
tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
|
tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
if (!cpu_has_pse) {
|
||||||
|
unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||||
|
tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RED-PEN putting page tables only on node 0 could
|
* RED-PEN putting page tables only on node 0 could
|
||||||
|
@ -526,19 +590,25 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
|
||||||
unsigned long pud_phys;
|
unsigned long pud_phys;
|
||||||
pud_t *pud;
|
pud_t *pud;
|
||||||
|
|
||||||
|
next = start + PGDIR_SIZE;
|
||||||
|
if (next > end)
|
||||||
|
next = end;
|
||||||
|
|
||||||
|
if (pgd_val(*pgd)) {
|
||||||
|
last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (after_bootmem)
|
if (after_bootmem)
|
||||||
pud = pud_offset(pgd, start & PGDIR_MASK);
|
pud = pud_offset(pgd, start & PGDIR_MASK);
|
||||||
else
|
else
|
||||||
pud = alloc_low_page(&pud_phys);
|
pud = alloc_low_page(&pud_phys);
|
||||||
|
|
||||||
next = start + PGDIR_SIZE;
|
|
||||||
if (next > end)
|
|
||||||
next = end;
|
|
||||||
last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
|
last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
|
||||||
|
unmap_low_page(pud);
|
||||||
if (!after_bootmem)
|
if (!after_bootmem)
|
||||||
pgd_populate(&init_mm, pgd_offset_k(start),
|
pgd_populate(&init_mm, pgd_offset_k(start),
|
||||||
__va(pud_phys));
|
__va(pud_phys));
|
||||||
unmap_low_page(pud);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!after_bootmem)
|
if (!after_bootmem)
|
||||||
|
|
|
@ -485,7 +485,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
|
||||||
if (pgprot_val(flags))
|
if (pgprot_val(flags))
|
||||||
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
|
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
|
||||||
else
|
else
|
||||||
pte_clear(NULL, addr, pte);
|
pte_clear(&init_mm, addr, pte);
|
||||||
__flush_tlb_one(addr);
|
__flush_tlb_one(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue