x86/xen: don't copy bogus duplicate entries into kernel page tables
When RANDOMIZE_BASE (KASLR) is enabled, or the sum of all loaded
modules exceeds 512 MiB, then loading modules fails with a warning
(and hence a vmalloc allocation failure) because the PTEs for the
newly-allocated vmalloc address space are not zero.

  WARNING: CPU: 0 PID: 494 at linux/mm/vmalloc.c:128
           vmap_page_range_noflush+0x2a1/0x360()

This is caused by xen_setup_kernel_pagetable() copying
level2_kernel_pgt into level2_fixmap_pgt, overwriting many non-present
entries.

Without KASLR, the normal kernel image size only covers the first half
of level2_kernel_pgt and module space starts after that.

  L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[  0..255]->kernel
                                                    [256..511]->module
                            [511]->level2_fixmap_pgt[  0..505]->module

This allows 512 MiB of module vmalloc space to be used before having
to use the corrupted level2_fixmap_pgt entries.

With KASLR enabled, the kernel image uses the full PUD range of 1G and
module space starts in the level2_fixmap_pgt. So basically:

  L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[0..511]->kernel
                            [511]->level2_fixmap_pgt[0..505]->module

And now no module vmalloc space can be used without using the corrupt
level2_fixmap_pgt entries.

Fix this by properly converting the level2_fixmap_pgt entries to MFNs,
and by setting level1_fixmap_pgt as read-only.

A number of comments were also using the wrong L3 offset for
level2_kernel_pgt. These have been corrected.

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: stable@vger.kernel.org
commit 0b5a50635f
parent 5903c6bd1a
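As a back-of-the-envelope check of the figures in the message above, the
following standalone userspace sketch (not kernel code; the 2 MiB-per-PMD-entry
size and the [256..511] entry range are taken from the message itself)
reproduces the 512 MiB number:

#include <stdio.h>

int main(void)
{
	/* Each level2 (PMD) entry maps 2 MiB of virtual address space. */
	unsigned long pmd_entry_mib = 2;

	/* Without KASLR the kernel image covers level2_kernel_pgt[0..255],
	 * leaving entries [256..511] with clean PTEs for modules. */
	unsigned long clean_entries = 512 - 256;

	printf("clean module space: %lu MiB\n",
	       clean_entries * pmd_entry_mib);	/* prints 512 */

	/* With KASLR the image covers [0..511], so every module mapping
	 * must immediately hit the corrupted level2_fixmap_pgt entries. */
	return 0;
}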
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pmd_t level2_fixmap_pgt[512];
 extern pmd_t level2_ident_pgt[512];
+extern pte_t level1_fixmap_pgt[512];
 extern pgd_t init_level4_pgt[];
 
 #define swapper_pg_dir init_level4_pgt
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
  *
  * We can construct this by grafting the Xen provided pagetable into
  * head_64.S's preconstructed pagetables. We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working. We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- * NOTE: for PVH, the page tables are native.
+ * level2_ident_pgt, and level2_kernel_pgt. This means that only the
+ * kernel has a physical mapping to start with - but that's enough to
+ * get __va working. We need to fill in the rest of the physical
+ * mapping once some sort of allocator has been set up. NOTE: for
+ * PVH, the page tables are native.
  */
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
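The L4/L3 offsets quoted throughout these comments (L4[511], L3[510], L4[272])
fall straight out of x86-64's 9-bits-per-level index arithmetic. A minimal
userspace sketch, assuming the __START_KERNEL_map (0xffffffff80000000) and
PAGE_OFFSET (0xffff880000000000) values of kernels of this era:

#include <stdio.h>
#include <stdint.h>

/* 9 bits of index per level, starting at bit 39 (L4), 30 (L3), 21 (L2). */
static unsigned idx(uint64_t va, unsigned shift)
{
	return (va >> shift) & 0x1ff;
}

int main(void)
{
	uint64_t kernel = 0xffffffff80000000ULL;	/* __START_KERNEL_map */
	uint64_t direct = 0xffff880000000000ULL;	/* PAGE_OFFSET */

	printf("kernel: L4[%u] L3[%u] L2[%u]\n",
	       idx(kernel, 39), idx(kernel, 30), idx(kernel, 21));
	/* -> L4[511] L3[510] L2[0], i.e. level3_kernel_pgt[510] */

	printf("direct map: L4[%u]\n", idx(direct, 39));
	/* -> L4[272], the alias discussed in a later hunk */
	return 0;
}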
@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 		/* L3_i[0] -> level2_ident_pgt */
 		convert_pfn_mfn(level3_ident_pgt);
 		/* L3_k[510] -> level2_kernel_pgt
-		 * L3_i[511] -> level2_fixmap_pgt */
+		 * L3_k[511] -> level2_fixmap_pgt */
 		convert_pfn_mfn(level3_kernel_pgt);
+
+		/* L3_k[511][506] -> level1_fixmap_pgt */
+		convert_pfn_mfn(level2_fixmap_pgt);
 	}
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
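For context, convert_pfn_mfn() (defined earlier in mmu.c; reproduced here from
memory as a sketch, so details may differ from the in-tree version) rewrites
every entry of a pagetable page from a PFN-based native entry into the
MFN-based form Xen expects -- which is what the new
convert_pfn_mfn(level2_fixmap_pgt) call now does for the fixmap L2:

static void __init convert_pfn_mfn(void *v)
{
	pte_t *pte = v;
	int i;

	/* All levels are converted the same way, so just treat them
	 * as ptes. */
	for (i = 0; i < PTRS_PER_PTE; i++)
		pte[i] = xen_make_pte(pte[i].pte);
}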
@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	addr[1] = (unsigned long)l3;
 	addr[2] = (unsigned long)l2;
 	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
-	 * Both L4[272][0] and L4[511][511] have entries that point to the same
+	 * Both L4[272][0] and L4[511][510] have entries that point to the same
 	 * L2 (PMD) tables. Meaning that if you modify it in __va space
 	 * it will be also modified in the __ka space! (But if you just
 	 * modify the PMD table to point to other PTE's or none, then you
 	 * are OK - which is what cleanup_highmap does) */
 	copy_page(level2_ident_pgt, l2);
-	/* Graft it onto L4[511][511] */
+	/* Graft it onto L4[511][510] */
 	copy_page(level2_kernel_pgt, l2);
 
-	/* Get [511][510] and graft that in level2_fixmap_pgt */
-	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
-	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	copy_page(level2_fixmap_pgt, l2);
-	/* Note that we don't do anything with level1_fixmap_pgt which
-	 * we don't need. */
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		/* Make pagetable pieces RO */
 		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
 
 		/* Pin down new L4 */
 		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
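Xen requires that any page used as a pagetable be mapped read-only, which is
why the newly-referenced level1_fixmap_pgt must get the same PAGE_KERNEL_RO
treatment as the other tables. A sketch of what set_page_prot() does (modelled
on the mmu.c helper of this period; the in-tree version may differ in detail):

static void __init set_page_prot(void *addr, pgprot_t prot)
{
	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
	pte_t pte = pfn_pte(pfn, prot);

	/* Ask the hypervisor to remap the page with the new protection;
	 * failure here is fatal during early boot. */
	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, UVMF_NONE))
		BUG();
}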