x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
Take mm->page_table_lock while syncing the vmalloc region. This prevents a race with the Xen pagetable pin/unpin code, which expects that the page_table_lock is already held. If this race occurs, then Xen can see an inconsistent page type (a page can either be read/write or a pagetable page, and pin/unpin converts it between them), which will cause either the pin or the set_p[gm]d to fail; either will crash the kernel.

vmalloc_sync_all() should be called rarely, so this extra use of page_table_lock should not interfere with its normal users.

The mm pointer is stashed in the pgd page's index field, as that won't be otherwise used for pgds.

Reported-by: Ian Campbell <ian.cambell@eu.citrix.com>
Originally-by: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <4CB88A4C.1080305@goop.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
44235dcde4
commit
617d34d9e5
|
@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|||
extern spinlock_t pgd_lock;
|
||||
extern struct list_head pgd_list;
|
||||
|
||||
extern struct mm_struct *pgd_page_get_mm(struct page *page);
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/paravirt.h>
|
||||
#else /* !CONFIG_PARAVIRT */
|
||||
|
|
|
@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
|
|||
|
||||
spin_lock_irqsave(&pgd_lock, flags);
|
||||
list_for_each_entry(page, &pgd_list, lru) {
|
||||
if (!vmalloc_sync_one(page_address(page), address))
|
||||
spinlock_t *pgt_lock;
|
||||
int ret;
|
||||
|
||||
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
|
||||
|
||||
spin_lock(pgt_lock);
|
||||
ret = vmalloc_sync_one(page_address(page), address);
|
||||
spin_unlock(pgt_lock);
|
||||
|
||||
if (!ret)
|
||||
break;
|
||||
}
|
||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||
|
|
|
@ -116,12 +116,19 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
|||
spin_lock_irqsave(&pgd_lock, flags);
|
||||
list_for_each_entry(page, &pgd_list, lru) {
|
||||
pgd_t *pgd;
|
||||
spinlock_t *pgt_lock;
|
||||
|
||||
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
||||
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
|
||||
spin_lock(pgt_lock);
|
||||
|
||||
if (pgd_none(*pgd))
|
||||
set_pgd(pgd, *pgd_ref);
|
||||
else
|
||||
BUG_ON(pgd_page_vaddr(*pgd)
|
||||
!= pgd_page_vaddr(*pgd_ref));
|
||||
|
||||
spin_unlock(pgt_lock);
|
||||
}
|
||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||
}
|
||||
|
|
|
@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
|
|||
#define UNSHARED_PTRS_PER_PGD \
|
||||
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
|
||||
|
||||
static void pgd_ctor(pgd_t *pgd)
|
||||
|
||||
/*
 * Record the owning mm in the struct page that backs this pgd.  The
 * pointer is kept in that page's index field and can be read back with
 * pgd_page_get_mm().
 */
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	struct page *pgd_page = virt_to_page(pgd);

	/* Build-time guard: the index field must be able to hold a pointer. */
	BUILD_BUG_ON(sizeof(pgd_page->index) < sizeof(mm));
	pgd_page->index = (pgoff_t)mm;
}
|
||||
|
||||
struct mm_struct *pgd_page_get_mm(struct page *page)
|
||||
{
|
||||
return (struct mm_struct *)page->index;
|
||||
}
|
||||
|
||||
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
/* If the pgd points to a shared pagetable level (either the
|
||||
ptes in non-PAE, or shared PMD in PAE), then just copy the
|
||||
|
@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
|
|||
}
|
||||
|
||||
/* list required to sync kernel mapping updates */
|
||||
if (!SHARED_KERNEL_PMD)
|
||||
if (!SHARED_KERNEL_PMD) {
|
||||
pgd_set_mm(pgd, mm);
|
||||
pgd_list_add(pgd);
|
||||
}
|
||||
}
|
||||
|
||||
static void pgd_dtor(pgd_t *pgd)
|
||||
|
@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|||
*/
|
||||
spin_lock_irqsave(&pgd_lock, flags);
|
||||
|
||||
pgd_ctor(pgd);
|
||||
pgd_ctor(mm, pgd);
|
||||
pgd_prepopulate_pmd(mm, pgd, pmds);
|
||||
|
||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||
|
|
Loading…
Reference in New Issue