From 20d5e4a9cd453991e2590a4c25230a99b42dee0c Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:19 +0800 Subject: [PATCH] x86/espfix: Init espfix on the boot CPU side As we alloc pages with GFP_KERNEL in init_espfix_ap() which is called before we enable local irqs, so the lockdep sub-system would (correctly) trigger a warning about the potentially blocking API. So we allocate them on the boot CPU side when the secondary CPU is brought up by the boot CPU, and hand them over to the secondary CPU. And we use alloc_pages_node() with the secondary CPU's node, to make sure the espfix stack is NUMA-local to the CPU that is going to use it. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/espfix_64.c | 21 +++++++++++++-------- arch/x86/kernel/smpboot.c | 14 +++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 1fb0e0003c94..ce95676abd60 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -141,12 +141,12 @@ void init_espfix_ap(int cpu) pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; - int n; + int n, node; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ - if (likely(this_cpu_read(espfix_stack))) + if (likely(per_cpu(espfix_stack, cpu))) return; /* Already initialized */ addr = espfix_base_addr(cpu); @@ -164,12 +164,15 @@ void init_espfix_ap(int cpu) if (stack_page) goto unlock_done; + node = cpu_to_node(cpu); ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) @@ -179,7 +182,9 @@ void init_espfix_ap(int cpu) pmd_p = pmd_offset(&pud, addr); pmd = *pmd_p; if (!pmd_present(pmd)) { - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) @@ -187,7 +192,7 @@ void init_espfix_ap(int cpu) } pte_p = pte_offset_kernel(&pmd, addr); - stack_page = (void *)__get_free_page(GFP_KERNEL); + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); @@ -198,7 +203,7 @@ void init_espfix_ap(int cpu) unlock_done: mutex_unlock(&espfix_init_mutex); done: - this_cpu_write(espfix_stack, addr); - this_cpu_write(espfix_waddr, (unsigned long)stack_page - + (addr & ~PAGE_MASK)); + per_cpu(espfix_stack, cpu) = addr; + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page + + (addr & ~PAGE_MASK); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f5abac6c28b..0bd8c1d507b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -238,13 +238,6 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(smp_processor_id()); -#endif - /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding @@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(cpu); +#endif + /* So we see what's up */ announce_cpu(cpu, apicid);