x86/espfix: Init espfix on the boot CPU side
init_espfix_ap() allocates pages with GFP_KERNEL, but it is called before local IRQs are enabled on the secondary CPU, so the lockdep subsystem (correctly) triggers a warning about the potentially blocking API.

Allocate the pages on the boot CPU side instead, while the secondary CPU is being brought up by the boot CPU, and hand them over to the secondary CPU.

Use alloc_pages_node() with the secondary CPU's node to make sure the espfix stack is NUMA-local to the CPU that is going to use it.

Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Cc: <bp@alien8.de>
Cc: <luto@amacapital.net>
Cc: <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 20d5e4a9cd
parent 1db875631f
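The heart of the fix is the pattern below: do the (potentially sleeping) GFP_KERNEL allocation in boot-CPU context, targeting the secondary CPU's NUMA node. This is a minimal sketch of that pattern, not the patch itself; alloc_page_for_cpu() is an invented helper name used only for illustration.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/topology.h>

/*
 * Sketch: allocate one page for a CPU that is about to be brought up,
 * from the boot CPU. alloc_pages_node() takes an explicit NUMA node,
 * so the memory ends up local to the CPU that will use it, and
 * GFP_KERNEL is safe because the boot CPU may sleep here.
 */
static void *alloc_page_for_cpu(int cpu)
{
	int node = cpu_to_node(cpu);
	struct page *page = alloc_pages_node(node, GFP_KERNEL, 0);

	return page ? page_address(page) : NULL;
}

The actual change follows.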
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -141,12 +141,12 @@ void init_espfix_ap(int cpu)
 	pud_t pud, *pud_p;
 	pmd_t pmd, *pmd_p;
 	pte_t pte, *pte_p;
-	int n;
+	int n, node;
 	void *stack_page;
 	pteval_t ptemask;
 
 	/* We only have to do this once... */
-	if (likely(this_cpu_read(espfix_stack)))
+	if (likely(per_cpu(espfix_stack, cpu)))
 		return;		/* Already initialized */
 
 	addr = espfix_base_addr(cpu);
@@ -164,12 +164,15 @@ void init_espfix_ap(int cpu)
 	if (stack_page)
 		goto unlock_done;
 
+	node = cpu_to_node(cpu);
 	ptemask = __supported_pte_mask;
 
 	pud_p = &espfix_pud_page[pud_index(addr)];
 	pud = *pud_p;
 	if (!pud_present(pud)) {
-		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pmd_p = (pmd_t *)page_address(page);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -179,7 +182,9 @@ void init_espfix_ap(int cpu)
 	pmd_p = pmd_offset(&pud, addr);
 	pmd = *pmd_p;
 	if (!pmd_present(pmd)) {
-		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pte_p = (pte_t *)page_address(page);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -187,7 +192,7 @@ void init_espfix_ap(int cpu)
 	}
 
 	pte_p = pte_offset_kernel(&pmd, addr);
-	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -198,7 +203,7 @@ void init_espfix_ap(int cpu)
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
 done:
-	this_cpu_write(espfix_stack, addr);
-	this_cpu_write(espfix_waddr, (unsigned long)stack_page
-		       + (addr & ~PAGE_MASK));
+	per_cpu(espfix_stack, cpu) = addr;
+	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+				     + (addr & ~PAGE_MASK);
 }
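One subtlety in the hunk above: init_espfix_ap() now runs on the boot CPU, not on the CPU whose variables it initializes, so this_cpu_read()/this_cpu_write(), which always act on the calling CPU, are replaced with per_cpu(var, cpu), which addresses an explicit CPU's copy. A hedged illustration of the difference; espfix_demo and demo_set_for_cpu() are invented names, not kernel symbols.

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, espfix_demo);

/* Runs on the boot CPU on behalf of a not-yet-online 'cpu'. */
static void demo_set_for_cpu(int cpu, unsigned long val)
{
	/*
	 * this_cpu_write(espfix_demo, val) would update the calling
	 * CPU's copy; per_cpu() selects the target CPU's copy instead.
	 */
	per_cpu(espfix_demo, cpu) = val;
}

The smpboot.c side of the change follows.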
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -238,13 +238,6 @@ static void notrace start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
-	/*
-	 * Enable the espfix hack for this CPU
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	init_espfix_ap(smp_processor_id());
-#endif
-
 	/*
 	 * We need to hold vector_lock so there the set of online cpus
 	 * does not change while we are assigning vectors to cpus. Holding
@@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
+	/*
+	 * Enable the espfix hack for this CPU
+	 */
+#ifdef CONFIG_X86_ESPFIX64
+	init_espfix_ap(cpu);
+#endif
+
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
 
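Taken together, the two smpboot.c hunks move the init_espfix_ap() call from start_secondary() (AP context, local IRQs still disabled) into do_boot_cpu() (boot-CPU context, where blocking is allowed). A simplified, illustrative flow of the resulting ordering; bring_up_one_cpu() and kick_ap() are invented stand-ins, not kernel functions.

#include <asm/espfix.h>

static int kick_ap(int apicid)
{
	/* Stand-in for the real INIT/SIPI startup sequence. */
	return 0;
}

static int bring_up_one_cpu(int apicid, int cpu)
{
	/* Boot-CPU context: IRQs on, may sleep, so lockdep stays quiet. */
#ifdef CONFIG_X86_ESPFIX64
	init_espfix_ap(cpu);	/* allocate the AP's espfix stack here */
#endif
	/* Only then kick the AP, which runs start_secondary() with IRQs off. */
	return kick_ap(apicid);
}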