x86/smpboot: Remove initial_stack on 64-bit
In order to facilitate parallel startup, start to eliminate some of the global variables passing information to CPUs in the startup path. However, start by introducing one more: smpboot_control. For now this merely holds the CPU# of the CPU which is coming up. Each CPU can then find its own per-cpu data, and everything else it needs can be found from there, allowing the other global variables to be removed. First to be removed is initial_stack. Each CPU can load %rsp from its current_task->thread.sp instead. That is already set up with the correct idle thread for APs. Set up the .sp field in INIT_THREAD on x86 so that the BSP also finds a suitable stack pointer in the static per-cpu data when coming up on first boot. On resume from S3, the CPU needs a temporary stack because its idle task is already active. Instead of setting initial_stack, the sleep code can simply set its own current->thread.sp to point to the temporary stack. Nobody else cares about ->thread.sp for a thread which is currently on a CPU, because the true value is actually in the %rsp register, which is restored with the rest of the CPU context in do_suspend_lowlevel(). Signed-off-by: Brian Gerst <brgerst@gmail.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Usama Arif <usama.arif@bytedance.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Usama Arif <usama.arif@bytedance.com> Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> Link: https://lore.kernel.org/r/20230316222109.1940300-7-usama.arif@bytedance.com
This commit is contained in:
parent
cefad862f2
commit
3adee777ad
|
@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x)
|
||||||
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define INIT_THREAD { }
|
extern unsigned long __end_init_task[];
|
||||||
|
|
||||||
|
#define INIT_THREAD { \
|
||||||
|
.sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
|
||||||
|
}
|
||||||
|
|
||||||
extern unsigned long KSTK_ESP(struct task_struct *task);
|
extern unsigned long KSTK_ESP(struct task_struct *task);
|
||||||
|
|
||||||
|
|
|
@ -199,5 +199,8 @@ extern void nmi_selftest(void);
|
||||||
#define nmi_selftest() do { } while (0)
|
#define nmi_selftest() do { } while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
extern unsigned int smpboot_control;
|
||||||
|
|
||||||
|
#endif /* !__ASSEMBLY__ */
|
||||||
|
|
||||||
#endif /* _ASM_X86_SMP_H */
|
#endif /* _ASM_X86_SMP_H */
|
||||||
|
|
|
@ -111,13 +111,29 @@ int x86_acpi_suspend_lowlevel(void)
|
||||||
saved_magic = 0x12345678;
|
saved_magic = 0x12345678;
|
||||||
#else /* CONFIG_64BIT */
|
#else /* CONFIG_64BIT */
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
|
/*
|
||||||
|
* As each CPU starts up, it will find its own stack pointer
|
||||||
|
* from its current_task->thread.sp. Typically that will be
|
||||||
|
* the idle thread for a newly-started AP, or even the boot
|
||||||
|
* CPU which will find it set to &init_task in the static
|
||||||
|
* per-cpu data.
|
||||||
|
*
|
||||||
|
* Make the resuming CPU use the temporary stack at startup
|
||||||
|
* by setting current->thread.sp to point to that. The true
|
||||||
|
* %rsp will be restored with the rest of the CPU context,
|
||||||
|
* by do_suspend_lowlevel(). And unwinders don't care about
|
||||||
|
* the abuse of ->thread.sp because it's a dead variable
|
||||||
|
* while the thread is running on the CPU anyway; the true
|
||||||
|
* value is in the actual %rsp register.
|
||||||
|
*/
|
||||||
|
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
|
||||||
early_gdt_descr.address =
|
early_gdt_descr.address =
|
||||||
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
|
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
|
||||||
initial_gs = per_cpu_offset(smp_processor_id());
|
initial_gs = per_cpu_offset(smp_processor_id());
|
||||||
|
smpboot_control = smp_processor_id();
|
||||||
#endif
|
#endif
|
||||||
initial_code = (unsigned long)wakeup_long64;
|
initial_code = (unsigned long)wakeup_long64;
|
||||||
saved_magic = 0x123456789abcdef0L;
|
saved_magic = 0x123456789abcdef0L;
|
||||||
#endif /* CONFIG_64BIT */
|
#endif /* CONFIG_64BIT */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -115,6 +115,7 @@ static void __used common(void)
|
||||||
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
|
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
|
||||||
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
|
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
|
||||||
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
|
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
|
||||||
|
OFFSET(X86_current_task, pcpu_hot, current_task);
|
||||||
#ifdef CONFIG_CALL_DEPTH_TRACKING
|
#ifdef CONFIG_CALL_DEPTH_TRACKING
|
||||||
OFFSET(X86_call_depth, pcpu_hot, call_depth);
|
OFFSET(X86_call_depth, pcpu_hot, call_depth);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -61,8 +61,8 @@ SYM_CODE_START_NOALIGN(startup_64)
|
||||||
* tables and then reload them.
|
* tables and then reload them.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Set up the stack for verify_cpu(), similar to initial_stack below */
|
/* Set up the stack for verify_cpu() */
|
||||||
leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
|
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
|
||||||
|
|
||||||
leaq _text(%rip), %rdi
|
leaq _text(%rip), %rdi
|
||||||
|
|
||||||
|
@ -241,6 +241,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
|
||||||
UNWIND_HINT_EMPTY
|
UNWIND_HINT_EMPTY
|
||||||
ANNOTATE_NOENDBR // above
|
ANNOTATE_NOENDBR // above
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
movl smpboot_control(%rip), %ecx
|
||||||
|
|
||||||
|
/* Get the per cpu offset for the given CPU# which is in ECX */
|
||||||
|
movq __per_cpu_offset(,%rcx,8), %rdx
|
||||||
|
#else
|
||||||
|
xorl %edx, %edx /* zero-extended to clear all of RDX */
|
||||||
|
#endif /* CONFIG_SMP */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup a boot time stack - Any secondary CPU will have lost its stack
|
||||||
|
* by now because the cr3-switch above unmaps the real-mode stack.
|
||||||
|
*
|
||||||
|
* RDX contains the per-cpu offset
|
||||||
|
*/
|
||||||
|
movq pcpu_hot + X86_current_task(%rdx), %rax
|
||||||
|
movq TASK_threadsp(%rax), %rsp
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must switch to a new descriptor in kernel space for the GDT
|
* We must switch to a new descriptor in kernel space for the GDT
|
||||||
* because soon the kernel won't have access anymore to the userspace
|
* because soon the kernel won't have access anymore to the userspace
|
||||||
|
@ -275,12 +293,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
|
||||||
movl initial_gs+4(%rip),%edx
|
movl initial_gs+4(%rip),%edx
|
||||||
wrmsr
|
wrmsr
|
||||||
|
|
||||||
/*
|
|
||||||
* Setup a boot time stack - Any secondary CPU will have lost its stack
|
|
||||||
* by now because the cr3-switch above unmaps the real-mode stack
|
|
||||||
*/
|
|
||||||
movq initial_stack(%rip), %rsp
|
|
||||||
|
|
||||||
/* Setup and Load IDT */
|
/* Setup and Load IDT */
|
||||||
pushq %rsi
|
pushq %rsi
|
||||||
call early_setup_idt
|
call early_setup_idt
|
||||||
|
@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64)
|
||||||
SYM_CODE_START(start_cpu0)
|
SYM_CODE_START(start_cpu0)
|
||||||
ANNOTATE_NOENDBR
|
ANNOTATE_NOENDBR
|
||||||
UNWIND_HINT_EMPTY
|
UNWIND_HINT_EMPTY
|
||||||
movq initial_stack(%rip), %rsp
|
|
||||||
|
/* Find the idle task stack */
|
||||||
|
movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
|
||||||
|
movq TASK_threadsp(%rcx), %rsp
|
||||||
|
|
||||||
jmp .Ljump_to_C_code
|
jmp .Ljump_to_C_code
|
||||||
SYM_CODE_END(start_cpu0)
|
SYM_CODE_END(start_cpu0)
|
||||||
#endif
|
#endif
|
||||||
|
@ -420,12 +436,6 @@ SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
|
||||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||||
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
|
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
|
|
||||||
* reliably detect the end of the stack.
|
|
||||||
*/
|
|
||||||
SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
|
|
||||||
__FINITDATA
|
__FINITDATA
|
||||||
|
|
||||||
__INIT
|
__INIT
|
||||||
|
@ -660,6 +670,9 @@ SYM_DATA_END(level1_fixmap_pgt)
|
||||||
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
|
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
|
||||||
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
|
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
|
||||||
|
|
||||||
|
.align 16
|
||||||
|
SYM_DATA(smpboot_control, .long 0)
|
||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
/* This must match the first entry in level2_kernel_pgt */
|
/* This must match the first entry in level2_kernel_pgt */
|
||||||
SYM_DATA(phys_base, .quad 0x0)
|
SYM_DATA(phys_base, .quad 0x0)
|
||||||
|
|
|
@ -1088,7 +1088,12 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
|
||||||
idle->thread.sp = (unsigned long)task_pt_regs(idle);
|
idle->thread.sp = (unsigned long)task_pt_regs(idle);
|
||||||
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
|
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
|
||||||
initial_code = (unsigned long)start_secondary;
|
initial_code = (unsigned long)start_secondary;
|
||||||
initial_stack = idle->thread.sp;
|
|
||||||
|
if (IS_ENABLED(CONFIG_X86_32)) {
|
||||||
|
initial_stack = idle->thread.sp;
|
||||||
|
} else {
|
||||||
|
smpboot_control = cpu;
|
||||||
|
}
|
||||||
|
|
||||||
/* Enable the espfix hack for this CPU */
|
/* Enable the espfix hack for this CPU */
|
||||||
init_espfix_ap(cpu);
|
init_espfix_ap(cpu);
|
||||||
|
|
|
@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
|
||||||
ANNOTATE_NOENDBR
|
ANNOTATE_NOENDBR
|
||||||
cld
|
cld
|
||||||
|
|
||||||
mov initial_stack(%rip), %rsp
|
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
|
||||||
|
|
||||||
/* Set up %gs.
|
/* Set up %gs.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue