From 1cdde19109901e8f1194e227d0bcd48caf713323 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: [PATCH 1/8] x86: fix sigcontext.h user export Jakub Jelinek reported that some user-space code that relies on kernel headers has built dependency on the sigcontext->eip/rip register names - which have been unified in commit: commit 742fa54a62be6a263df14a553bf832724471dfbe Author: H. Peter Anvin Date: Wed Jan 30 13:30:56 2008 +0100 x86: use generic register names in struct sigcontext so give the old layout to user-space. This is not particularly pretty, but it's an ABI so there's no danger of the two definitions getting out of sync. Reported-by: Jakub Jelinek Signed-off-by: Ingo Molnar --- include/asm-x86/sigcontext.h | 66 ++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 681deade5f00..d743947f4c77 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -58,6 +58,7 @@ struct _fpstate { #define X86_FXSR_MAGIC 0x0000 +#ifdef __KERNEL__ struct sigcontext { unsigned short gs, __gsh; unsigned short fs, __fsh; @@ -82,6 +83,35 @@ struct sigcontext { unsigned long oldmask; unsigned long cr2; }; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned long edi; + unsigned long esi; + unsigned long ebp; + unsigned long esp; + unsigned long ebx; + unsigned long edx; + unsigned long ecx; + unsigned long eax; + unsigned long trapno; + unsigned long err; + unsigned long eip; + unsigned short cs, __csh; + unsigned long eflags; + unsigned long esp_at_signal; + unsigned short ss, __ssh; + struct _fpstate __user * fpstate; + unsigned long oldmask; + unsigned long cr2; +}; +#endif /* !__KERNEL__ */ #else /* __i386__ */ @@ -102,6 +132,7 @@ struct _fpstate { __u32 reserved2[24]; }; +#ifdef __KERNEL__ struct sigcontext { unsigned long r8; unsigned long r9; @@ -132,6 +163,41 @@ struct sigcontext { struct _fpstate __user *fpstate; /* zero when no FPU context */ unsigned long reserved1[8]; }; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long rdi; + unsigned long rsi; + unsigned long rbp; + unsigned long rbx; + unsigned long rdx; + unsigned long rax; + unsigned long rcx; + unsigned long rsp; + unsigned long rip; + unsigned long eflags; /* RFLAGS */ + unsigned short cs; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; + unsigned long err; + unsigned long trapno; + unsigned long oldmask; + unsigned long cr2; + struct _fpstate __user *fpstate; /* zero when no FPU context */ + unsigned long reserved1[8]; +}; +#endif /* !__KERNEL__ */ #endif /* !__i386__ */ From 416e2d63794d4e57774989429e174507801915f2 Mon Sep 17 00:00:00 2001 From: Jody Belka Date: Tue, 12 Feb 2008 23:37:48 +0000 Subject: [PATCH 2/8] x86: fixup machine_ops reboot_{32|64}.c unification fallout When reboot_32.c and reboot_64.c were unified (commit 4d022e35fd...), the machine_ops code was broken, leading to xen pvops kernels failing to properly halt/poweroff/reboot etc. This fixes that up. Signed-off-by: Jody Belka Cc: Miguel Boton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 46 +++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5818dc28167d..7fd6ac43e4a1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -326,7 +326,7 @@ static inline void kb_wait(void) } } -void machine_emergency_restart(void) +static void native_machine_emergency_restart(void) { int i; @@ -376,7 +376,7 @@ void machine_emergency_restart(void) } } -void machine_shutdown(void) +static void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP @@ -420,7 +420,7 @@ void machine_shutdown(void) #endif } -void machine_restart(char *__unused) +static void native_machine_restart(char *__unused) { printk("machine restart\n"); @@ -429,11 +429,11 @@ void machine_restart(char *__unused) machine_emergency_restart(); } -void machine_halt(void) +static void native_machine_halt(void) { } -void machine_power_off(void) +static void native_machine_power_off(void) { if (pm_power_off) { if (!reboot_force) @@ -443,9 +443,35 @@ void machine_power_off(void) } struct machine_ops machine_ops = { - .power_off = machine_power_off, - .shutdown = machine_shutdown, - .emergency_restart = machine_emergency_restart, - .restart = machine_restart, - .halt = machine_halt + .power_off = native_machine_power_off, + .shutdown = native_machine_shutdown, + .emergency_restart = native_machine_emergency_restart, + .restart = native_machine_restart, + .halt = native_machine_halt }; + +void machine_power_off(void) +{ + machine_ops.power_off(); +} + +void machine_shutdown(void) +{ + machine_ops.shutdown(); +} + +void machine_emergency_restart(void) +{ + machine_ops.emergency_restart(); +} + +void machine_restart(char *cmd) +{ + machine_ops.restart(cmd); +} + +void machine_halt(void) +{ + machine_ops.halt(); +} + From 37cc8d7f963ba2deec29c9b68716944516a3244f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: [PATCH 3/8] x86/early_ioremap: don't assume we're using swapper_pg_dir At the early stages of boot, before the kernel pagetable has been fully initialized, a Xen kernel will still be running off the Xen-provided pagetables rather than swapper_pg_dir[]. Therefore, readback cr3 to determine the base of the pagetable rather than assuming swapper_pg_dir[]. Signed-off-by: Jeremy Fitzhardinge Tested-by: Jody Belka Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a4897a85268a..9f42d7e9c158 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -265,7 +265,9 @@ static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { - pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; + /* Don't assume we're using swapper_pg_dir at this point */ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(addr)]; pud_t *pud = pud_offset(pgd, addr); pmd_t *pmd = pmd_offset(pud, addr); From 2b5407811db755257ae53c75cc6b312ed5d2ad9e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: [PATCH 4/8] xen: unpin initial Xen pagetable once we're finished with it Unpin the Xen-provided pagetable once we've finished with it, so it doesn't cause stray references which cause later swapper_pg_dir pagetable updates to fail. Signed-off-by: Jeremy Fitzhardinge Tested-by: Jody Belka Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index de647bc6e74d..49e5358f481a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -798,6 +798,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base) * added to the table can be prepared properly for Xen. */ xen_write_cr3(__pa(base)); + + /* Unpin initial Xen pagetable */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(xen_start_info->pt_base))); } static __init void xen_pagetable_setup_done(pgd_t *base) From 5d3c8b21e22712137db6bbd246d1bdcbe0a09914 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: [PATCH 5/8] x86: CPA: fix gbpages support in try_preserve_large_page [ mingo@elte.hu: while gbpages cannot be enabled on mainline currently, keep the code uptodate and this fix is easy enough. ] Use correct page sizes and masks for GB pages in try_preserve_large_page() This prevents a boot hang on a GB capable system with CONFIG_DIRECT_GBPAGES enabled. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 440210a2277d..bd61ed13f9cf 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -275,8 +275,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, break; #ifdef CONFIG_X86_64 case PG_LEVEL_1G: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; + psize = PUD_PAGE_SIZE; + pmask = PUD_PAGE_MASK; break; #endif default: From e85f20518bb928667508c22090c85d458e25a4f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Feb 2008 19:46:48 +0100 Subject: [PATCH 6/8] x86: EFI: fix use of unitialized variable and the cache logic Andi Kleen pointed out that the cache attribute logic is reverse in efi_enter_virtual_mode(). This problem alone is harmless as we do not (yet) do cache attribute conflict resolution. (This bug was not present in the original EFI submission - I introduced it while fixing up rejects.) While reviewing this code I noticed a second, worse problem: the use of uninitialized md->virt_addr. Fix both problems. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 32dd62b36ff7..b4d523276f40 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -428,9 +428,6 @@ void __init efi_enter_virtual_mode(void) else va = efi_ioremap(md->phys_addr, size); - if (md->attribute & EFI_MEMORY_WB) - set_memory_uc(md->virt_addr, size); - md->virt_addr = (u64) (unsigned long) va; if (!va) { @@ -439,6 +436,9 @@ void __init efi_enter_virtual_mode(void) continue; } + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc(md->virt_addr, size); + systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; From 4de0d4a6d173351b023ab2855c3d331146a418e5 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Wed, 13 Feb 2008 17:22:41 +0800 Subject: [PATCH 7/8] x86: EFI runtime code mapping enhancement This patch enhances EFI runtime code memory mapping as following: - Move __supported_pte_mask & _PAGE_NX checking before invoking runtime_code_page_mkexec(). This makes it possible for compiler to eliminate runtime_code_page_mkexec() on machine without NX support. - Use set_memory_x/nx in early_mapping_set_exec(). This eliminates the duplicated implementation. This patch has been tested on Intel x86_64 platform with EFI64/32 firmware. Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 6 ++---- arch/x86/kernel/efi_64.c | 30 ++++++++++++------------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index b4d523276f40..cbdf9bacc575 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -384,9 +384,6 @@ static void __init runtime_code_page_mkexec(void) efi_memory_desc_t *md; void *p; - if (!(__supported_pte_mask & _PAGE_NX)) - return; - /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; @@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void) efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; efi.set_virtual_address_map = virt_efi_set_virtual_address_map; - runtime_code_page_mkexec(); + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; } diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 09d5c2330934..d143a1e76b30 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -35,6 +35,7 @@ #include #include #include +#include static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; @@ -43,22 +44,15 @@ static void __init early_mapping_set_exec(unsigned long start, unsigned long end, int executable) { - pte_t *kpte; - unsigned int level; + unsigned long num_pages; - while (start < end) { - kpte = lookup_address((unsigned long)__va(start), &level); - BUG_ON(!kpte); - if (executable) - set_pte(kpte, pte_mkexec(*kpte)); - else - set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \ - __supported_pte_mask)); - if (level == PG_LEVEL_4K) - start = (start + PAGE_SIZE) & PAGE_MASK; - else - start = (start + PMD_SIZE) & PMD_MASK; - } + start &= PMD_MASK; + end = (end + PMD_SIZE - 1) & PMD_MASK; + num_pages = (end - start) >> PAGE_SHIFT; + if (executable) + set_memory_x((unsigned long)__va(start), num_pages); + else + set_memory_nx((unsigned long)__va(start), num_pages); } static void __init early_runtime_code_mapping_set_exec(int executable) @@ -74,7 +68,7 @@ static void __init early_runtime_code_mapping_set_exec(int executable) md = p; if (md->type == EFI_RUNTIME_SERVICES_CODE) { unsigned long end; - end = md->phys_addr + (md->num_pages << PAGE_SHIFT); + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); early_mapping_set_exec(md->phys_addr, end, executable); } } @@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void) { unsigned long vaddress; - local_irq_save(efi_flags); early_runtime_code_mapping_set_exec(1); + local_irq_save(efi_flags); vaddress = (unsigned long)__va(0x0UL); save_pgd = *pgd_offset_k(0x0UL); set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); @@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void) * After the lock is released, the original page table is restored. */ set_pgd(pgd_offset_k(0x0UL), save_pgd); - early_runtime_code_mapping_set_exec(0); __flush_tlb_all(); local_irq_restore(efi_flags); + early_runtime_code_mapping_set_exec(0); } void __init efi_reserve_bootmem(void) From c2a9cc7e86cf535a4fa14aebf5c3bc3349d09603 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 12 Feb 2008 12:10:27 -0800 Subject: [PATCH 8/8] x86: pit_clockevent can be static arch/x86/kernel/i8253.c:98:27: warning: symbol 'pit_clockevent' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index ef62b07b2b48..8540abe86ade 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - * !using_apic_timer decisions in do_timer_interrupt_hook() */ -struct clock_event_device pit_clockevent = { +static struct clock_event_device pit_clockevent = { .name = "pit", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = init_pit_timer,