Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:

"Various vDSO updates from Andy Lutomirski, mostly cleanups and reorganization to improve maintainability, but also some micro-optimizations and robustization changes"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64/vsyscall: Restore orig_ax after vsyscall seccomp
  x86_64: Add a comment explaining the TASK_SIZE_MAX guard page
  x86_64,vsyscall: Make vsyscall emulation configurable
  x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code
  x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none
  x86,vdso: Use LSL unconditionally for vgetcpu
  x86: vdso: Fix build with older gcc
  x86_64/vdso: Clean up vgetcpu init and merge the vdso initcalls
  x86_64/vdso: Remove jiffies from the vvar page
  x86/vdso: Make the PER_CPU segment 32 bits
  x86/vdso: Make the PER_CPU segment start out accessed
  x86/vdso: Change the PER_CPU segment to use struct desc_struct
  x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
  x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c
This commit is contained in:
commit
3100e448e7
|
@ -992,6 +992,24 @@ config X86_ESPFIX64
|
||||||
def_bool y
|
def_bool y
|
||||||
depends on X86_16BIT && X86_64
|
depends on X86_16BIT && X86_64
|
||||||
|
|
||||||
|
config X86_VSYSCALL_EMULATION
|
||||||
|
bool "Enable vsyscall emulation" if EXPERT
|
||||||
|
default y
|
||||||
|
depends on X86_64
|
||||||
|
---help---
|
||||||
|
This enables emulation of the legacy vsyscall page. Disabling
|
||||||
|
it is roughly equivalent to booting with vsyscall=none, except
|
||||||
|
that it will also disable the helpful warning if a program
|
||||||
|
tries to use a vsyscall. With this option set to N, offending
|
||||||
|
programs will just segfault, citing addresses of the form
|
||||||
|
0xffffffffff600?00.
|
||||||
|
|
||||||
|
This option is required by many programs built before 2013, and
|
||||||
|
care should be used even with newer programs if set to N.
|
||||||
|
|
||||||
|
Disabling this option saves about 7K of kernel size and
|
||||||
|
possibly 4K of additional runtime pagetable memory.
|
||||||
|
|
||||||
config TOSHIBA
|
config TOSHIBA
|
||||||
tristate "Toshiba Laptop support"
|
tristate "Toshiba Laptop support"
|
||||||
depends on X86_32
|
depends on X86_32
|
||||||
|
|
|
@ -69,7 +69,9 @@ enum fixed_addresses {
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
FIX_HOLE,
|
FIX_HOLE,
|
||||||
#else
|
#else
|
||||||
|
#ifdef CONFIG_X86_VSYSCALL_EMULATION
|
||||||
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
|
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
|
||||||
|
#endif
|
||||||
#ifdef CONFIG_PARAVIRT_CLOCK
|
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||||
PVCLOCK_FIXMAP_BEGIN,
|
PVCLOCK_FIXMAP_BEGIN,
|
||||||
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
|
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
|
||||||
|
|
|
@ -39,6 +39,8 @@ void copy_page(void *to, void *from);
|
||||||
|
|
||||||
#endif /* !__ASSEMBLY__ */
|
#endif /* !__ASSEMBLY__ */
|
||||||
|
|
||||||
#define __HAVE_ARCH_GATE_AREA 1
|
#ifdef CONFIG_X86_VSYSCALL_EMULATION
|
||||||
|
# define __HAVE_ARCH_GATE_AREA 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _ASM_X86_PAGE_64_H */
|
#endif /* _ASM_X86_PAGE_64_H */
|
||||||
|
|
|
@ -894,7 +894,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* User space process size. 47bits minus one guard page.
|
* User space process size. 47bits minus one guard page. The guard
|
||||||
|
* page is necessary on Intel CPUs: if a SYSCALL instruction is at
|
||||||
|
* the highest possible canonical userspace address, then that
|
||||||
|
* syscall will enter the kernel with a non-canonical return
|
||||||
|
* address, and SYSRET will explode dangerously. We avoid this
|
||||||
|
* particular problem by preventing anything from being mapped
|
||||||
|
* at the maximum canonical address.
|
||||||
*/
|
*/
|
||||||
#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
|
#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
|
||||||
|
|
||||||
|
|
|
@ -70,4 +70,23 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
|
||||||
++s->seq;
|
++s->seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
|
#define VGETCPU_CPU_MASK 0xfff
|
||||||
|
|
||||||
|
static inline unsigned int __getcpu(void)
|
||||||
|
{
|
||||||
|
unsigned int p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load per CPU data from GDT. LSL is faster than RDTSCP and
|
||||||
|
* works on all CPUs.
|
||||||
|
*/
|
||||||
|
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* CONFIG_X86_64 */
|
||||||
|
|
||||||
#endif /* _ASM_X86_VGTOD_H */
|
#endif /* _ASM_X86_VGTOD_H */
|
||||||
|
|
|
@ -4,15 +4,7 @@
|
||||||
#include <linux/seqlock.h>
|
#include <linux/seqlock.h>
|
||||||
#include <uapi/asm/vsyscall.h>
|
#include <uapi/asm/vsyscall.h>
|
||||||
|
|
||||||
#define VGETCPU_RDTSCP 1
|
#ifdef CONFIG_X86_VSYSCALL_EMULATION
|
||||||
#define VGETCPU_LSL 2
|
|
||||||
|
|
||||||
/* kernel space (writeable) */
|
|
||||||
extern int vgetcpu_mode;
|
|
||||||
extern struct timezone sys_tz;
|
|
||||||
|
|
||||||
#include <asm/vvar.h>
|
|
||||||
|
|
||||||
extern void map_vsyscall(void);
|
extern void map_vsyscall(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -20,25 +12,12 @@ extern void map_vsyscall(void);
|
||||||
* Returns true if handled.
|
* Returns true if handled.
|
||||||
*/
|
*/
|
||||||
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
|
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
|
||||||
|
#else
|
||||||
#ifdef CONFIG_X86_64
|
static inline void map_vsyscall(void) {}
|
||||||
|
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
||||||
#define VGETCPU_CPU_MASK 0xfff
|
|
||||||
|
|
||||||
static inline unsigned int __getcpu(void)
|
|
||||||
{
|
{
|
||||||
unsigned int p;
|
return false;
|
||||||
|
|
||||||
if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
|
|
||||||
/* Load per CPU data from RDTSCP */
|
|
||||||
native_read_tscp(&p);
|
|
||||||
} else {
|
|
||||||
/* Load per CPU data from GDT */
|
|
||||||
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
|
|
||||||
}
|
|
||||||
|
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_X86_64 */
|
#endif
|
||||||
|
|
||||||
#endif /* _ASM_X86_VSYSCALL_H */
|
#endif /* _ASM_X86_VSYSCALL_H */
|
||||||
|
|
|
@ -44,8 +44,6 @@ extern char __vvar_page;
|
||||||
|
|
||||||
/* DECLARE_VVAR(offset, type, name) */
|
/* DECLARE_VVAR(offset, type, name) */
|
||||||
|
|
||||||
DECLARE_VVAR(0, volatile unsigned long, jiffies)
|
|
||||||
DECLARE_VVAR(16, int, vgetcpu_mode)
|
|
||||||
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
|
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
|
||||||
|
|
||||||
#undef DECLARE_VVAR
|
#undef DECLARE_VVAR
|
||||||
|
|
|
@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
|
||||||
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
||||||
obj-$(CONFIG_X86_64) += mcount_64.o
|
obj-$(CONFIG_X86_64) += mcount_64.o
|
||||||
obj-y += syscall_$(BITS).o vsyscall_gtod.o
|
obj-y += syscall_$(BITS).o vsyscall_gtod.o
|
||||||
obj-$(CONFIG_X86_64) += vsyscall_64.o
|
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
|
||||||
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
|
|
||||||
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
|
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
|
||||||
obj-$(CONFIG_SYSFS) += ksysfs.o
|
obj-$(CONFIG_SYSFS) += ksysfs.o
|
||||||
obj-y += bootflag.o e820.o
|
obj-y += bootflag.o e820.o
|
||||||
|
|
|
@ -958,14 +958,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
static void vgetcpu_set_mode(void)
|
|
||||||
{
|
|
||||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
|
|
||||||
vgetcpu_mode = VGETCPU_RDTSCP;
|
|
||||||
else
|
|
||||||
vgetcpu_mode = VGETCPU_LSL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_IA32_EMULATION
|
#ifdef CONFIG_IA32_EMULATION
|
||||||
/* May not be __init: called during resume */
|
/* May not be __init: called during resume */
|
||||||
static void syscall32_cpu_init(void)
|
static void syscall32_cpu_init(void)
|
||||||
|
@ -1008,8 +1000,6 @@ void __init identify_boot_cpu(void)
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
sysenter_setup();
|
sysenter_setup();
|
||||||
enable_sep_cpu();
|
enable_sep_cpu();
|
||||||
#else
|
|
||||||
vgetcpu_set_mode();
|
|
||||||
#endif
|
#endif
|
||||||
cpu_detect_tlb(&boot_cpu_data);
|
cpu_detect_tlb(&boot_cpu_data);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1192,9 +1192,7 @@ void __init setup_arch(char **cmdline_p)
|
||||||
|
|
||||||
tboot_probe();
|
tboot_probe();
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
map_vsyscall();
|
map_vsyscall();
|
||||||
#endif
|
|
||||||
|
|
||||||
generic_apic_probe();
|
generic_apic_probe();
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include <asm/time.h>
|
#include <asm/time.h>
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
|
__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned long profile_pc(struct pt_regs *regs)
|
unsigned long profile_pc(struct pt_regs *regs)
|
||||||
|
|
|
@ -1,59 +1,43 @@
|
||||||
/*
|
/*
|
||||||
|
* Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
|
||||||
|
*
|
||||||
|
* Based on the original implementation which is:
|
||||||
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
|
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
|
||||||
* Copyright 2003 Andi Kleen, SuSE Labs.
|
* Copyright 2003 Andi Kleen, SuSE Labs.
|
||||||
*
|
*
|
||||||
* [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
|
* Parts of the original code have been moved to arch/x86/vdso/vma.c
|
||||||
*
|
*
|
||||||
* Thanks to hpa@transmeta.com for some useful hint.
|
* This file implements vsyscall emulation. vsyscalls are a legacy ABI:
|
||||||
* Special thanks to Ingo Molnar for his early experience with
|
* Userspace can request certain kernel services by calling fixed
|
||||||
* a different vsyscall implementation for Linux/IA32 and for the name.
|
* addresses. This concept is problematic:
|
||||||
*
|
*
|
||||||
* vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
|
* - It interferes with ASLR.
|
||||||
* at virtual address -10Mbyte+1024bytes etc... There are at max 4
|
* - It's awkward to write code that lives in kernel addresses but is
|
||||||
* vsyscalls. One vsyscall can reserve more than 1 slot to avoid
|
* callable by userspace at fixed addresses.
|
||||||
* jumping out of line if necessary. We cannot add more with this
|
* - The whole concept is impossible for 32-bit compat userspace.
|
||||||
* mechanism because older kernels won't return -ENOSYS.
|
* - UML cannot easily virtualize a vsyscall.
|
||||||
*
|
*
|
||||||
* Note: the concept clashes with user mode linux. UML users should
|
* As of mid-2014, I believe that there is no new userspace code that
|
||||||
* use the vDSO.
|
* will use a vsyscall if the vDSO is present. I hope that there will
|
||||||
|
* soon be no new userspace code that will ever use a vsyscall.
|
||||||
|
*
|
||||||
|
* The code in this file emulates vsyscalls when notified of a page
|
||||||
|
* fault to a vsyscall address.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
||||||
|
|
||||||
#include <linux/time.h>
|
|
||||||
#include <linux/init.h>
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/timer.h>
|
#include <linux/timer.h>
|
||||||
#include <linux/seqlock.h>
|
|
||||||
#include <linux/jiffies.h>
|
|
||||||
#include <linux/sysctl.h>
|
|
||||||
#include <linux/topology.h>
|
|
||||||
#include <linux/timekeeper_internal.h>
|
|
||||||
#include <linux/getcpu.h>
|
|
||||||
#include <linux/cpu.h>
|
|
||||||
#include <linux/smp.h>
|
|
||||||
#include <linux/notifier.h>
|
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
|
|
||||||
#include <asm/vsyscall.h>
|
#include <asm/vsyscall.h>
|
||||||
#include <asm/pgtable.h>
|
|
||||||
#include <asm/compat.h>
|
|
||||||
#include <asm/page.h>
|
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
#include <asm/fixmap.h>
|
#include <asm/fixmap.h>
|
||||||
#include <asm/errno.h>
|
|
||||||
#include <asm/io.h>
|
|
||||||
#include <asm/segment.h>
|
|
||||||
#include <asm/desc.h>
|
|
||||||
#include <asm/topology.h>
|
|
||||||
#include <asm/traps.h>
|
#include <asm/traps.h>
|
||||||
|
|
||||||
#define CREATE_TRACE_POINTS
|
#define CREATE_TRACE_POINTS
|
||||||
#include "vsyscall_trace.h"
|
#include "vsyscall_trace.h"
|
||||||
|
|
||||||
DEFINE_VVAR(int, vgetcpu_mode);
|
|
||||||
|
|
||||||
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
|
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
|
||||||
|
|
||||||
static int __init vsyscall_setup(char *str)
|
static int __init vsyscall_setup(char *str)
|
||||||
|
@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
||||||
"seccomp tried to change syscall nr or ip");
|
"seccomp tried to change syscall nr or ip");
|
||||||
do_exit(SIGSYS);
|
do_exit(SIGSYS);
|
||||||
}
|
}
|
||||||
|
regs->orig_ax = -1;
|
||||||
if (tmp)
|
if (tmp)
|
||||||
goto do_ret; /* skip requested */
|
goto do_ret; /* skip requested */
|
||||||
|
|
||||||
|
@ -284,46 +269,54 @@ sigsegv:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Assume __initcall executes before all user space. Hopefully kmod
|
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
|
||||||
* doesn't violate that. We'll find out if it does.
|
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
|
||||||
|
* not need special handling anymore:
|
||||||
*/
|
*/
|
||||||
static void vsyscall_set_cpu(int cpu)
|
static const char *gate_vma_name(struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
unsigned long d;
|
return "[vsyscall]";
|
||||||
unsigned long node = 0;
|
}
|
||||||
#ifdef CONFIG_NUMA
|
static struct vm_operations_struct gate_vma_ops = {
|
||||||
node = cpu_to_node(cpu);
|
.name = gate_vma_name,
|
||||||
|
};
|
||||||
|
static struct vm_area_struct gate_vma = {
|
||||||
|
.vm_start = VSYSCALL_ADDR,
|
||||||
|
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
|
||||||
|
.vm_page_prot = PAGE_READONLY_EXEC,
|
||||||
|
.vm_flags = VM_READ | VM_EXEC,
|
||||||
|
.vm_ops = &gate_vma_ops,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_IA32_EMULATION
|
||||||
|
if (!mm || mm->context.ia32_compat)
|
||||||
|
return NULL;
|
||||||
#endif
|
#endif
|
||||||
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
|
if (vsyscall_mode == NONE)
|
||||||
write_rdtscp_aux((node << 12) | cpu);
|
return NULL;
|
||||||
|
return &gate_vma;
|
||||||
/*
|
|
||||||
* Store cpu number in limit so that it can be loaded quickly
|
|
||||||
* in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
|
|
||||||
*/
|
|
||||||
d = 0x0f40000000000ULL;
|
|
||||||
d |= cpu;
|
|
||||||
d |= (node & 0xf) << 12;
|
|
||||||
d |= (node >> 4) << 48;
|
|
||||||
|
|
||||||
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cpu_vsyscall_init(void *arg)
|
int in_gate_area(struct mm_struct *mm, unsigned long addr)
|
||||||
{
|
{
|
||||||
/* preemption should be already off */
|
struct vm_area_struct *vma = get_gate_vma(mm);
|
||||||
vsyscall_set_cpu(raw_smp_processor_id());
|
|
||||||
|
if (!vma)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return (addr >= vma->vm_start) && (addr < vma->vm_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
/*
|
||||||
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
* Use this when you have no reliable mm, typically from interrupt
|
||||||
|
* context. It is less reliable than using a task's mm and may give
|
||||||
|
* false positives.
|
||||||
|
*/
|
||||||
|
int in_gate_area_no_mm(unsigned long addr)
|
||||||
{
|
{
|
||||||
long cpu = (long)arg;
|
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
|
||||||
|
|
||||||
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
|
|
||||||
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
|
|
||||||
|
|
||||||
return NOTIFY_DONE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void __init map_vsyscall(void)
|
void __init map_vsyscall(void)
|
||||||
|
@ -331,24 +324,12 @@ void __init map_vsyscall(void)
|
||||||
extern char __vsyscall_page;
|
extern char __vsyscall_page;
|
||||||
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
|
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
|
||||||
|
|
||||||
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
|
if (vsyscall_mode != NONE)
|
||||||
vsyscall_mode == NATIVE
|
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
|
||||||
? PAGE_KERNEL_VSYSCALL
|
vsyscall_mode == NATIVE
|
||||||
: PAGE_KERNEL_VVAR);
|
? PAGE_KERNEL_VSYSCALL
|
||||||
|
: PAGE_KERNEL_VVAR);
|
||||||
|
|
||||||
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
|
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
|
||||||
(unsigned long)VSYSCALL_ADDR);
|
(unsigned long)VSYSCALL_ADDR);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init vsyscall_init(void)
|
|
||||||
{
|
|
||||||
cpu_notifier_register_begin();
|
|
||||||
|
|
||||||
on_each_cpu(cpu_vsyscall_init, NULL, 1);
|
|
||||||
/* notifier priority > KVM */
|
|
||||||
__hotcpu_notifier(cpu_vsyscall_notifier, 30);
|
|
||||||
|
|
||||||
cpu_notifier_register_done();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
__initcall(vsyscall_init);
|
|
||||||
|
|
|
@ -1204,55 +1204,6 @@ int kern_addr_valid(unsigned long addr)
|
||||||
return pfn_valid(pte_pfn(*pte));
|
return pfn_valid(pte_pfn(*pte));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
|
|
||||||
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
|
|
||||||
* not need special handling anymore:
|
|
||||||
*/
|
|
||||||
static const char *gate_vma_name(struct vm_area_struct *vma)
|
|
||||||
{
|
|
||||||
return "[vsyscall]";
|
|
||||||
}
|
|
||||||
static struct vm_operations_struct gate_vma_ops = {
|
|
||||||
.name = gate_vma_name,
|
|
||||||
};
|
|
||||||
static struct vm_area_struct gate_vma = {
|
|
||||||
.vm_start = VSYSCALL_ADDR,
|
|
||||||
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
|
|
||||||
.vm_page_prot = PAGE_READONLY_EXEC,
|
|
||||||
.vm_flags = VM_READ | VM_EXEC,
|
|
||||||
.vm_ops = &gate_vma_ops,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_IA32_EMULATION
|
|
||||||
if (!mm || mm->context.ia32_compat)
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
return &gate_vma;
|
|
||||||
}
|
|
||||||
|
|
||||||
int in_gate_area(struct mm_struct *mm, unsigned long addr)
|
|
||||||
{
|
|
||||||
struct vm_area_struct *vma = get_gate_vma(mm);
|
|
||||||
|
|
||||||
if (!vma)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return (addr >= vma->vm_start) && (addr < vma->vm_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use this when you have no reliable mm, typically from interrupt
|
|
||||||
* context. It is less reliable than using a task's mm and may give
|
|
||||||
* false positives.
|
|
||||||
*/
|
|
||||||
int in_gate_area_no_mm(unsigned long addr)
|
|
||||||
{
|
|
||||||
return (addr & PAGE_MASK) == VSYSCALL_ADDR;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned long probe_memory_block_size(void)
|
static unsigned long probe_memory_block_size(void)
|
||||||
{
|
{
|
||||||
/* start from 2g */
|
/* start from 2g */
|
||||||
|
|
|
@ -7,9 +7,7 @@
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/getcpu.h>
|
#include <linux/getcpu.h>
|
||||||
#include <linux/jiffies.h>
|
|
||||||
#include <linux/time.h>
|
#include <linux/time.h>
|
||||||
#include <asm/vsyscall.h>
|
|
||||||
#include <asm/vgtod.h>
|
#include <asm/vgtod.h>
|
||||||
|
|
||||||
notrace long
|
notrace long
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
/*
|
/*
|
||||||
* Set up the VMAs to tell the VM about the vDSO.
|
|
||||||
* Copyright 2007 Andi Kleen, SUSE Labs.
|
* Copyright 2007 Andi Kleen, SUSE Labs.
|
||||||
* Subject to the GPL, v.2
|
* Subject to the GPL, v.2
|
||||||
|
*
|
||||||
|
* This contains most of the x86 vDSO kernel-side code.
|
||||||
*/
|
*/
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
|
@ -10,17 +11,17 @@
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/random.h>
|
#include <linux/random.h>
|
||||||
#include <linux/elf.h>
|
#include <linux/elf.h>
|
||||||
#include <asm/vsyscall.h>
|
#include <linux/cpu.h>
|
||||||
#include <asm/vgtod.h>
|
#include <asm/vgtod.h>
|
||||||
#include <asm/proto.h>
|
#include <asm/proto.h>
|
||||||
#include <asm/vdso.h>
|
#include <asm/vdso.h>
|
||||||
|
#include <asm/vvar.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/hpet.h>
|
#include <asm/hpet.h>
|
||||||
|
#include <asm/desc.h>
|
||||||
|
|
||||||
#if defined(CONFIG_X86_64)
|
#if defined(CONFIG_X86_64)
|
||||||
unsigned int __read_mostly vdso64_enabled = 1;
|
unsigned int __read_mostly vdso64_enabled = 1;
|
||||||
|
|
||||||
extern unsigned short vdso_sync_cpuid;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void __init init_vdso_image(const struct vdso_image *image)
|
void __init init_vdso_image(const struct vdso_image *image)
|
||||||
|
@ -38,20 +39,6 @@ void __init init_vdso_image(const struct vdso_image *image)
|
||||||
image->alt_len));
|
image->alt_len));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(CONFIG_X86_64)
|
|
||||||
static int __init init_vdso(void)
|
|
||||||
{
|
|
||||||
init_vdso_image(&vdso_image_64);
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_X32_ABI
|
|
||||||
init_vdso_image(&vdso_image_x32);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
subsys_initcall(init_vdso);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct linux_binprm;
|
struct linux_binprm;
|
||||||
|
|
||||||
/* Put the vdso above the (randomized) stack with another randomized offset.
|
/* Put the vdso above the (randomized) stack with another randomized offset.
|
||||||
|
@ -238,3 +225,63 @@ static __init int vdso_setup(char *s)
|
||||||
}
|
}
|
||||||
__setup("vdso=", vdso_setup);
|
__setup("vdso=", vdso_setup);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
static void vgetcpu_cpu_init(void *arg)
|
||||||
|
{
|
||||||
|
int cpu = smp_processor_id();
|
||||||
|
struct desc_struct d = { };
|
||||||
|
unsigned long node = 0;
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
node = cpu_to_node(cpu);
|
||||||
|
#endif
|
||||||
|
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
|
||||||
|
write_rdtscp_aux((node << 12) | cpu);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Store cpu number in limit so that it can be loaded
|
||||||
|
* quickly in user space in vgetcpu. (12 bits for the CPU
|
||||||
|
* and 8 bits for the node)
|
||||||
|
*/
|
||||||
|
d.limit0 = cpu | ((node & 0xf) << 12);
|
||||||
|
d.limit = node >> 4;
|
||||||
|
d.type = 5; /* RO data, expand down, accessed */
|
||||||
|
d.dpl = 3; /* Visible to user code */
|
||||||
|
d.s = 1; /* Not a system segment */
|
||||||
|
d.p = 1; /* Present */
|
||||||
|
d.d = 1; /* 32-bit */
|
||||||
|
|
||||||
|
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
||||||
|
{
|
||||||
|
long cpu = (long)arg;
|
||||||
|
|
||||||
|
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
|
||||||
|
smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
|
||||||
|
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init init_vdso(void)
|
||||||
|
{
|
||||||
|
init_vdso_image(&vdso_image_64);
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_X32_ABI
|
||||||
|
init_vdso_image(&vdso_image_x32);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cpu_notifier_register_begin();
|
||||||
|
|
||||||
|
on_each_cpu(vgetcpu_cpu_init, NULL, 1);
|
||||||
|
/* notifier priority > KVM */
|
||||||
|
__hotcpu_notifier(vgetcpu_cpu_notifier, 30);
|
||||||
|
|
||||||
|
cpu_notifier_register_done();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
subsys_initcall(init_vdso);
|
||||||
|
#endif /* CONFIG_X86_64 */
|
||||||
|
|
|
@ -1412,8 +1412,10 @@ static int xen_pgd_alloc(struct mm_struct *mm)
|
||||||
page->private = (unsigned long)user_pgd;
|
page->private = (unsigned long)user_pgd;
|
||||||
|
|
||||||
if (user_pgd != NULL) {
|
if (user_pgd != NULL) {
|
||||||
|
#ifdef CONFIG_X86_VSYSCALL_EMULATION
|
||||||
user_pgd[pgd_index(VSYSCALL_ADDR)] =
|
user_pgd[pgd_index(VSYSCALL_ADDR)] =
|
||||||
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
|
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
|
||||||
|
#endif
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1976,7 +1978,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
||||||
# ifdef CONFIG_HIGHMEM
|
# ifdef CONFIG_HIGHMEM
|
||||||
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
|
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
|
||||||
# endif
|
# endif
|
||||||
#else
|
#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
|
||||||
case VSYSCALL_PAGE:
|
case VSYSCALL_PAGE:
|
||||||
#endif
|
#endif
|
||||||
case FIX_TEXT_POKE0:
|
case FIX_TEXT_POKE0:
|
||||||
|
@ -2015,7 +2017,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
||||||
|
|
||||||
__native_set_fixmap(idx, pte);
|
__native_set_fixmap(idx, pte);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_VSYSCALL_EMULATION
|
||||||
/* Replicate changes to map the vsyscall page into the user
|
/* Replicate changes to map the vsyscall page into the user
|
||||||
pagetable vsyscall mapping. */
|
pagetable vsyscall mapping. */
|
||||||
if (idx == VSYSCALL_PAGE) {
|
if (idx == VSYSCALL_PAGE) {
|
||||||
|
|
Loading…
Reference in New Issue