- PTRACE_GETREGS/PTRACE_PUTREGS regset selection cleanup

- Another initial cleanup - more to follow - to the fault handling code.

- Other minor cleanups and corrections.

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmAqU0oACgkQEsHwGGHe
VUruWw//VA+/K7Ykd8tjZdmJPWdfsdqBtOrolh4hiajM6iYckTip/FdwHpeEQwM9
ff0iNMrxICG3gbQxCX6WNzPeJatYsnjtF67whfat2SEzNHSDtZDb1Bm20s2/1fbY
OurRBTEBzuYMolpEJ2XABpu7LQ+6TV3LJ6yUBungILMOjP7KvrCK0SUrWj253VDU
XljK5XBZnmYlEjPU6dlhn64Wsl/GD7AWCAeZGq47EgjH2cR6gxNmu9kYAArGbdiJ
WjF8MWE7qVwCPUTiCBv+P1CjsQawvlcUY54wtG65dBYAZvpjmN82T2ypguzAt8KT
12A38vFlBuEUAWC0rUymNouh8Q20AElpdw/odLElHkpNxbHhf/7RyZ1E00LjsFtn
MF9Gp9aSIQbfYWK+Hin9oRvqXckV08u3KtzUNeyMbdCmpyqHh6prj8JEZaxKZZUp
zCaX8Qasn+Q9zL0DO51WI9EPOwpvSpifUYHmd5RHGbQDW9DjYK4mkBCHhjVfYXd/
NcxRO5rrMLmMG+XuNPg9vuHMi2HJnClJ6odD6b80xGvBodTZxZnqnYO9tUImbYnW
pdmt73YDvakei8XY7cAdNWcsTi0kQYZGfInna6z43Ri2l+I1TZaoKGDqn7TbzNbb
9RB0lrD0tfW0PvvDbVwco0Q+8/ykIbvPkHPvjQGWioxHi6yI49s=
=uVEk
-----END PGP SIGNATURE-----

Merge tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm cleanups from Borislav Petkov:

 - PTRACE_GETREGS/PTRACE_PUTREGS regset selection cleanup

 - Another initial cleanup - more to follow - to the fault handling code.

 - Other minor cleanups and corrections.

* tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/{fault,efi}: Fix and rename efi_recover_from_page_fault()
  x86/fault: Don't run fixups for SMAP violations
  x86/fault: Don't look for extable entries for SMEP violations
  x86/fault: Rename no_context() to kernelmode_fixup_or_oops()
  x86/fault: Bypass no_context() for implicit kernel faults from usermode
  x86/fault: Split the OOPS code out from no_context()
  x86/fault: Improve kernel-executing-user-memory handling
  x86/fault: Correct a few user vs kernel checks wrt WRUSS
  x86/fault: Document the locking in the fault_signal_pending() path
  x86/fault/32: Move is_f00f_bug() to do_kern_addr_fault()
  x86/fault: Fold mm_fault_error() into do_user_addr_fault()
  x86/fault: Skip the AMD erratum #91 workaround on unaffected CPUs
  x86/fault: Fix AMD erratum #91 errata fixup for user code
  x86/Kconfig: Remove HPET_EMULATE_RTC depends on RTC
  x86/asm: Fixup TASK_SIZE_MAX comment
  x86/ptrace: Clean up PTRACE_GETREGS/PTRACE_PUTREGS regset selection
  x86/vm86/32: Remove VM86_SCREEN_BITMAP support
  x86: Remove definition of DEBUG
  x86/entry: Remove now unused do_IRQ() declaration
  x86/mm: Remove duplicate definition of _PAGE_PAT_LARGE
  ...
This commit is contained in:
commit ae821d2107
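For orientation while reading the ptrace hunks below: PTRACE_GETREGS fills a struct user_regs_struct for a stopped tracee, and the cleanup merged here only changes how the kernel picks the regset view backing that call, not the userspace ABI. A minimal tracer, as an illustrative sketch (error handling trimmed, x86-64 assumed):

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* Child: ask to be traced, then stop at execve(). */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execlp("true", "true", (char *)NULL);
		_exit(1);
	}

	waitpid(child, NULL, 0);	/* child is now ptrace-stopped */

	struct user_regs_struct regs;
	if (ptrace(PTRACE_GETREGS, child, NULL, &regs) == -1) {
		perror("PTRACE_GETREGS");
		exit(1);
	}
#ifdef __x86_64__
	printf("rip = %llx\n", regs.rip);
#endif
	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}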
@@ -890,7 +890,7 @@ config HPET_TIMER

 config HPET_EMULATE_RTC
 	def_bool y
-	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
+	depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)

 config APB_TIMER
 	def_bool y if X86_INTEL_MID
@@ -139,7 +139,7 @@ extern void __init efi_dump_pagetable(void);
 extern void __init efi_apply_memmap_quirks(void);
 extern int __init efi_reuse_config(u64 tables, int nr_tables);
 extern void efi_delete_dummy_variable(void);
-extern void efi_recover_from_page_fault(unsigned long phys_addr);
+extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
 extern void efi_free_boot_services(void);

 void efi_enter_mm(void);
@@ -40,8 +40,6 @@ extern void native_init_IRQ(void);

 extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs);

-extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
-
 extern void init_ISA_irqs(void);

 extern void __init init_IRQ(void);
@@ -66,7 +66,7 @@
  * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
  * address, then that syscall will enter the kernel with a
  * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
+ * We avoid this particular problem by preventing anything
  * from being mapped at the maximum canonical address.
  *
  * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
@@ -177,8 +177,6 @@ enum page_cache_mode {
 #define __pgprot(x)		((pgprot_t) { (x) } )
 #define __pg(x)			__pgprot(x)

-#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
-
 #define PAGE_NONE		__pg(   0|   0|   0|___A|   0|   0|   0|___G)
 #define PAGE_SHARED		__pg(__PP|__RW|_USR|___A|__NX|   0|   0|   0)
 #define PAGE_SHARED_EXEC	__pg(__PP|__RW|_USR|___A|   0|   0|   0|   0)
@@ -36,7 +36,6 @@ struct vm86 {
	unsigned long saved_sp0;

	unsigned long flags;
-	unsigned long screen_bitmap;
	unsigned long cpu_type;
	struct revectored_struct int_revectored;
	struct revectored_struct int21_revectored;
@@ -97,7 +97,7 @@ struct revectored_struct {
 struct vm86_struct {
	struct vm86_regs regs;
	unsigned long flags;
-	unsigned long screen_bitmap;
+	unsigned long screen_bitmap;	/* unused, preserved by vm86() */
	unsigned long cpu_type;
	struct revectored_struct int_revectored;
	struct revectored_struct int21_revectored;
@@ -106,7 +106,7 @@ struct vm86_struct {
 /*
  * flags masks
  */
-#define VM86_SCREEN_BITMAP	0x0001
+#define VM86_SCREEN_BITMAP	0x0001	/* no longer supported */

 struct vm86plus_info_struct {
	unsigned long force_return_for_pic:1;
@@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void)
		if (!size_base)
			continue;

-		size_base = to_size_factor(size_base, &size_factor),
+		size_base = to_size_factor(size_base, &size_factor);
		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
-		start_base = to_size_factor(start_base, &start_factor),
+		start_base = to_size_factor(start_base, &start_factor);
		type = range_state[i].type;

		pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
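The hunk above fixes statements that were terminated with the comma operator instead of a semicolon; the code happened to behave identically only because the joined expressions had no further effect. A standalone illustration of why the comma operator is a trap in assignments (illustrative only, not kernel code):

#include <stdio.h>

int main(void)
{
	int a;

	a = 1, 2;	/* assignment binds tighter than the comma: a == 1 */
	printf("%d\n", a);

	a = (1, 2);	/* parenthesized comma expression: a == 2 */
	printf("%d\n", a);

	return 0;
}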
@@ -3,7 +3,6 @@
  * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
  * because MTRRs can span up to 40 bits (36bits on most modern x86)
  */
-#define DEBUG

 #include <linux/export.h>
 #include <linux/init.h>
@@ -31,8 +31,6 @@
    System Programming Guide; Section 9.11. (1997 edition - PPro).
 */

-#define DEBUG
-
 #include <linux/types.h> /* FIXME: kvm_para.h needs this */

 #include <linux/stop_machine.h>
@@ -4,9 +4,6 @@
 #include <linux/string.h>
 #include <linux/kallsyms.h>

-
-#define DEBUG 1
-
 static struct iommu_table_entry * __init
 find_dependents_of(struct iommu_table_entry *start,
		    struct iommu_table_entry *finish,
@@ -704,6 +704,9 @@ void ptrace_disable(struct task_struct *child)
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 static const struct user_regset_view user_x86_32_view; /* Initialized below. */
 #endif
+#ifdef CONFIG_X86_64
+static const struct user_regset_view user_x86_64_view; /* Initialized below. */
+#endif

 long arch_ptrace(struct task_struct *child, long request,
		 unsigned long addr, unsigned long data)
@@ -711,6 +714,14 @@ long arch_ptrace(struct task_struct *child, long request,
	int ret;
	unsigned long __user *datap = (unsigned long __user *)data;

+#ifdef CONFIG_X86_64
+	/* This is native 64-bit ptrace() */
+	const struct user_regset_view *regset_view = &user_x86_64_view;
+#else
+	/* This is native 32-bit ptrace() */
+	const struct user_regset_view *regset_view = &user_x86_32_view;
+#endif
+
	switch (request) {
	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
@@ -749,28 +760,28 @@ long arch_ptrace(struct task_struct *child, long request,

	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
		return copy_regset_to_user(child,
-					   task_user_regset_view(current),
+					   regset_view,
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct),
					   datap);

	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
		return copy_regset_from_user(child,
-					     task_user_regset_view(current),
+					     regset_view,
					     REGSET_GENERAL,
					     0, sizeof(struct user_regs_struct),
					     datap);

	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
		return copy_regset_to_user(child,
-					   task_user_regset_view(current),
+					   regset_view,
					   REGSET_FP,
					   0, sizeof(struct user_i387_struct),
					   datap);

	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
		return copy_regset_from_user(child,
-					     task_user_regset_view(current),
+					     regset_view,
					     REGSET_FP,
					     0, sizeof(struct user_i387_struct),
					     datap);
@@ -1152,28 +1163,28 @@ static long x32_arch_ptrace(struct task_struct *child,

	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
		return copy_regset_to_user(child,
-					   task_user_regset_view(current),
+					   &user_x86_64_view,
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct),
					   datap);

	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
		return copy_regset_from_user(child,
-					     task_user_regset_view(current),
+					     &user_x86_64_view,
					     REGSET_GENERAL,
					     0, sizeof(struct user_regs_struct),
					     datap);

	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
		return copy_regset_to_user(child,
-					   task_user_regset_view(current),
+					   &user_x86_64_view,
					   REGSET_FP,
					   0, sizeof(struct user_i387_struct),
					   datap);

	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
		return copy_regset_from_user(child,
-					     task_user_regset_view(current),
+					     &user_x86_64_view,
					     REGSET_FP,
					     0, sizeof(struct user_i387_struct),
					     datap);
@@ -1309,6 +1320,25 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
 }

+/*
+ * This is used by the core dump code to decide which regset to dump.  The
+ * core dump code writes out the resulting .e_machine and the corresponding
+ * regsets.  This is suboptimal if the task is messing around with its CS.L
+ * field, but at worst the core dump will end up missing some information.
+ *
+ * Unfortunately, it is also used by the broken PTRACE_GETREGSET and
+ * PTRACE_SETREGSET APIs.  These APIs look at the .regsets field but have
+ * no way to make sure that the e_machine they use matches the caller's
+ * expectations.  The result is that the data format returned by
+ * PTRACE_GETREGSET depends on the returned CS field (and even the offset
+ * of the returned CS field depends on its value!) and the data format
+ * accepted by PTRACE_SETREGSET is determined by the old CS value.  The
+ * upshot is that it is basically impossible to use these APIs correctly.
+ *
+ * The best way to fix it in the long run would probably be to add new
+ * improved ptrace() APIs to read and write registers reliably, possibly by
+ * allowing userspace to select the ELF e_machine variant that they expect.
+ */
 const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 {
 #ifdef CONFIG_IA32_EMULATION
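The new comment documents why PTRACE_GETREGSET's data layout follows the tracee's CS value. For reference, the regset API is driven from userspace through an iovec, and the kernel shrinks iov_len to the size of the regset it selected -- the very size the comment says a caller cannot reliably predict. A hedged sketch (assumes an already ptrace-stopped tracee):

#include <elf.h>		/* NT_PRSTATUS */
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>		/* struct iovec */
#include <sys/user.h>

/* Call with a tracee that is already ptrace-stopped. */
static long dump_gp_regs(pid_t pid)
{
	struct user_regs_struct regs;
	struct iovec iov = { .iov_base = &regs, .iov_len = sizeof(regs) };

	/*
	 * The kernel truncates iov_len to the regset size it picked --
	 * which, per the comment above, depends on the tracee's CS.
	 */
	if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) == -1)
		return -1;

	printf("regset returned %zu bytes\n", iov.iov_len);
	return 0;
}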
@@ -90,14 +90,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, off)
 {
-	long error;
-	error = -EINVAL;
	if (off & ~PAGE_MASK)
-		goto out;
+		return -EINVAL;

-	error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
-out:
-	return error;
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 }

 static void find_start_end(unsigned long addr, unsigned long flags,
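The simplified mmap() wrapper above preserves the user-visible contract: a byte offset that is not page-aligned fails with -EINVAL before it is converted to a page offset for ksys_mmap_pgoff(). Seen from userspace (illustrative sketch; any readable file works in place of /etc/hostname):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);
	if (fd < 0)
		return 1;

	/* Offset 1 is not a multiple of the page size, so the kernel
	 * rejects the mapping with EINVAL, as in the check above. */
	void *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 1);
	if (p == MAP_FAILED && errno == EINVAL)
		printf("unaligned offset rejected: EINVAL\n");

	close(fd);
	return 0;
}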
@@ -134,7 +134,11 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
	unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
	unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
	unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
-	unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
+
+	/*
+	 * Don't write screen_bitmap in case some user had a value there
+	 * and expected it to remain unchanged.
+	 */

	user_access_end();
@@ -160,49 +164,6 @@ Efault:
	do_exit(SIGSEGV);
 }

-static void mark_screen_rdonly(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma;
-	spinlock_t *ptl;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int i;
-
-	mmap_write_lock(mm);
-	pgd = pgd_offset(mm, 0xA0000);
-	if (pgd_none_or_clear_bad(pgd))
-		goto out;
-	p4d = p4d_offset(pgd, 0xA0000);
-	if (p4d_none_or_clear_bad(p4d))
-		goto out;
-	pud = pud_offset(p4d, 0xA0000);
-	if (pud_none_or_clear_bad(pud))
-		goto out;
-	pmd = pmd_offset(pud, 0xA0000);
-
-	if (pmd_trans_huge(*pmd)) {
-		vma = find_vma(mm, 0xA0000);
-		split_huge_pmd(vma, pmd, 0xA0000);
-	}
-	if (pmd_none_or_clear_bad(pmd))
-		goto out;
-	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
-	for (i = 0; i < 32; i++) {
-		if (pte_present(*pte))
-			set_pte(pte, pte_wrprotect(*pte));
-		pte++;
-	}
-	pte_unmap_unlock(pte, ptl);
-out:
-	mmap_write_unlock(mm);
-	flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
-}
-
-
-
 static int do_vm86_irq_handling(int subfunction, int irqnumber);
 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
@@ -282,6 +243,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
			       offsetof(struct vm86_struct, int_revectored)))
		return -EFAULT;

+
+	/* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
+	if (v.flags & VM86_SCREEN_BITMAP) {
+		char comm[TASK_COMM_LEN];
+
+		pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
+		return -EINVAL;
+	}
+
	memset(&vm86regs, 0, sizeof(vm86regs));

	vm86regs.pt.bx = v.regs.ebx;
@@ -302,7 +272,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
	vm86regs.gs = v.regs.gs;

	vm86->flags = v.flags;
-	vm86->screen_bitmap = v.screen_bitmap;
	vm86->cpu_type = v.cpu_type;

	if (copy_from_user(&vm86->int_revectored,
@@ -370,9 +339,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
	update_task_stack(tsk);
	preempt_enable();

-	if (vm86->flags & VM86_SCREEN_BITMAP)
-		mark_screen_rdonly(tsk->mm);
-
	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
	return regs->ax;
 }
@@ -16,7 +16,7 @@
 #include <linux/prefetch.h>		/* prefetchw			*/
 #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
-#include <linux/efi.h>			/* efi_recover_from_page_fault()*/
+#include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <linux/mm_types.h>

 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
@@ -25,7 +25,7 @@
 #include <asm/vsyscall.h>		/* emulate_vsyscall		*/
 #include <asm/vm86.h>			/* struct vm86			*/
 #include <asm/mmu_context.h>		/* vma_pkey()			*/
-#include <asm/efi.h>			/* efi_recover_from_page_fault()*/
+#include <asm/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <asm/desc.h>			/* store_idt(), ...		*/
 #include <asm/cpu_entry_area.h>		/* exception stack		*/
 #include <asm/pgtable_areas.h>		/* VMALLOC_START, ...		*/
@@ -54,7 +54,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
  * 32-bit mode:
  *
  *   Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- *   Check that here and ignore it.
+ *   Check that here and ignore it.  This is AMD erratum #91.
  *
  * 64-bit mode:
  *
@@ -83,11 +83,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
 #ifdef CONFIG_X86_64
	case 0x40:
		/*
-		 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
-		 * Need to figure out under what instruction mode the
-		 * instruction was issued. Could check the LDT for lm,
-		 * but for now it's good enough to assume that long
-		 * mode only uses well known segments or kernel.
+		 * In 64-bit mode 0x40..0x4F are valid REX prefixes
		 */
		return (!user_mode(regs) || user_64bit_mode(regs));
 #endif
@@ -110,6 +106,15 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
	}
 }

+static bool is_amd_k8_pre_npt(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	return unlikely(IS_ENABLED(CONFIG_CPU_SUP_AMD) &&
+			c->x86_vendor == X86_VENDOR_AMD &&
+			c->x86 == 0xf && c->x86_model < 0x40);
+}
+
 static int
 is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 {
@@ -117,6 +122,10 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
	unsigned char *instr;
	int prefetch = 0;

+	/* Erratum #91 affects AMD K8, pre-NPT CPUs */
+	if (!is_amd_k8_pre_npt())
+		return 0;
+
	/*
	 * If it was a exec (instruction fetch) fault on NX page, then
	 * do not ignore the fault:
@@ -127,20 +136,31 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
	instr = (void *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
-		return 0;
+	/*
+	 * This code has historically always bailed out if IP points to a
+	 * not-present page (e.g. due to a race).  No one has ever
+	 * complained about this.
+	 */
+	pagefault_disable();

	while (instr < max_instr) {
		unsigned char opcode;

-		if (get_kernel_nofault(opcode, instr))
-			break;
+		if (user_mode(regs)) {
+			if (get_user(opcode, instr))
+				break;
+		} else {
+			if (get_kernel_nofault(opcode, instr))
+				break;
+		}

		instr++;

		if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
			break;
	}

+	pagefault_enable();
	return prefetch;
 }
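The reworked is_prefetch() above now runs only on pre-NPT AMD K8 parts and scans the faulting instruction under pagefault_disable() with get_user()/get_kernel_nofault() instead of a raw TASK_SIZE_MAX cutoff. The scan itself just skips x86 prefix bytes and looks for the PREFETCH opcodes; a simplified user-space model of that walk (hypothetical helper, deliberately cruder than check_prefetch_opcode(), which also distinguishes CPU modes):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Walk at most 15 bytes (the maximum x86 instruction length), skip
 * prefix bytes, and report whether the opcode is PREFETCH (0x0F 0x0D)
 * or PREFETCHh (0x0F 0x18) -- the instructions hit by AMD erratum #91.
 */
static bool is_prefetch_insn(const unsigned char *instr, size_t len)
{
	for (size_t i = 0; i + 1 < len && i < 15; i++) {
		unsigned char b = instr[i];

		/* Segment overrides (0x26/0x2E/0x36/0x3E/0x64/0x65),
		 * operand/address size (0x66/0x67), LOCK/REP (0xF0/0xF2/0xF3)
		 * and, in 64-bit mode, REX (0x40..0x4F) act as prefixes. */
		if (b == 0x26 || b == 0x2E || b == 0x36 || b == 0x3E ||
		    b == 0x64 || b == 0x65 || b == 0x66 || b == 0x67 ||
		    b == 0xF0 || b == 0xF2 || b == 0xF3 ||
		    (b >= 0x40 && b <= 0x4F))
			continue;

		/* First non-prefix byte: check for the two-byte opcode. */
		return b == 0x0F &&
		       (instr[i + 1] == 0x0D || instr[i + 1] == 0x18);
	}
	return false;
}

int main(void)
{
	const unsigned char prefetch[] = { 0x3E, 0x0F, 0x0D, 0x08 };
	const unsigned char mov[]      = { 0x48, 0x89, 0xD8 };

	printf("%d %d\n", is_prefetch_insn(prefetch, sizeof(prefetch)),
	       is_prefetch_insn(mov, sizeof(mov)));	/* prints: 1 0 */
	return 0;
}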
@@ -262,25 +282,6 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
	}
 }

-/*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
-static inline void
-check_v8086_mode(struct pt_regs *regs, unsigned long address,
-		 struct task_struct *tsk)
-{
-#ifdef CONFIG_VM86
-	unsigned long bit;
-
-	if (!v8086_mode(regs) || !tsk->thread.vm86)
-		return;
-
-	bit = (address - 0xA0000) >> PAGE_SHIFT;
-	if (bit < 32)
-		tsk->thread.vm86->screen_bitmap |= 1 << bit;
-#endif
-}
-
 static bool low_pfn(unsigned long pfn)
 {
	return pfn < max_low_pfn;
@@ -335,15 +336,6 @@ KERN_ERR
 "******* Disabling USB legacy in the BIOS may also help.\n";
 #endif

-/*
- * No vm86 mode in 64-bit mode:
- */
-static inline void
-check_v8086_mode(struct pt_regs *regs, unsigned long address,
-		 struct task_struct *tsk)
-{
-}
-
 static int bad_address(void *p)
 {
	unsigned long dummy;
@@ -427,6 +419,9 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
	    || boot_cpu_data.x86 != 0xf)
		return 0;

+	if (user_mode(regs))
+		return 0;
+
	if (address != regs->ip)
		return 0;
@@ -462,10 +457,12 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
 }

 /* Pentium F0 0F C7 C8 bug workaround: */
-static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
+static int is_f00f_bug(struct pt_regs *regs, unsigned long error_code,
+		       unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG
-	if (boot_cpu_has_bug(X86_BUG_F00F) && idt_is_f00f_address(address)) {
+	if (boot_cpu_has_bug(X86_BUG_F00F) && !(error_code & X86_PF_USER) &&
+	    idt_is_f00f_address(address)) {
		handle_invalid_op(regs);
		return 1;
	}
@@ -630,22 +627,87 @@ static void set_signal_archinfo(unsigned long address,
 }

 static noinline void
-no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, int signal, int si_code)
+page_fault_oops(struct pt_regs *regs, unsigned long error_code,
+		unsigned long address)
 {
-	struct task_struct *tsk = current;
	unsigned long flags;
	int sig;

	if (user_mode(regs)) {
		/*
-		 * This is an implicit supervisor-mode access from user
-		 * mode.  Bypass all the kernel-mode recovery code and just
-		 * OOPS.
+		 * Implicit kernel access from user mode?  Skip the stack
+		 * overflow and EFI special cases.
		 */
		goto oops;
	}

+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * Stack overflow?  During boot, we can fault near the initial
+	 * stack in the direct map, but that's not an overflow -- check
+	 * that we're in vmalloc space to avoid this.
+	 */
+	if (is_vmalloc_addr((void *)address) &&
+	    (((unsigned long)current->stack - 1 - address < PAGE_SIZE) ||
+	     address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) {
+		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
+		/*
+		 * We're likely to be running with very little stack space
+		 * left.  It's plausible that we'd hit this condition but
+		 * double-fault even before we get this far, in which case
+		 * we're fine: the double-fault handler will deal with it.
+		 *
+		 * We don't want to make it all the way into the oops code
+		 * and then double-fault, though, because we're likely to
+		 * break the console driver and lose most of the stack dump.
+		 */
+		asm volatile ("movq %[stack], %%rsp\n\t"
+			      "call handle_stack_overflow\n\t"
+			      "1: jmp 1b"
+			      : ASM_CALL_CONSTRAINT
+			      : "D" ("kernel stack overflow (page fault)"),
+				"S" (regs), "d" (address),
+				[stack] "rm" (stack));
+		unreachable();
+	}
+#endif
+
+	/*
+	 * Buggy firmware could access regions which might page fault.  If
+	 * this happens, EFI has a special OOPS path that will try to
+	 * avoid hanging the system.
+	 */
+	if (IS_ENABLED(CONFIG_EFI))
+		efi_crash_gracefully_on_page_fault(address);
+
+oops:
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice:
+	 */
+	flags = oops_begin();
+
+	show_fault_oops(regs, error_code, address);
+
+	if (task_stack_end_corrupted(current))
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
+	sig = SIGKILL;
+	if (__die("Oops", regs, error_code))
+		sig = 0;
+
+	/* Executive summary in case the body of the oops scrolled away */
+	printk(KERN_DEFAULT "CR2: %016lx\n", address);
+
+	oops_end(flags, regs, sig);
+}
+
+static noinline void
+kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
+			 unsigned long address, int signal, int si_code)
+{
+	WARN_ON_ONCE(user_mode(regs));
+
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
		/*
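The stack-overflow test moved into page_fault_oops() above relies on unsigned wraparound: one subtraction-and-compare per side catches addresses within a page below the stack base or within a page above its top. A standalone model of that condition (hypothetical helper; THREAD_SIZE and the base address are assumed values):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE   4096UL
#define THREAD_SIZE (4 * PAGE_SIZE)	/* assumed kernel stack size */

/*
 * Mirrors the condition in page_fault_oops(): a hit in the guard
 * region just below the stack, or just above it, counts as a stack
 * overflow.  Unsigned subtraction makes out-of-range addresses wrap
 * to huge values, so a single compare covers each side.
 */
static bool near_stack_guard(unsigned long address, unsigned long stack)
{
	return (stack - 1 - address < PAGE_SIZE) ||
	       (address - (stack + THREAD_SIZE) < PAGE_SIZE);
}

int main(void)
{
	unsigned long stack = 0xffffc90000100000UL;	/* made-up base */

	printf("%d\n", near_stack_guard(stack - 8, stack));		   /* 1 */
	printf("%d\n", near_stack_guard(stack + THREAD_SIZE / 2, stack));  /* 0 */
	return 0;
}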
@@ -677,81 +739,14 @@ no_context(struct pt_regs *regs, unsigned long error_code,
		return;
	}

-#ifdef CONFIG_VMAP_STACK
	/*
-	 * Stack overflow?  During boot, we can fault near the initial
-	 * stack in the direct map, but that's not an overflow -- check
-	 * that we're in vmalloc space to avoid this.
+	 * AMD erratum #91 manifests as a spurious page fault on a PREFETCH
+	 * instruction.
	 */
-	if (is_vmalloc_addr((void *)address) &&
-	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
-	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
-		/*
-		 * We're likely to be running with very little stack space
-		 * left.  It's plausible that we'd hit this condition but
-		 * double-fault even before we get this far, in which case
-		 * we're fine: the double-fault handler will deal with it.
-		 *
-		 * We don't want to make it all the way into the oops code
-		 * and then double-fault, though, because we're likely to
-		 * break the console driver and lose most of the stack dump.
-		 */
-		asm volatile ("movq %[stack], %%rsp\n\t"
-			      "call handle_stack_overflow\n\t"
-			      "1: jmp 1b"
-			      : ASM_CALL_CONSTRAINT
-			      : "D" ("kernel stack overflow (page fault)"),
-				"S" (regs), "d" (address),
-				[stack] "rm" (stack));
-		unreachable();
-	}
-#endif
-
-	/*
-	 * 32-bit:
-	 *
-	 *   Valid to do another page fault here, because if this fault
-	 *   had been triggered by is_prefetch fixup_exception would have
-	 *   handled it.
-	 *
-	 * 64-bit:
-	 *
-	 *   Hall of shame of CPU/BIOS bugs.
-	 */
	if (is_prefetch(regs, error_code, address))
		return;

-	if (is_errata93(regs, address))
-		return;
-
-	/*
-	 * Buggy firmware could access regions which might page fault, try to
-	 * recover from such faults.
-	 */
-	if (IS_ENABLED(CONFIG_EFI))
-		efi_recover_from_page_fault(address);
-
-oops:
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice:
-	 */
-	flags = oops_begin();
-
-	show_fault_oops(regs, error_code, address);
-
-	if (task_stack_end_corrupted(tsk))
-		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
-
-	sig = SIGKILL;
-	if (__die("Oops", regs, error_code))
-		sig = 0;
-
-	/* Executive summary in case the body of the oops scrolled away */
-	printk(KERN_DEFAULT "CR2: %016lx\n", address);
-
-	oops_end(flags, regs, sig);
+	page_fault_oops(regs, error_code, address);
 }
@@ -796,47 +791,49 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 {
	struct task_struct *tsk = current;

-	/* User mode accesses just cause a SIGSEGV */
-	if (user_mode(regs) && (error_code & X86_PF_USER)) {
-		/*
-		 * It's possible to have interrupts off here:
-		 */
-		local_irq_enable();
-
-		/*
-		 * Valid to do another page fault here because this one came
-		 * from user space:
-		 */
-		if (is_prefetch(regs, error_code, address))
-			return;
-
-		if (is_errata100(regs, address))
-			return;
-
-		sanitize_error_code(address, &error_code);
-
-		if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
-			return;
-
-		if (likely(show_unhandled_signals))
-			show_signal_msg(regs, error_code, address, tsk);
-
-		set_signal_archinfo(address, error_code);
-
-		if (si_code == SEGV_PKUERR)
-			force_sig_pkuerr((void __user *)address, pkey);
-
-		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
-
-		local_irq_disable();
-
+	if (!user_mode(regs)) {
+		kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code);
		return;
	}

-	if (is_f00f_bug(regs, address))
+	if (!(error_code & X86_PF_USER)) {
+		/* Implicit user access to kernel memory -- just oops */
+		page_fault_oops(regs, error_code, address);
+		return;
+	}
+
+	/*
+	 * User mode accesses just cause a SIGSEGV.
+	 * It's possible to have interrupts off here:
+	 */
+	local_irq_enable();
+
+	/*
+	 * Valid to do another page fault here because this one came
+	 * from user space:
+	 */
+	if (is_prefetch(regs, error_code, address))
		return;

-	no_context(regs, error_code, address, SIGSEGV, si_code);
+	if (is_errata100(regs, address))
+		return;
+
+	sanitize_error_code(address, &error_code);
+
+	if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
+		return;
+
+	if (likely(show_unhandled_signals))
+		show_signal_msg(regs, error_code, address, tsk);
+
+	set_signal_archinfo(address, error_code);
+
+	if (si_code == SEGV_PKUERR)
+		force_sig_pkuerr((void __user *)address, pkey);
+
+	force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+
+	local_irq_disable();
 }

 static noinline void
@@ -926,8 +923,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
	  vm_fault_t fault)
 {
	/* Kernel mode? Handle exceptions or die: */
-	if (!(error_code & X86_PF_USER)) {
-		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
+	if (!user_mode(regs)) {
+		kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR);
		return;
	}
@@ -961,40 +958,6 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
 }

-static noinline void
-mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, vm_fault_t fault)
-{
-	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
-		no_context(regs, error_code, address, 0, 0);
-		return;
-	}
-
-	if (fault & VM_FAULT_OOM) {
-		/* Kernel mode? Handle exceptions or die: */
-		if (!(error_code & X86_PF_USER)) {
-			no_context(regs, error_code, address,
-				   SIGSEGV, SEGV_MAPERR);
-			return;
-		}
-
-		/*
-		 * We ran out of memory, call the OOM killer, and return the
-		 * userspace (which will retry the fault, or kill us if we got
-		 * oom-killed):
-		 */
-		pagefault_out_of_memory();
-	} else {
-		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
-			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, fault);
-		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address);
-		else
-			BUG();
-	}
-}
-
 static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
 {
	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
@@ -1209,6 +1172,9 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
	}
 #endif

+	if (is_f00f_bug(regs, hw_error_code, address))
+		return;
+
	/* Was the fault spurious, caused by lazy TLB invalidation? */
	if (spurious_kernel_fault(hw_error_code, address))
		return;
@@ -1229,10 +1195,17 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 }
 NOKPROBE_SYMBOL(do_kern_addr_fault);

-/* Handle faults in the user portion of the address space */
+/*
+ * Handle faults in the user portion of the address space.  Nothing in here
+ * should check X86_PF_USER without a specific justification: for almost
+ * all purposes, we should treat a normal kernel access to user memory
+ * (e.g. get_user(), put_user(), etc.) the same as the WRUSS instruction.
+ * The one exception is AC flag handling, which is, per the x86
+ * architecture, special for WRUSS.
+ */
 static inline
 void do_user_addr_fault(struct pt_regs *regs,
-			unsigned long hw_error_code,
+			unsigned long error_code,
			unsigned long address)
 {
	struct vm_area_struct *vma;
@@ -1244,6 +1217,21 @@ void do_user_addr_fault(struct pt_regs *regs,
	tsk = current;
	mm = tsk->mm;

+	if (unlikely((error_code & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR)) {
+		/*
+		 * Whoops, this is kernel mode code trying to execute from
+		 * user memory.  Unless this is AMD erratum #93, which
+		 * corrupts RIP such that it looks like a user address,
+		 * this is unrecoverable.  Don't even try to look up the
+		 * VMA or look for extable entries.
+		 */
+		if (is_errata93(regs, address))
+			return;
+
+		page_fault_oops(regs, error_code, address);
+		return;
+	}
+
	/* kprobes don't want to hook the spurious faults: */
	if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
		return;
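The new guard above uses a mask-and-compare idiom: masking the error code with both bits and requiring the result to equal X86_PF_INSTR alone means "instruction fetch, but not from user mode" in a single test. A small illustration with the same bit values the x86 page-fault error code uses:

#include <stdio.h>

#define X86_PF_USER  (1UL << 2)	/* fault came from user mode */
#define X86_PF_INSTR (1UL << 4)	/* fault was an instruction fetch */

int main(void)
{
	unsigned long codes[] = {
		X86_PF_INSTR,			/* kernel exec of user memory */
		X86_PF_USER | X86_PF_INSTR,	/* user exec fault */
		0,				/* kernel data fault */
	};

	for (int i = 0; i < 3; i++) {
		unsigned long ec = codes[i];
		int match = (ec & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR;
		printf("error_code=%#lx -> %s\n", ec, match ? "oops" : "continue");
	}
	return 0;
}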
@@ -1252,8 +1240,8 @@ void do_user_addr_fault(struct pt_regs *regs,
	 * Reserved bits are never expected to be set on
	 * entries in the user portion of the page tables.
	 */
-	if (unlikely(hw_error_code & X86_PF_RSVD))
-		pgtable_bad(regs, hw_error_code, address);
+	if (unlikely(error_code & X86_PF_RSVD))
+		pgtable_bad(regs, error_code, address);

	/*
	 * If SMAP is on, check for invalid kernel (supervisor) access to user
@@ -1263,10 +1251,13 @@ void do_user_addr_fault(struct pt_regs *regs,
	 * enforcement appears to be consistent with the USER bit.
	 */
	if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
-		     !(hw_error_code & X86_PF_USER) &&
-		     !(regs->flags & X86_EFLAGS_AC)))
-	{
-		bad_area_nosemaphore(regs, hw_error_code, address);
+		     !(error_code & X86_PF_USER) &&
+		     !(regs->flags & X86_EFLAGS_AC))) {
+		/*
+		 * No extable entry here.  This was a kernel access to an
+		 * invalid pointer.  get_kernel_nofault() will not get here.
+		 */
+		page_fault_oops(regs, error_code, address);
		return;
	}

@@ -1275,7 +1266,7 @@ void do_user_addr_fault(struct pt_regs *regs,
	 * in a region with pagefaults disabled then we must not take the fault
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
-		bad_area_nosemaphore(regs, hw_error_code, address);
+		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

@@ -1296,9 +1287,9 @@ void do_user_addr_fault(struct pt_regs *regs,

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

-	if (hw_error_code & X86_PF_WRITE)
+	if (error_code & X86_PF_WRITE)
		flags |= FAULT_FLAG_WRITE;
-	if (hw_error_code & X86_PF_INSTR)
+	if (error_code & X86_PF_INSTR)
		flags |= FAULT_FLAG_INSTRUCTION;

 #ifdef CONFIG_X86_64

@@ -1314,7 +1305,7 @@ void do_user_addr_fault(struct pt_regs *regs,
	 * to consider the PF_PK bit.
	 */
	if (is_vsyscall_vaddr(address)) {
-		if (emulate_vsyscall(hw_error_code, regs, address))
+		if (emulate_vsyscall(error_code, regs, address))
			return;
	}
 #endif

@@ -1337,7 +1328,7 @@ void do_user_addr_fault(struct pt_regs *regs,
			 * Fault from code in kernel from
			 * which we do not expect faults.
			 */
-			bad_area_nosemaphore(regs, hw_error_code, address);
+			bad_area_nosemaphore(regs, error_code, address);
			return;
		}
 retry:

@@ -1353,17 +1344,17 @@ retry:

	vma = find_vma(mm, address);
	if (unlikely(!vma)) {
-		bad_area(regs, hw_error_code, address);
+		bad_area(regs, error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
-		bad_area(regs, hw_error_code, address);
+		bad_area(regs, error_code, address);
		return;
	}
	if (unlikely(expand_stack(vma, address))) {
-		bad_area(regs, hw_error_code, address);
+		bad_area(regs, error_code, address);
		return;
	}

@@ -1372,8 +1363,8 @@ retry:
	 * we can handle it..
	 */
 good_area:
-	if (unlikely(access_error(hw_error_code, vma))) {
-		bad_area_access_error(regs, hw_error_code, address, vma);
+	if (unlikely(access_error(error_code, vma))) {
+		bad_area_access_error(regs, error_code, address, vma);
		return;
	}

@@ -1392,11 +1383,14 @@ good_area:
	 */
	fault = handle_mm_fault(vma, address, flags, regs);

-	/* Quick path to respond to signals */
	if (fault_signal_pending(fault, regs)) {
+		/*
+		 * Quick path to respond to signals.  The core mm code
+		 * has unlocked the mm for us if we get here.
+		 */
		if (!user_mode(regs))
-			no_context(regs, hw_error_code, address, SIGBUS,
-				   BUS_ADRERR);
+			kernelmode_fixup_or_oops(regs, error_code, address,
+						 SIGBUS, BUS_ADRERR);
		return;
	}

@@ -1412,12 +1406,37 @@ good_area:
	}

	mmap_read_unlock(mm);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, hw_error_code, address, fault);
+	if (likely(!(fault & VM_FAULT_ERROR)))
+		return;
+
+	if (fatal_signal_pending(current) && !user_mode(regs)) {
+		kernelmode_fixup_or_oops(regs, error_code, address, 0, 0);
		return;
	}

-	check_v8086_mode(regs, address, tsk);
+	if (fault & VM_FAULT_OOM) {
+		/* Kernel mode? Handle exceptions or die: */
+		if (!user_mode(regs)) {
+			kernelmode_fixup_or_oops(regs, error_code, address,
+						 SIGSEGV, SEGV_MAPERR);
+			return;
+		}
+
+		/*
+		 * We ran out of memory, call the OOM killer, and return the
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed):
+		 */
+		pagefault_out_of_memory();
+	} else {
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+			     VM_FAULT_HWPOISON_LARGE))
+			do_sigbus(regs, error_code, address, fault);
+		else if (fault & VM_FAULT_SIGSEGV)
+			bad_area_nosemaphore(regs, error_code, address);
+		else
+			BUG();
+	}
 }
 NOKPROBE_SYMBOL(do_user_addr_fault);
@@ -157,16 +157,25 @@ __ref void *alloc_low_pages(unsigned int num)
 }

 /*
- * By default need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS.
- * With KASLR memory randomization, depending on the machine e820 memory
- * and the PUD alignment. We may need twice more pages when KASLR memory
+ * By default need to be able to allocate page tables below PGD firstly for
+ * the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping.
+ * With KASLR memory randomization, depending on the machine e820 memory and the
+ * PUD alignment, twice that many pages may be needed when KASLR memory
 * randomization is enabled.
 */
+
+#ifndef CONFIG_X86_5LEVEL
+#define INIT_PGD_PAGE_TABLES	3
+#else
+#define INIT_PGD_PAGE_TABLES	4
+#endif
+
 #ifndef CONFIG_RANDOMIZE_MEMORY
-#define INIT_PGD_PAGE_COUNT	6
+#define INIT_PGD_PAGE_COUNT	(2 * INIT_PGD_PAGE_TABLES)
 #else
-#define INIT_PGD_PAGE_COUNT	12
+#define INIT_PGD_PAGE_COUNT	(4 * INIT_PGD_PAGE_TABLES)
 #endif
+
 #define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void __init early_alloc_pgt_buf(void)
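The new macros make the early page-table budget auditable: 3 tables below the PGD with 4-level paging (4 with CONFIG_X86_5LEVEL), doubled for the two ranges mapped at boot, and doubled again when KASLR memory randomization is enabled. A quick check of the resulting buffer sizes (sketch assuming 4 KiB pages):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Mirror of the Kconfig-dependent macros above, for all four cases. */
static unsigned long pgt_buf_size(int levels5, int kaslr)
{
	unsigned long tables = levels5 ? 4 : 3;		/* INIT_PGD_PAGE_TABLES */
	unsigned long count = (kaslr ? 4 : 2) * tables;	/* INIT_PGD_PAGE_COUNT */

	return count * PAGE_SIZE;			/* INIT_PGT_BUF_SIZE */
}

int main(void)
{
	printf("4-level, no KASLR: %lu KiB\n", pgt_buf_size(0, 0) / 1024); /* 24 */
	printf("4-level, KASLR:    %lu KiB\n", pgt_buf_size(0, 1) / 1024); /* 48 */
	printf("5-level, KASLR:    %lu KiB\n", pgt_buf_size(1, 1) / 1024); /* 64 */
	return 0;
}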
@@ -10,8 +10,6 @@

 #define pr_fmt(fmt) "mmiotrace: " fmt

-#define DEBUG 1
-
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/slab.h>
@@ -687,15 +687,25 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
  * @return: Returns, if the page fault is not handled. This function
  * will never return if the page fault is handled successfully.
  */
-void efi_recover_from_page_fault(unsigned long phys_addr)
+void efi_crash_gracefully_on_page_fault(unsigned long phys_addr)
 {
	if (!IS_ENABLED(CONFIG_X86_64))
		return;

	/*
-	 * Make sure that an efi runtime service caused the page fault.
+	 * If we get an interrupt/NMI while processing an EFI runtime service
+	 * then this is a regular OOPS, not an EFI failure.
	 */
-	if (efi_rts_work.efi_rts_id == EFI_NONE)
+	if (in_interrupt())
+		return;
+
+	/*
+	 * Make sure that an efi runtime service caused the page fault.
+	 * READ_ONCE() because we might be OOPSing in a different thread,
+	 * and we don't want to trip KTSAN while trying to OOPS.
+	 */
+	if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE ||
+	    current_work() != &efi_rts_work.work)
		return;

	/*
@@ -747,6 +757,4 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
		set_current_state(TASK_IDLE);
		schedule();
	}
-
-	return;
 }