- PTRACE_GETREGS/PTRACE_PUTREGS regset selection cleanup
- Another initial cleanup - more to follow - to the fault handling code. - Other minor cleanups and corrections. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmAqU0oACgkQEsHwGGHe VUruWw//VA+/K7Ykd8tjZdmJPWdfsdqBtOrolh4hiajM6iYckTip/FdwHpeEQwM9 ff0iNMrxICG3gbQxCX6WNzPeJatYsnjtF67whfat2SEzNHSDtZDb1Bm20s2/1fbY OurRBTEBzuYMolpEJ2XABpu7LQ+6TV3LJ6yUBungILMOjP7KvrCK0SUrWj253VDU XljK5XBZnmYlEjPU6dlhn64Wsl/GD7AWCAeZGq47EgjH2cR6gxNmu9kYAArGbdiJ WjF8MWE7qVwCPUTiCBv+P1CjsQawvlcUY54wtG65dBYAZvpjmN82T2ypguzAt8KT 12A38vFlBuEUAWC0rUymNouh8Q20AElpdw/odLElHkpNxbHhf/7RyZ1E00LjsFtn MF9Gp9aSIQbfYWK+Hin9oRvqXckV08u3KtzUNeyMbdCmpyqHh6prj8JEZaxKZZUp zCaX8Qasn+Q9zL0DO51WI9EPOwpvSpifUYHmd5RHGbQDW9DjYK4mkBCHhjVfYXd/ NcxRO5rrMLmMG+XuNPg9vuHMi2HJnClJ6odD6b80xGvBodTZxZnqnYO9tUImbYnW pdmt73YDvakei8XY7cAdNWcsTi0kQYZGfInna6z43Ri2l+I1TZaoKGDqn7TbzNbb 9RB0lrD0tfW0PvvDbVwco0Q+8/ykIbvPkHPvjQGWioxHi6yI49s= =uVEk -----END PGP SIGNATURE----- Merge tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 mm cleanups from Borislav Petkov: - PTRACE_GETREGS/PTRACE_PUTREGS regset selection cleanup - Another initial cleanup - more to follow - to the fault handling code. - Other minor cleanups and corrections. 
* tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/{fault,efi}: Fix and rename efi_recover_from_page_fault() x86/fault: Don't run fixups for SMAP violations x86/fault: Don't look for extable entries for SMEP violations x86/fault: Rename no_context() to kernelmode_fixup_or_oops() x86/fault: Bypass no_context() for implicit kernel faults from usermode x86/fault: Split the OOPS code out from no_context() x86/fault: Improve kernel-executing-user-memory handling x86/fault: Correct a few user vs kernel checks wrt WRUSS x86/fault: Document the locking in the fault_signal_pending() path x86/fault/32: Move is_f00f_bug() to do_kern_addr_fault() x86/fault: Fold mm_fault_error() into do_user_addr_fault() x86/fault: Skip the AMD erratum #91 workaround on unaffected CPUs x86/fault: Fix AMD erratum #91 errata fixup for user code x86/Kconfig: Remove HPET_EMULATE_RTC depends on RTC x86/asm: Fixup TASK_SIZE_MAX comment x86/ptrace: Clean up PTRACE_GETREGS/PTRACE_PUTREGS regset selection x86/vm86/32: Remove VM86_SCREEN_BITMAP support x86: Remove definition of DEBUG x86/entry: Remove now unused do_IRQ() declaration x86/mm: Remove duplicate definition of _PAGE_PAT_LARGE ...
This commit is contained in:
commit
ae821d2107
|
@ -890,7 +890,7 @@ config HPET_TIMER
|
|||
|
||||
config HPET_EMULATE_RTC
|
||||
def_bool y
|
||||
depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
|
||||
depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
|
||||
|
||||
config APB_TIMER
|
||||
def_bool y if X86_INTEL_MID
|
||||
|
|
|
@ -139,7 +139,7 @@ extern void __init efi_dump_pagetable(void);
|
|||
extern void __init efi_apply_memmap_quirks(void);
|
||||
extern int __init efi_reuse_config(u64 tables, int nr_tables);
|
||||
extern void efi_delete_dummy_variable(void);
|
||||
extern void efi_recover_from_page_fault(unsigned long phys_addr);
|
||||
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
|
||||
extern void efi_free_boot_services(void);
|
||||
|
||||
void efi_enter_mm(void);
|
||||
|
|
|
@ -40,8 +40,6 @@ extern void native_init_IRQ(void);
|
|||
|
||||
extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs);
|
||||
|
||||
extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
|
||||
|
||||
extern void init_ISA_irqs(void);
|
||||
|
||||
extern void __init init_IRQ(void);
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
|
||||
* address, then that syscall will enter the kernel with a
|
||||
* non-canonical return address, and SYSRET will explode dangerously.
|
||||
* We avoid this particular problem by preventing anything executable
|
||||
* We avoid this particular problem by preventing anything
|
||||
* from being mapped at the maximum canonical address.
|
||||
*
|
||||
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
|
||||
|
|
|
@ -177,8 +177,6 @@ enum page_cache_mode {
|
|||
#define __pgprot(x) ((pgprot_t) { (x) } )
|
||||
#define __pg(x) __pgprot(x)
|
||||
|
||||
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
||||
|
||||
#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
|
||||
#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
|
||||
#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
|
||||
|
|
|
@ -36,7 +36,6 @@ struct vm86 {
|
|||
unsigned long saved_sp0;
|
||||
|
||||
unsigned long flags;
|
||||
unsigned long screen_bitmap;
|
||||
unsigned long cpu_type;
|
||||
struct revectored_struct int_revectored;
|
||||
struct revectored_struct int21_revectored;
|
||||
|
|
|
@ -97,7 +97,7 @@ struct revectored_struct {
|
|||
struct vm86_struct {
|
||||
struct vm86_regs regs;
|
||||
unsigned long flags;
|
||||
unsigned long screen_bitmap;
|
||||
unsigned long screen_bitmap; /* unused, preserved by vm86() */
|
||||
unsigned long cpu_type;
|
||||
struct revectored_struct int_revectored;
|
||||
struct revectored_struct int21_revectored;
|
||||
|
@ -106,7 +106,7 @@ struct vm86_struct {
|
|||
/*
|
||||
* flags masks
|
||||
*/
|
||||
#define VM86_SCREEN_BITMAP 0x0001
|
||||
#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */
|
||||
|
||||
struct vm86plus_info_struct {
|
||||
unsigned long force_return_for_pic:1;
|
||||
|
|
|
@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void)
|
|||
if (!size_base)
|
||||
continue;
|
||||
|
||||
size_base = to_size_factor(size_base, &size_factor),
|
||||
size_base = to_size_factor(size_base, &size_factor);
|
||||
start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
|
||||
start_base = to_size_factor(start_base, &start_factor),
|
||||
start_base = to_size_factor(start_base, &start_factor);
|
||||
type = range_state[i].type;
|
||||
|
||||
pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
|
||||
* because MTRRs can span up to 40 bits (36bits on most modern x86)
|
||||
*/
|
||||
#define DEBUG
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/init.h>
|
||||
|
|
|
@ -31,8 +31,6 @@
|
|||
System Programming Guide; Section 9.11. (1997 edition - PPro).
|
||||
*/
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
|
||||
|
||||
#include <linux/stop_machine.h>
|
||||
|
|
|
@ -4,9 +4,6 @@
|
|||
#include <linux/string.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
|
||||
#define DEBUG 1
|
||||
|
||||
static struct iommu_table_entry * __init
|
||||
find_dependents_of(struct iommu_table_entry *start,
|
||||
struct iommu_table_entry *finish,
|
||||
|
|
|
@ -704,6 +704,9 @@ void ptrace_disable(struct task_struct *child)
|
|||
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
static const struct user_regset_view user_x86_32_view; /* Initialized below. */
|
||||
#endif
|
||||
#ifdef CONFIG_X86_64
|
||||
static const struct user_regset_view user_x86_64_view; /* Initialized below. */
|
||||
#endif
|
||||
|
||||
long arch_ptrace(struct task_struct *child, long request,
|
||||
unsigned long addr, unsigned long data)
|
||||
|
@ -711,6 +714,14 @@ long arch_ptrace(struct task_struct *child, long request,
|
|||
int ret;
|
||||
unsigned long __user *datap = (unsigned long __user *)data;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* This is native 64-bit ptrace() */
|
||||
const struct user_regset_view *regset_view = &user_x86_64_view;
|
||||
#else
|
||||
/* This is native 32-bit ptrace() */
|
||||
const struct user_regset_view *regset_view = &user_x86_32_view;
|
||||
#endif
|
||||
|
||||
switch (request) {
|
||||
/* read the word at location addr in the USER area. */
|
||||
case PTRACE_PEEKUSR: {
|
||||
|
@ -749,28 +760,28 @@ long arch_ptrace(struct task_struct *child, long request,
|
|||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
@ -1152,28 +1163,28 @@ static long x32_arch_ptrace(struct task_struct *child,
|
|||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
@ -1309,6 +1320,25 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
|
|||
xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is used by the core dump code to decide which regset to dump. The
|
||||
* core dump code writes out the resulting .e_machine and the corresponding
|
||||
* regsets. This is suboptimal if the task is messing around with its CS.L
|
||||
* field, but at worst the core dump will end up missing some information.
|
||||
*
|
||||
* Unfortunately, it is also used by the broken PTRACE_GETREGSET and
|
||||
* PTRACE_SETREGSET APIs. These APIs look at the .regsets field but have
|
||||
* no way to make sure that the e_machine they use matches the caller's
|
||||
* expectations. The result is that the data format returned by
|
||||
* PTRACE_GETREGSET depends on the returned CS field (and even the offset
|
||||
* of the returned CS field depends on its value!) and the data format
|
||||
* accepted by PTRACE_SETREGSET is determined by the old CS value. The
|
||||
* upshot is that it is basically impossible to use these APIs correctly.
|
||||
*
|
||||
* The best way to fix it in the long run would probably be to add new
|
||||
* improved ptrace() APIs to read and write registers reliably, possibly by
|
||||
* allowing userspace to select the ELF e_machine variant that they expect.
|
||||
*/
|
||||
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
|
|
@ -90,14 +90,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
|
|||
unsigned long, prot, unsigned long, flags,
|
||||
unsigned long, fd, unsigned long, off)
|
||||
{
|
||||
long error;
|
||||
error = -EINVAL;
|
||||
if (off & ~PAGE_MASK)
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
|
||||
error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
|
||||
out:
|
||||
return error;
|
||||
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void find_start_end(unsigned long addr, unsigned long flags,
|
||||
|
|
|
@ -134,7 +134,11 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
|||
unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
|
||||
unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
|
||||
unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
|
||||
unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
|
||||
|
||||
/*
|
||||
* Don't write screen_bitmap in case some user had a value there
|
||||
* and expected it to remain unchanged.
|
||||
*/
|
||||
|
||||
user_access_end();
|
||||
|
||||
|
@ -160,49 +164,6 @@ Efault:
|
|||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
int i;
|
||||
|
||||
mmap_write_lock(mm);
|
||||
pgd = pgd_offset(mm, 0xA0000);
|
||||
if (pgd_none_or_clear_bad(pgd))
|
||||
goto out;
|
||||
p4d = p4d_offset(pgd, 0xA0000);
|
||||
if (p4d_none_or_clear_bad(p4d))
|
||||
goto out;
|
||||
pud = pud_offset(p4d, 0xA0000);
|
||||
if (pud_none_or_clear_bad(pud))
|
||||
goto out;
|
||||
pmd = pmd_offset(pud, 0xA0000);
|
||||
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
vma = find_vma(mm, 0xA0000);
|
||||
split_huge_pmd(vma, pmd, 0xA0000);
|
||||
}
|
||||
if (pmd_none_or_clear_bad(pmd))
|
||||
goto out;
|
||||
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
|
||||
for (i = 0; i < 32; i++) {
|
||||
if (pte_present(*pte))
|
||||
set_pte(pte, pte_wrprotect(*pte));
|
||||
pte++;
|
||||
}
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
out:
|
||||
mmap_write_unlock(mm);
|
||||
flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int do_vm86_irq_handling(int subfunction, int irqnumber);
|
||||
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
|
||||
|
||||
|
@ -282,6 +243,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
|||
offsetof(struct vm86_struct, int_revectored)))
|
||||
return -EFAULT;
|
||||
|
||||
|
||||
/* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
|
||||
if (v.flags & VM86_SCREEN_BITMAP) {
|
||||
char comm[TASK_COMM_LEN];
|
||||
|
||||
pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&vm86regs, 0, sizeof(vm86regs));
|
||||
|
||||
vm86regs.pt.bx = v.regs.ebx;
|
||||
|
@ -302,7 +272,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
|||
vm86regs.gs = v.regs.gs;
|
||||
|
||||
vm86->flags = v.flags;
|
||||
vm86->screen_bitmap = v.screen_bitmap;
|
||||
vm86->cpu_type = v.cpu_type;
|
||||
|
||||
if (copy_from_user(&vm86->int_revectored,
|
||||
|
@ -370,9 +339,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
|||
update_task_stack(tsk);
|
||||
preempt_enable();
|
||||
|
||||
if (vm86->flags & VM86_SCREEN_BITMAP)
|
||||
mark_screen_rdonly(tsk->mm);
|
||||
|
||||
memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
|
||||
return regs->ax;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <linux/prefetch.h> /* prefetchw */
|
||||
#include <linux/context_tracking.h> /* exception_enter(), ... */
|
||||
#include <linux/uaccess.h> /* faulthandler_disabled() */
|
||||
#include <linux/efi.h> /* efi_recover_from_page_fault()*/
|
||||
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
||||
|
@ -25,7 +25,7 @@
|
|||
#include <asm/vsyscall.h> /* emulate_vsyscall */
|
||||
#include <asm/vm86.h> /* struct vm86 */
|
||||
#include <asm/mmu_context.h> /* vma_pkey() */
|
||||
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
|
||||
#include <asm/efi.h> /* efi_crash_gracefully_on_page_fault()*/
|
||||
#include <asm/desc.h> /* store_idt(), ... */
|
||||
#include <asm/cpu_entry_area.h> /* exception stack */
|
||||
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
|
||||
|
@ -54,7 +54,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
|
|||
* 32-bit mode:
|
||||
*
|
||||
* Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
|
||||
* Check that here and ignore it.
|
||||
* Check that here and ignore it. This is AMD erratum #91.
|
||||
*
|
||||
* 64-bit mode:
|
||||
*
|
||||
|
@ -83,11 +83,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
|
|||
#ifdef CONFIG_X86_64
|
||||
case 0x40:
|
||||
/*
|
||||
* In AMD64 long mode 0x40..0x4F are valid REX prefixes
|
||||
* Need to figure out under what instruction mode the
|
||||
* instruction was issued. Could check the LDT for lm,
|
||||
* but for now it's good enough to assume that long
|
||||
* mode only uses well known segments or kernel.
|
||||
* In 64-bit mode 0x40..0x4F are valid REX prefixes
|
||||
*/
|
||||
return (!user_mode(regs) || user_64bit_mode(regs));
|
||||
#endif
|
||||
|
@ -110,6 +106,15 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
|
|||
}
|
||||
}
|
||||
|
||||
static bool is_amd_k8_pre_npt(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
return unlikely(IS_ENABLED(CONFIG_CPU_SUP_AMD) &&
|
||||
c->x86_vendor == X86_VENDOR_AMD &&
|
||||
c->x86 == 0xf && c->x86_model < 0x40);
|
||||
}
|
||||
|
||||
static int
|
||||
is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
||||
{
|
||||
|
@ -117,6 +122,10 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
|||
unsigned char *instr;
|
||||
int prefetch = 0;
|
||||
|
||||
/* Erratum #91 affects AMD K8, pre-NPT CPUs */
|
||||
if (!is_amd_k8_pre_npt())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If it was an exec (instruction fetch) fault on NX page, then
|
||||
* do not ignore the fault:
|
||||
|
@ -127,20 +136,31 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
|||
instr = (void *)convert_ip_to_linear(current, regs);
|
||||
max_instr = instr + 15;
|
||||
|
||||
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
|
||||
return 0;
|
||||
/*
|
||||
* This code has historically always bailed out if IP points to a
|
||||
* not-present page (e.g. due to a race). No one has ever
|
||||
* complained about this.
|
||||
*/
|
||||
pagefault_disable();
|
||||
|
||||
while (instr < max_instr) {
|
||||
unsigned char opcode;
|
||||
|
||||
if (get_kernel_nofault(opcode, instr))
|
||||
break;
|
||||
if (user_mode(regs)) {
|
||||
if (get_user(opcode, instr))
|
||||
break;
|
||||
} else {
|
||||
if (get_kernel_nofault(opcode, instr))
|
||||
break;
|
||||
}
|
||||
|
||||
instr++;
|
||||
|
||||
if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
|
||||
break;
|
||||
}
|
||||
|
||||
pagefault_enable();
|
||||
return prefetch;
|
||||
}
|
||||
|
||||
|
@ -262,25 +282,6 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Did it hit the DOS screen memory VA from vm86 mode?
|
||||
*/
|
||||
static inline void
|
||||
check_v8086_mode(struct pt_regs *regs, unsigned long address,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_VM86
|
||||
unsigned long bit;
|
||||
|
||||
if (!v8086_mode(regs) || !tsk->thread.vm86)
|
||||
return;
|
||||
|
||||
bit = (address - 0xA0000) >> PAGE_SHIFT;
|
||||
if (bit < 32)
|
||||
tsk->thread.vm86->screen_bitmap |= 1 << bit;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool low_pfn(unsigned long pfn)
|
||||
{
|
||||
return pfn < max_low_pfn;
|
||||
|
@ -335,15 +336,6 @@ KERN_ERR
|
|||
"******* Disabling USB legacy in the BIOS may also help.\n";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* No vm86 mode in 64-bit mode:
|
||||
*/
|
||||
static inline void
|
||||
check_v8086_mode(struct pt_regs *regs, unsigned long address,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
|
||||
static int bad_address(void *p)
|
||||
{
|
||||
unsigned long dummy;
|
||||
|
@ -427,6 +419,9 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
|
|||
|| boot_cpu_data.x86 != 0xf)
|
||||
return 0;
|
||||
|
||||
if (user_mode(regs))
|
||||
return 0;
|
||||
|
||||
if (address != regs->ip)
|
||||
return 0;
|
||||
|
||||
|
@ -462,10 +457,12 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
|
|||
}
|
||||
|
||||
/* Pentium F0 0F C7 C8 bug workaround: */
|
||||
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
|
||||
static int is_f00f_bug(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address)
|
||||
{
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
if (boot_cpu_has_bug(X86_BUG_F00F) && idt_is_f00f_address(address)) {
|
||||
if (boot_cpu_has_bug(X86_BUG_F00F) && !(error_code & X86_PF_USER) &&
|
||||
idt_is_f00f_address(address)) {
|
||||
handle_invalid_op(regs);
|
||||
return 1;
|
||||
}
|
||||
|
@ -630,22 +627,87 @@ static void set_signal_archinfo(unsigned long address,
|
|||
}
|
||||
|
||||
static noinline void
|
||||
no_context(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address, int signal, int si_code)
|
||||
page_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
unsigned long flags;
|
||||
int sig;
|
||||
|
||||
if (user_mode(regs)) {
|
||||
/*
|
||||
* This is an implicit supervisor-mode access from user
|
||||
* mode. Bypass all the kernel-mode recovery code and just
|
||||
* OOPS.
|
||||
* Implicit kernel access from user mode? Skip the stack
|
||||
* overflow and EFI special cases.
|
||||
*/
|
||||
goto oops;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
/*
|
||||
* Stack overflow? During boot, we can fault near the initial
|
||||
* stack in the direct map, but that's not an overflow -- check
|
||||
* that we're in vmalloc space to avoid this.
|
||||
*/
|
||||
if (is_vmalloc_addr((void *)address) &&
|
||||
(((unsigned long)current->stack - 1 - address < PAGE_SIZE) ||
|
||||
address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
||||
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
|
||||
/*
|
||||
* We're likely to be running with very little stack space
|
||||
* left. It's plausible that we'd hit this condition but
|
||||
* double-fault even before we get this far, in which case
|
||||
* we're fine: the double-fault handler will deal with it.
|
||||
*
|
||||
* We don't want to make it all the way into the oops code
|
||||
* and then double-fault, though, because we're likely to
|
||||
* break the console driver and lose most of the stack dump.
|
||||
*/
|
||||
asm volatile ("movq %[stack], %%rsp\n\t"
|
||||
"call handle_stack_overflow\n\t"
|
||||
"1: jmp 1b"
|
||||
: ASM_CALL_CONSTRAINT
|
||||
: "D" ("kernel stack overflow (page fault)"),
|
||||
"S" (regs), "d" (address),
|
||||
[stack] "rm" (stack));
|
||||
unreachable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Buggy firmware could access regions which might page fault. If
|
||||
* this happens, EFI has a special OOPS path that will try to
|
||||
* avoid hanging the system.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_EFI))
|
||||
efi_crash_gracefully_on_page_fault(address);
|
||||
|
||||
oops:
|
||||
/*
|
||||
* Oops. The kernel tried to access some bad page. We'll have to
|
||||
* terminate things with extreme prejudice:
|
||||
*/
|
||||
flags = oops_begin();
|
||||
|
||||
show_fault_oops(regs, error_code, address);
|
||||
|
||||
if (task_stack_end_corrupted(current))
|
||||
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
|
||||
|
||||
sig = SIGKILL;
|
||||
if (__die("Oops", regs, error_code))
|
||||
sig = 0;
|
||||
|
||||
/* Executive summary in case the body of the oops scrolled away */
|
||||
printk(KERN_DEFAULT "CR2: %016lx\n", address);
|
||||
|
||||
oops_end(flags, regs, sig);
|
||||
}
|
||||
|
||||
static noinline void
|
||||
kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address, int signal, int si_code)
|
||||
{
|
||||
WARN_ON_ONCE(user_mode(regs));
|
||||
|
||||
/* Are we prepared to handle this kernel fault? */
|
||||
if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
|
||||
/*
|
||||
|
@ -677,81 +739,14 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
/*
|
||||
* Stack overflow? During boot, we can fault near the initial
|
||||
* stack in the direct map, but that's not an overflow -- check
|
||||
* that we're in vmalloc space to avoid this.
|
||||
*/
|
||||
if (is_vmalloc_addr((void *)address) &&
|
||||
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
|
||||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
||||
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
|
||||
/*
|
||||
* We're likely to be running with very little stack space
|
||||
* left. It's plausible that we'd hit this condition but
|
||||
* double-fault even before we get this far, in which case
|
||||
* we're fine: the double-fault handler will deal with it.
|
||||
*
|
||||
* We don't want to make it all the way into the oops code
|
||||
* and then double-fault, though, because we're likely to
|
||||
* break the console driver and lose most of the stack dump.
|
||||
*/
|
||||
asm volatile ("movq %[stack], %%rsp\n\t"
|
||||
"call handle_stack_overflow\n\t"
|
||||
"1: jmp 1b"
|
||||
: ASM_CALL_CONSTRAINT
|
||||
: "D" ("kernel stack overflow (page fault)"),
|
||||
"S" (regs), "d" (address),
|
||||
[stack] "rm" (stack));
|
||||
unreachable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 32-bit:
|
||||
*
|
||||
* Valid to do another page fault here, because if this fault
|
||||
* had been triggered by is_prefetch fixup_exception would have
|
||||
* handled it.
|
||||
*
|
||||
* 64-bit:
|
||||
*
|
||||
* Hall of shame of CPU/BIOS bugs.
|
||||
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
|
||||
* instruction.
|
||||
*/
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
|
||||
if (is_errata93(regs, address))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Buggy firmware could access regions which might page fault, try to
|
||||
* recover from such faults.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_EFI))
|
||||
efi_recover_from_page_fault(address);
|
||||
|
||||
oops:
|
||||
/*
|
||||
* Oops. The kernel tried to access some bad page. We'll have to
|
||||
* terminate things with extreme prejudice:
|
||||
*/
|
||||
flags = oops_begin();
|
||||
|
||||
show_fault_oops(regs, error_code, address);
|
||||
|
||||
if (task_stack_end_corrupted(tsk))
|
||||
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
|
||||
|
||||
sig = SIGKILL;
|
||||
if (__die("Oops", regs, error_code))
|
||||
sig = 0;
|
||||
|
||||
/* Executive summary in case the body of the oops scrolled away */
|
||||
printk(KERN_DEFAULT "CR2: %016lx\n", address);
|
||||
|
||||
oops_end(flags, regs, sig);
|
||||
page_fault_oops(regs, error_code, address);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -796,47 +791,49 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
|||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
/* User mode accesses just cause a SIGSEGV */
|
||||
if (user_mode(regs) && (error_code & X86_PF_USER)) {
|
||||
/*
|
||||
* It's possible to have interrupts off here:
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Valid to do another page fault here because this one came
|
||||
* from user space:
|
||||
*/
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
|
||||
if (is_errata100(regs, address))
|
||||
return;
|
||||
|
||||
sanitize_error_code(address, &error_code);
|
||||
|
||||
if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
|
||||
return;
|
||||
|
||||
if (likely(show_unhandled_signals))
|
||||
show_signal_msg(regs, error_code, address, tsk);
|
||||
|
||||
set_signal_archinfo(address, error_code);
|
||||
|
||||
if (si_code == SEGV_PKUERR)
|
||||
force_sig_pkuerr((void __user *)address, pkey);
|
||||
|
||||
force_sig_fault(SIGSEGV, si_code, (void __user *)address);
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
if (!user_mode(regs)) {
|
||||
kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_f00f_bug(regs, address))
|
||||
if (!(error_code & X86_PF_USER)) {
|
||||
/* Implicit user access to kernel memory -- just oops */
|
||||
page_fault_oops(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* User mode accesses just cause a SIGSEGV.
|
||||
* It's possible to have interrupts off here:
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Valid to do another page fault here because this one came
|
||||
* from user space:
|
||||
*/
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
|
||||
no_context(regs, error_code, address, SIGSEGV, si_code);
|
||||
if (is_errata100(regs, address))
|
||||
return;
|
||||
|
||||
sanitize_error_code(address, &error_code);
|
||||
|
||||
if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
|
||||
return;
|
||||
|
||||
if (likely(show_unhandled_signals))
|
||||
show_signal_msg(regs, error_code, address, tsk);
|
||||
|
||||
set_signal_archinfo(address, error_code);
|
||||
|
||||
if (si_code == SEGV_PKUERR)
|
||||
force_sig_pkuerr((void __user *)address, pkey);
|
||||
|
||||
force_sig_fault(SIGSEGV, si_code, (void __user *)address);
|
||||
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
static noinline void
|
||||
|
@ -926,8 +923,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
|
|||
vm_fault_t fault)
|
||||
{
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
|
||||
if (!user_mode(regs)) {
|
||||
kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -961,40 +958,6 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
|
|||
force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
|
||||
}
|
||||
|
||||
static noinline void
|
||||
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address, vm_fault_t fault)
|
||||
{
|
||||
if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (fault & VM_FAULT_OOM) {
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address,
|
||||
SIGSEGV, SEGV_MAPERR);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We ran out of memory, call the OOM killer, and return to the
|
||||
* userspace (which will retry the fault, or kill us if we got
|
||||
* oom-killed):
|
||||
*/
|
||||
pagefault_out_of_memory();
|
||||
} else {
|
||||
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
|
||||
VM_FAULT_HWPOISON_LARGE))
|
||||
do_sigbus(regs, error_code, address, fault);
|
||||
else if (fault & VM_FAULT_SIGSEGV)
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
|
||||
{
|
||||
if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
|
||||
|
@ -1209,6 +1172,9 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
|
|||
}
|
||||
#endif
|
||||
|
||||
if (is_f00f_bug(regs, hw_error_code, address))
|
||||
return;
|
||||
|
||||
/* Was the fault spurious, caused by lazy TLB invalidation? */
|
||||
if (spurious_kernel_fault(hw_error_code, address))
|
||||
return;
|
||||
|
@ -1229,10 +1195,17 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
|
|||
}
|
||||
NOKPROBE_SYMBOL(do_kern_addr_fault);
|
||||
|
||||
/* Handle faults in the user portion of the address space */
|
||||
/*
|
||||
* Handle faults in the user portion of the address space. Nothing in here
|
||||
* should check X86_PF_USER without a specific justification: for almost
|
||||
* all purposes, we should treat a normal kernel access to user memory
|
||||
* (e.g. get_user(), put_user(), etc.) the same as the WRUSS instruction.
|
||||
* The one exception is AC flag handling, which is, per the x86
|
||||
* architecture, special for WRUSS.
|
||||
*/
|
||||
static inline
|
||||
void do_user_addr_fault(struct pt_regs *regs,
|
||||
unsigned long hw_error_code,
|
||||
unsigned long error_code,
|
||||
unsigned long address)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
@ -1244,6 +1217,21 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
|
||||
if (unlikely((error_code & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR)) {
|
||||
/*
|
||||
* Whoops, this is kernel mode code trying to execute from
|
||||
* user memory. Unless this is AMD erratum #93, which
|
||||
* corrupts RIP such that it looks like a user address,
|
||||
* this is unrecoverable. Don't even try to look up the
|
||||
* VMA or look for extable entries.
|
||||
*/
|
||||
if (is_errata93(regs, address))
|
||||
return;
|
||||
|
||||
page_fault_oops(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
/* kprobes don't want to hook the spurious faults: */
|
||||
if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
|
||||
return;
|
||||
|
@ -1252,8 +1240,8 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
* Reserved bits are never expected to be set on
|
||||
* entries in the user portion of the page tables.
|
||||
*/
|
||||
if (unlikely(hw_error_code & X86_PF_RSVD))
|
||||
pgtable_bad(regs, hw_error_code, address);
|
||||
if (unlikely(error_code & X86_PF_RSVD))
|
||||
pgtable_bad(regs, error_code, address);
|
||||
|
||||
/*
|
||||
* If SMAP is on, check for invalid kernel (supervisor) access to user
|
||||
|
@ -1263,10 +1251,13 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
* enforcement appears to be consistent with the USER bit.
|
||||
*/
|
||||
if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
|
||||
!(hw_error_code & X86_PF_USER) &&
|
||||
!(regs->flags & X86_EFLAGS_AC)))
|
||||
{
|
||||
bad_area_nosemaphore(regs, hw_error_code, address);
|
||||
!(error_code & X86_PF_USER) &&
|
||||
!(regs->flags & X86_EFLAGS_AC))) {
|
||||
/*
|
||||
* No extable entry here. This was a kernel access to an
|
||||
* invalid pointer. get_kernel_nofault() will not get here.
|
||||
*/
|
||||
page_fault_oops(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1275,7 +1266,7 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
* in a region with pagefaults disabled then we must not take the fault
|
||||
*/
|
||||
if (unlikely(faulthandler_disabled() || !mm)) {
|
||||
bad_area_nosemaphore(regs, hw_error_code, address);
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1296,9 +1287,9 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
||||
if (hw_error_code & X86_PF_WRITE)
|
||||
if (error_code & X86_PF_WRITE)
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
if (hw_error_code & X86_PF_INSTR)
|
||||
if (error_code & X86_PF_INSTR)
|
||||
flags |= FAULT_FLAG_INSTRUCTION;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -1314,7 +1305,7 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
* to consider the PF_PK bit.
|
||||
*/
|
||||
if (is_vsyscall_vaddr(address)) {
|
||||
if (emulate_vsyscall(hw_error_code, regs, address))
|
||||
if (emulate_vsyscall(error_code, regs, address))
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -1337,7 +1328,7 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
* Fault from code in kernel from
|
||||
* which we do not expect faults.
|
||||
*/
|
||||
bad_area_nosemaphore(regs, hw_error_code, address);
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
retry:
|
||||
|
@ -1353,17 +1344,17 @@ retry:
|
|||
|
||||
vma = find_vma(mm, address);
|
||||
if (unlikely(!vma)) {
|
||||
bad_area(regs, hw_error_code, address);
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
if (likely(vma->vm_start <= address))
|
||||
goto good_area;
|
||||
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
|
||||
bad_area(regs, hw_error_code, address);
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
if (unlikely(expand_stack(vma, address))) {
|
||||
bad_area(regs, hw_error_code, address);
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1372,8 +1363,8 @@ retry:
|
|||
* we can handle it..
|
||||
*/
|
||||
good_area:
|
||||
if (unlikely(access_error(hw_error_code, vma))) {
|
||||
bad_area_access_error(regs, hw_error_code, address, vma);
|
||||
if (unlikely(access_error(error_code, vma))) {
|
||||
bad_area_access_error(regs, error_code, address, vma);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1392,11 +1383,14 @@ good_area:
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags, regs);
|
||||
|
||||
/* Quick path to respond to signals */
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
/*
|
||||
* Quick path to respond to signals. The core mm code
|
||||
* has unlocked the mm for us if we get here.
|
||||
*/
|
||||
if (!user_mode(regs))
|
||||
no_context(regs, hw_error_code, address, SIGBUS,
|
||||
BUS_ADRERR);
|
||||
kernelmode_fixup_or_oops(regs, error_code, address,
|
||||
SIGBUS, BUS_ADRERR);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1412,12 +1406,37 @@ good_area:
|
|||
}
|
||||
|
||||
mmap_read_unlock(mm);
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
mm_fault_error(regs, hw_error_code, address, fault);
|
||||
if (likely(!(fault & VM_FAULT_ERROR)))
|
||||
return;
|
||||
|
||||
if (fatal_signal_pending(current) && !user_mode(regs)) {
|
||||
kernelmode_fixup_or_oops(regs, error_code, address, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
check_v8086_mode(regs, address, tsk);
|
||||
if (fault & VM_FAULT_OOM) {
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!user_mode(regs)) {
|
||||
kernelmode_fixup_or_oops(regs, error_code, address,
|
||||
SIGSEGV, SEGV_MAPERR);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We ran out of memory, call the OOM killer, and return the
|
||||
* userspace (which will retry the fault, or kill us if we got
|
||||
* oom-killed):
|
||||
*/
|
||||
pagefault_out_of_memory();
|
||||
} else {
|
||||
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
|
||||
VM_FAULT_HWPOISON_LARGE))
|
||||
do_sigbus(regs, error_code, address, fault);
|
||||
else if (fault & VM_FAULT_SIGSEGV)
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_user_addr_fault);
|
||||
|
||||
|
|
|
@ -157,16 +157,25 @@ __ref void *alloc_low_pages(unsigned int num)
|
|||
}
|
||||
|
||||
/*
|
||||
* By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
|
||||
* With KASLR memory randomization, depending on the machine e820 memory
|
||||
* and the PUD alignment. We may need twice more pages when KASLR memory
|
||||
* By default need to be able to allocate page tables below PGD firstly for
|
||||
* the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping.
|
||||
* With KASLR memory randomization, depending on the machine e820 memory and the
|
||||
* PUD alignment, twice that many pages may be needed when KASLR memory
|
||||
* randomization is enabled.
|
||||
*/
|
||||
#ifndef CONFIG_RANDOMIZE_MEMORY
|
||||
#define INIT_PGD_PAGE_COUNT 6
|
||||
|
||||
#ifndef CONFIG_X86_5LEVEL
|
||||
#define INIT_PGD_PAGE_TABLES 3
|
||||
#else
|
||||
#define INIT_PGD_PAGE_COUNT 12
|
||||
#define INIT_PGD_PAGE_TABLES 4
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_RANDOMIZE_MEMORY
|
||||
#define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES)
|
||||
#else
|
||||
#define INIT_PGD_PAGE_COUNT (4 * INIT_PGD_PAGE_TABLES)
|
||||
#endif
|
||||
|
||||
#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
|
||||
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
|
||||
void __init early_alloc_pgt_buf(void)
|
||||
|
|
|
@ -10,8 +10,6 @@
|
|||
|
||||
#define pr_fmt(fmt) "mmiotrace: " fmt
|
||||
|
||||
#define DEBUG 1
|
||||
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/slab.h>
|
||||
|
|
|
@ -687,15 +687,25 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
|
|||
* @return: Returns, if the page fault is not handled. This function
|
||||
* will never return if the page fault is handled successfully.
|
||||
*/
|
||||
void efi_recover_from_page_fault(unsigned long phys_addr)
|
||||
void efi_crash_gracefully_on_page_fault(unsigned long phys_addr)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_X86_64))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Make sure that an efi runtime service caused the page fault.
|
||||
* If we get an interrupt/NMI while processing an EFI runtime service
|
||||
* then this is a regular OOPS, not an EFI failure.
|
||||
*/
|
||||
if (efi_rts_work.efi_rts_id == EFI_NONE)
|
||||
if (in_interrupt())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Make sure that an efi runtime service caused the page fault.
|
||||
* READ_ONCE() because we might be OOPSing in a different thread,
|
||||
* and we don't want to trip KTSAN while trying to OOPS.
|
||||
*/
|
||||
if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE ||
|
||||
current_work() != &efi_rts_work.work)
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -747,6 +757,4 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
|
|||
set_current_state(TASK_IDLE);
|
||||
schedule();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue