diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 67eec55093a5..783099f2ac72 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -120,14 +120,6 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". -config DEBUG_NX_TEST - tristate "Testcase for the NX non-executable stack feature" - depends on DEBUG_KERNEL && m - ---help--- - This option enables a testcase for the CPU NX capability - and the software setup of this feature. - If in doubt, say "N" - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EXPERT diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d34bd370074b..7afb0e2f07f4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt +/* + * The default ioremap() behavior is non-cached; if you need something + * else, you probably want one of the following. + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); +#define ioremap_uc ioremap_uc + +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); -#define ioremap_uc ioremap_uc - -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); @@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void); */ #define xlate_dev_kmem_ptr(p) p +/** + * memset_io Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ static inline void memset_io(volatile void __iomem *addr, unsigned char val, size_t count) { memset((void __force *)addr, val, count); } +/** + * memcpy_fromio Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) { memcpy(dst, (const void __force *)src, count); } +/** + * memcpy_toio Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. + */ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 581386c7e429..bdcdb3b3a219 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -101,7 +101,6 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o -obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c deleted file mode 100644 index a3b875c9e6af..000000000000 --- a/arch/x86/kernel/test_nx.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * test_nx.c: functional test for NX functionality - * - * (C) Copyright 2008 Intel Corporation - * Author: Arjan van de Ven - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include -#include -#include - -#include -#include - -extern int rodata_test_data; - -/* - * This file checks 4 things: - * 1) Check if the stack is not executable - * 2) Check if kmalloc memory is not executable - * 3) Check if the .rodata section is not executable - * 4) Check if the .data section of a module is not executable - * - * To do this, the test code tries to execute memory in stack/kmalloc/etc, - * and then checks if the expected trap happens. - * - * Sadly, this implies having a dynamic exception handling table entry. - * ... which can be done (and will make Rusty cry)... but it can only - * be done in a stand-alone module with only 1 entry total. - * (otherwise we'd have to sort and that's just too messy) - */ - - - -/* - * We want to set up an exception handling point on our stack, - * which means a variable value. This function is rather dirty - * and walks the exception table of the module, looking for a magic - * marker and replaces it with a specific function. - */ -static void fudze_exception_table(void *marker, void *new) -{ - struct module *mod = THIS_MODULE; - struct exception_table_entry *extable; - - /* - * Note: This module has only 1 exception table entry, - * so searching and sorting is not needed. If that changes, - * this would be the place to search and re-sort the exception - * table. - */ - if (mod->num_exentries > 1) { - printk(KERN_ERR "test_nx: too many exception table entries!\n"); - printk(KERN_ERR "test_nx: test results are not reliable.\n"); - return; - } - extable = (struct exception_table_entry *)mod->extable; - extable[0].insn = (unsigned long)new; -} - - -/* - * exception tables get their symbols translated so we need - * to use a fake function to put in there, which we can then - * replace at runtime. - */ -void foo_label(void); - -/* - * returns 0 for not-executable, negative for executable - * - * Note: we cannot allow this function to be inlined, because - * that would give us more than 1 exception table entry. - * This in turn would break the assumptions above. - */ -static noinline int test_address(void *address) -{ - unsigned long result; - - /* Set up an exception table entry for our address */ - fudze_exception_table(&foo_label, address); - result = 1; - asm volatile( - "foo_label:\n" - "0: call *%[fake_code]\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: mov %[zero], %[rslt]\n" - " ret\n" - ".previous\n" - _ASM_EXTABLE(0b,2b) - : [rslt] "=r" (result) - : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) - ); - /* change the exception table back for the next round */ - fudze_exception_table(address, &foo_label); - - if (result) - return -ENODEV; - return 0; -} - -static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */ - -static int test_NX(void) -{ - int ret = 0; - /* 0xC3 is the opcode for "ret" */ - char stackcode[] = {0xC3, 0x90, 0 }; - char *heap; - - test_data = 0xC3; - - printk(KERN_INFO "Testing NX protection\n"); - - /* Test 1: check if the stack is not executable */ - if (test_address(&stackcode)) { - printk(KERN_ERR "test_nx: stack was executable\n"); - ret = -ENODEV; - } - - - /* Test 2: Check if the heap is executable */ - heap = kmalloc(64, GFP_KERNEL); - if (!heap) - return -ENOMEM; - heap[0] = 0xC3; /* opcode for "ret" */ - - if (test_address(heap)) { - printk(KERN_ERR "test_nx: heap was executable\n"); - ret = -ENODEV; - } - kfree(heap); - - /* - * The following 2 tests currently fail, this needs to get fixed - * Until then, don't run them to avoid too many people getting scared - * by the error message - */ - - /* Test 3: Check if the .rodata section is executable */ - if (rodata_test_data != 0xC3) { - printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); - ret = -ENODEV; - } else if (test_address(&rodata_test_data)) { - printk(KERN_ERR "test_nx: .rodata section is executable\n"); - ret = -ENODEV; - } - -#if 0 - /* Test 4: Check if the .data section of a module is executable */ - if (test_address(&test_data)) { - printk(KERN_ERR "test_nx: .data section is executable\n"); - ret = -ENODEV; - } - -#endif - return ret; -} - -static void test_exit(void) -{ -} - -module_init(test_NX); -module_exit(test_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the NX infrastructure"); -MODULE_AUTHOR("Arjan van de Ven "); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 8aa6bea1cd6c..58b5bee7ea27 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -18,6 +18,7 @@ #include #include +#include #include /* @@ -51,6 +52,10 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif # ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, # endif @@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = { { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, { 0/* VMALLOC_START */, "vmalloc() Area" }, { 0/* VMEMMAP_START */, "Vmemmap" }, +#ifdef CONFIG_KASAN + { KASAN_SHADOW_START, "KASAN shadow" }, + { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif # ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, # endif @@ -327,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, #if PTRS_PER_PUD > 1 +/* + * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y + * KASAN fills page tables with the same values. Since there is no + * point in checking page table more than once we just skip repeated + * entries. This saves us dozens of seconds during boot. + */ +static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx) +{ + return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud)); +} + static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) { int i; pud_t *start; pgprotval_t prot; + pud_t *prev_pud = NULL; start = (pud_t *) pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PUD; i++) { st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { + if (!pud_none(*start) && + !pud_already_checked(prev_pud, start, st->check_wx)) { if (pud_large(*start) || !pud_present(*start)) { prot = pud_flags(*start); note_page(m, st, __pgprot(prot), 2); @@ -349,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, } else note_page(m, st, __pgprot(0), 2); + prev_pud = start; start++; } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5a287e523eab..28d42130243c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; +#ifdef CONFIG_PREEMPT + /* + * Avoid wbinvd() because it causes latencies on all CPUs, + * regardless of any CPU isolation that may be in effect. + * + * This should be extended for CAT enabled systems independent of + * PREEMPT because wbinvd() does not respect the CAT partitions and + * this is exposed to unpriviledged users through the graphics + * subsystem. + */ + unsigned long do_wbinvd = 0; +#else unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ +#endif BUG_ON(irqs_disabled()); diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 159b52ccd600..d76485b22824 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node) { u64 ret = 0; if (node) { - struct memtype *data = container_of(node, struct memtype, rb); + struct memtype *data = rb_entry(node, struct memtype, rb); ret = data->subtree_max_end; } return ret; @@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, struct memtype *last_lower = NULL; while (node) { - struct memtype *data = container_of(node, struct memtype, rb); + struct memtype *data = rb_entry(node, struct memtype, rb); if (get_subtree_max_end(node->rb_left) > start) { /* Lowest overlap if any must be on left side */ @@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root, node = rb_next(&match->rb); if (node) - match = container_of(node, struct memtype, rb); + match = rb_entry(node, struct memtype, rb); else match = NULL; } @@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root, node = rb_next(&match->rb); while (node) { - match = container_of(node, struct memtype, rb); + match = rb_entry(node, struct memtype, rb); if (match->start >= end) /* Checked all possible matches */ goto success; @@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) struct rb_node *parent = NULL; while (*node) { - struct memtype *data = container_of(*node, struct memtype, rb); + struct memtype *data = rb_entry(*node, struct memtype, rb); parent = *node; if (data->subtree_max_end < newdata->end) @@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) } if (node) { /* pos == i */ - struct memtype *this = container_of(node, struct memtype, rb); + struct memtype *this = rb_entry(node, struct memtype, rb); *out = *this; return 0; } else { diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 25d4067c11e4..83d8b1c6cb0e 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ protection_keys test_vdso TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \