From d52e7d5a952c5e35783f96e8c5b7fcffbb0d7c60 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 13 May 2017 13:46:28 +0800 Subject: [PATCH 1/6] x86/KASLR: Parse all 'memmap=' boot option entries In commit: f28442497b5c ("x86/boot: Fix KASLR and memmap= collision") ... the memmap= option is parsed so that KASLR can avoid those reserved regions. It uses cmdline_find_option() to get the value if memmap= is specified, however the problem is that cmdline_find_option() can only find the last entry if multiple memmap entries are provided. This is not correct. Address this by checking each command line token for a "memmap=" match and parse each instance instead of using cmdline_find_option(). Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: douly.fnst@cn.fujitsu.com Cc: dyoung@redhat.com Cc: m.mizuma@jp.fujitsu.com Link: http://lkml.kernel.org/r/1494654390-23861-2-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/cmdline.c | 2 +- arch/x86/boot/compressed/kaslr.c | 136 +++++++++++++++++------------ arch/x86/boot/string.c | 8 ++ 3 files changed, 91 insertions(+), 55 deletions(-) diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 73ccf63b0f48..9dc1ce6ba3c0 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -13,7 +13,7 @@ static inline char rdfs8(addr_t addr) return *((char *)(fs + addr)); } #include "../cmdline.c" -static unsigned long get_cmd_line_ptr(void) +unsigned long get_cmd_line_ptr(void) { unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54c24f0a43d3..106e13b89ff6 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -9,16 +9,41 @@ * contain the entire properly aligned running kernel image. 
* */ + +/* + * isspace() in linux/ctype.h is expected by next_args() to filter + * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, + * since isdigit() is implemented in both of them. Hence disable it + * here. + */ +#define BOOT_CTYPE_H + +/* + * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. + * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL + * which is meaningless and will cause compiling error in some cases. + * So do not include linux/export.h and define EXPORT_SYMBOL(sym) + * as empty. + */ +#define _LINUX_EXPORT_H +#define EXPORT_SYMBOL(sym) + #include "misc.h" #include "error.h" -#include "../boot.h" #include #include #include #include +#include #include +/* Macros used by the included decompressor code below. */ +#define STATIC +#include + +extern unsigned long get_cmd_line_ptr(void); + /* Simplified build-specific string for starting entropy. */ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; @@ -62,6 +87,7 @@ struct mem_vector { static bool memmap_too_large; + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, @@ -85,49 +111,14 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } -/** - * _memparse - Parse a string with mem suffixes into a number - * @ptr: Where parse begins - * @retptr: (output) Optional pointer to next char after parse completes - * - * Parses a string into a number. The number stored at @ptr is - * potentially suffixed with K, M, G, T, P, E. 
- */ -static unsigned long long _memparse(const char *ptr, char **retptr) +char *skip_spaces(const char *str) { - char *endptr; /* Local pointer to end of parsed string */ - - unsigned long long ret = simple_strtoull(ptr, &endptr, 0); - - switch (*endptr) { - case 'E': - case 'e': - ret <<= 10; - case 'P': - case 'p': - ret <<= 10; - case 'T': - case 't': - ret <<= 10; - case 'G': - case 'g': - ret <<= 10; - case 'M': - case 'm': - ret <<= 10; - case 'K': - case 'k': - ret <<= 10; - endptr++; - default: - break; - } - - if (retptr) - *retptr = endptr; - - return ret; + while (isspace(*str)) + ++str; + return (char *)str; } +#include "../../../../lib/ctype.c" +#include "../../../../lib/cmdline.c" static int parse_memmap(char *p, unsigned long long *start, unsigned long long *size) @@ -142,7 +133,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) return -EINVAL; oldp = p; - *size = _memparse(p, &p); + *size = memparse(p, &p); if (p == oldp) return -EINVAL; @@ -155,27 +146,21 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) case '#': case '$': case '!': - *start = _memparse(p + 1, &p); + *start = memparse(p + 1, &p); return 0; } return -EINVAL; } -static void mem_avoid_memmap(void) +static void mem_avoid_memmap(char *str) { - char arg[128]; + static int i; int rc; - int i; - char *str; - /* See if we have any memmap areas */ - rc = cmdline_find_option("memmap", arg, sizeof(arg)); - if (rc <= 0) + if (i >= MAX_MEMMAP_REGIONS) return; - i = 0; - str = arg; while (str && (i < MAX_MEMMAP_REGIONS)) { int rc; unsigned long long start, size; @@ -202,6 +187,49 @@ static void mem_avoid_memmap(void) memmap_too_large = true; } + +/* + * handle_mem_memmap will also cover 'mem=' issue in next patch. Will remove + * this note later. 
+ */ +static int handle_mem_memmap(void) +{ + char *args = (char *)get_cmd_line_ptr(); + size_t len = strlen((char *)args); + char *tmp_cmdline; + char *param, *val; + + if (!strstr(args, "memmap=")) + return 0; + + tmp_cmdline = malloc(len + 1); + if (!tmp_cmdline ) + error("Failed to allocate space for tmp_cmdline"); + + memcpy(tmp_cmdline, args, len); + tmp_cmdline[len] = 0; + args = tmp_cmdline; + + /* Chew leading spaces */ + args = skip_spaces(args); + + while (*args) { + args = next_arg(args, &param, &val); + /* Stop at -- */ + if (!val && strcmp(param, "--") == 0) { + warn("Only '--' specified in cmdline"); + free(tmp_cmdline); + return -1; + } + + if (!strcmp(param, "memmap")) + mem_avoid_memmap(val); + } + + free(tmp_cmdline); + return 0; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -323,7 +351,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ /* Mark the memmap regions we need to avoid */ - mem_avoid_memmap(); + handle_mem_memmap(); #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. 
*/ diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 5457b02fc050..630e3664906b 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -122,6 +122,14 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas return result; } +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} + /** * strlen - Find the length of a string * @s: The string to be sized From 4cdba14f84c9102c4434384731cd61018b970d59 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 13 May 2017 13:46:29 +0800 Subject: [PATCH 2/6] x86/KASLR: Handle the memory limit specified by the 'memmap=' and 'mem=' boot options The 'mem=' boot option limits the max address a system can use - any memory region above the limit will be removed. Furthermore, the 'memmap=nn[KMG]' variant (with no offset specified) has the same behaviour as 'mem='. KASLR needs to consider this when choosing the random position for decompressing the kernel. Do it. 
Tested-by: Masayoshi Mizuma Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: douly.fnst@cn.fujitsu.com Cc: dyoung@redhat.com Link: http://lkml.kernel.org/r/1494654390-23861-3-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 68 +++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 106e13b89ff6..e0eba12bffe7 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -88,6 +88,10 @@ struct mem_vector { static bool memmap_too_large; +/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ +unsigned long long mem_limit = ULLONG_MAX; + + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, @@ -138,16 +142,23 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) return -EINVAL; switch (*p) { - case '@': - /* Skip this region, usable */ - *start = 0; - *size = 0; - return 0; case '#': case '$': case '!': *start = memparse(p + 1, &p); return 0; + case '@': + /* memmap=nn@ss specifies usable region, should be skipped */ + *size = 0; + /* Fall through */ + default: + /* + * If w/o offset, only size specified, memmap=nn[KMG] has the + * same behaviour as mem=nn[KMG]. It limits the max address + * system can use. Region above the limit should be avoided. 
+ */ + *start = 0; + return 0; } return -EINVAL; @@ -173,9 +184,14 @@ static void mem_avoid_memmap(char *str) if (rc < 0) break; str = k; - /* A usable region that should not be skipped */ - if (size == 0) + + if (start == 0) { + /* Store the specified memory limit if size > 0 */ + if (size > 0) + mem_limit = size; + continue; + } mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; @@ -187,19 +203,15 @@ static void mem_avoid_memmap(char *str) memmap_too_large = true; } - -/* - * handle_mem_memmap will also cover 'mem=' issue in next patch. Will remove - * this note later. - */ static int handle_mem_memmap(void) { char *args = (char *)get_cmd_line_ptr(); size_t len = strlen((char *)args); char *tmp_cmdline; char *param, *val; + u64 mem_size; - if (!strstr(args, "memmap=")) + if (!strstr(args, "memmap=") && !strstr(args, "mem=")) return 0; tmp_cmdline = malloc(len + 1); @@ -222,8 +234,20 @@ static int handle_mem_memmap(void) return -1; } - if (!strcmp(param, "memmap")) + if (!strcmp(param, "memmap")) { mem_avoid_memmap(val); + } else if (!strcmp(param, "mem")) { + char *p = val; + + if (!strcmp(p, "nopentium")) + continue; + mem_size = memparse(p, &p); + if (mem_size == 0) { + free(tmp_cmdline); + return -EINVAL; + } + mem_limit = mem_size; + } } free(tmp_cmdline); @@ -460,7 +484,8 @@ static void process_e820_entry(struct boot_e820_entry *entry, { struct mem_vector region, overlap; struct slot_area slot_area; - unsigned long start_orig; + unsigned long start_orig, end; + struct boot_e820_entry cur_entry; /* Skip non-RAM entries. 
*/ if (entry->type != E820_TYPE_RAM) @@ -474,8 +499,15 @@ static void process_e820_entry(struct boot_e820_entry *entry, if (entry->addr + entry->size < minimum) return; - region.start = entry->addr; - region.size = entry->size; + /* Ignore entries above memory limit */ + end = min(entry->size + entry->addr, mem_limit); + if (entry->addr >= end) + return; + cur_entry.addr = entry->addr; + cur_entry.size = end - entry->addr; + + region.start = cur_entry.addr; + region.size = cur_entry.size; /* Give up if slot area array is full. */ while (slot_area_index < MAX_SLOT_AREA) { @@ -489,7 +521,7 @@ static void process_e820_entry(struct boot_e820_entry *entry, region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); /* Did we raise the address above this e820 region? */ - if (region.start > entry->addr + entry->size) + if (region.start > cur_entry.addr + cur_entry.size) return; /* Reduce size by any delta from the original address. */ From 8fcc9bc3eaa2ef8345e2b4b22e3a88804ac46337 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 13 May 2017 13:46:30 +0800 Subject: [PATCH 3/6] Documentation/kernel-parameters.txt: Update 'memmap=' boot option description In commit: 9710f581bb4c ("x86, mm: Let "memmap=" take more entries one time") ... 'memmap=' was changed to adopt multiple, comma delimited values in a single entry, so update the related description. In the special case of only specifying size value without an offset, like memmap=nn[KMG], memmap behaves similarly to mem=nn[KMG], so update it too here. Furthermore, for memmap=nn[KMG]$ss[KMG], an escape character needs be added before '$' for some bootloaders. E.g in grub2, if we specify memmap=100M$5G as suggested by the documentation, "memmap=100MG" gets passed to the kernel. Clarify all this. 
Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: douly.fnst@cn.fujitsu.com Cc: dyoung@redhat.com Cc: m.mizuma@jp.fujitsu.com Link: http://lkml.kernel.org/r/1494654390-23861-4-git-send-email-bhe@redhat.com [ Various spelling fixes. ] Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..4e4c3402412e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2127,6 +2127,12 @@ memmap=nn[KMG]@ss[KMG] [KNL] Force usage of a specific region of memory. Region of memory to be used is from ss to ss+nn. + If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG], + which limits max address to nn[KMG]. + Multiple different regions can be specified, + comma delimited. + Example: + memmap=100M@2G,100M#3G,1G!1024G memmap=nn[KMG]#ss[KMG] [KNL,ACPI] Mark specific memory as ACPI data. @@ -2139,6 +2145,9 @@ memmap=64K$0x18690000 or memmap=0x10000$0x18690000 + Some bootloaders may need an escape character before '$', + like Grub2, otherwise '$' and the following number + will be eaten. memmap=nn[KMG]!ss[KMG] [KNL,X86] Mark specific memory as protected. 
From 5b8b9cf76add98e19ff8ceb4247c2920687591a0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 May 2017 11:14:17 +0200 Subject: [PATCH 4/6] x86/KASLR: Use the right memcpy() implementation The decompressor has its own implementation of the string functions, but has to include the right header to get those, while implicitly including linux/string.h may result in a link error: arch/x86/boot/compressed/kaslr.o: In function `choose_random_location': kaslr.c:(.text+0xf51): undefined reference to `_mmx_memcpy' This has appeared now as KASLR started using memcpy(), via: d52e7d5a952c ("x86/KASLR: Parse all 'memmap=' boot option entries") Other files in the decompressor already do the same thing. Signed-off-by: Arnd Bergmann Acked-by: Baoquan He Cc: Dave Jiang Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Garnier Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170530091446.1000183-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e0eba12bffe7..fe318b44f7b8 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -30,6 +30,7 @@ #include "misc.h" #include "error.h" +#include "../string.h" #include #include From 28be1b454c2bb60e317b3135211a378fa2718886 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 31 May 2017 10:12:39 +0200 Subject: [PATCH 5/6] x86/boot: Remove unused copy_*_gs() functions copy_from_gs() and copy_to_gs() are unused in the boot code. They have actually never been used -- they were always commented out since their addition in 2007: 5be865661516 ("String-handling functions for the new x86 setup code.") So remove them -- they can be restored from history if needed. 
Signed-off-by: Jiri Slaby Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170531081243.5709-1-jslaby@suse.cz Signed-off-by: Ingo Molnar --- arch/x86/boot/copy.S | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 1eb7d298b47d..15d9f74b0008 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -65,23 +65,3 @@ GLOBAL(copy_to_fs) popw %es retl ENDPROC(copy_to_fs) - -#if 0 /* Not currently used, but can be enabled as needed */ -GLOBAL(copy_from_gs) - pushw %ds - pushw %gs - popw %ds - calll memcpy - popw %ds - retl -ENDPROC(copy_from_gs) - -GLOBAL(copy_to_gs) - pushw %es - pushw %gs - popw %es - calll memcpy - popw %es - retl -ENDPROC(copy_to_gs) -#endif From fe2d48b805d01e14ddb8144de01de43171eb516f Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 16 Jun 2017 18:16:02 +0200 Subject: [PATCH 6/6] x86/debug: Extend the lower bound of crash kernel low reservations The following change in 2013: 0212f9159694 ("x86: Add Crash kernel low reservation") ... introduced reserve_crashkernel_low(). This function is used to reserve crash kernel memory either if crashkernel=size,low is given on the command line or if the region reserved by reserve_crashkernel is entirely above 4G. reserve_crashkernel_low() tries to find a block of 'low_size' bytes. But there seems to be no good reason to restrict the lower bound of the range to 'low_size'. Make memblock_find_in_range() search from the start of memory. Signed-off-by: Jiri Bohac Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20170616161602.2r7birrf2y3ylv6v@dwarf.suse.cz Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0b4d3c686b1e..848d0489ad00 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -503,7 +503,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20));