From 5122daa017578ebc4818de8fb3c9d0e131f8b335 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Tue, 4 Jul 2017 09:31:29 +0100
Subject: [PATCH 1/5] x86/platform/uv/BAU: Minor cleanup, make some local functions static

The functions handle_uv2_busy, uv_flush_send_and_wait and
find_another_by_swack are local to the source, so make them static.
Also remove normal_busy as it is no longer used.

Fixes various smatch warnings, such as:

  "symbol 'find_another_by_swack' was not declared. Should it be static?"
  "symbol 'handle_uv2_busy' was not declared. Should it be static?"

Signed-off-by: Colin Ian King
Signed-off-by: Thomas Gleixner
Cc: Mike Travis
Cc: Andrew Banman
Cc: kernel-janitors@vger.kernel.org
Cc: Dimitri Sivanich
Cc: Dou Liyang
Link: http://lkml.kernel.org/r/20170704083129.10559-1-colin.king@canonical.com
---
 arch/x86/platform/uv/tlb_uv.c | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 2983faab5b18..d4a61ddf9e62 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -587,32 +587,12 @@ static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc
 	return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 }
 
-/*
- * Return whether the status of the descriptor that is normally used for this
- * cpu (the one indexed by its hub-relative cpu number) is busy.
- * The status of the original 32 descriptors is always reflected in the 64
- * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
- * The bit provided by the activation_status_2 register is irrelevant to
- * the status if it is only being tested for busy or not busy.
- */
-int normal_busy(struct bau_control *bcp)
-{
-	int cpu = bcp->uvhub_cpu;
-	int mmr_offset;
-	int right_shift;
-
-	mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-	right_shift = cpu * UV_ACT_STATUS_SIZE;
-	return (((((read_lmmr(mmr_offset) >> right_shift) &
-		UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
-}
-
 /*
  * Entered when a bau descriptor has gone into a permanent busy wait because
  * of a hardware bug.
  * Workaround the bug.
  */
-int handle_uv2_busy(struct bau_control *bcp)
+static int handle_uv2_busy(struct bau_control *bcp)
 {
 	struct ptc_stats *stat = bcp->statp;
 
@@ -917,8 +897,9 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
-			   struct bau_desc *bau_desc)
+static int uv_flush_send_and_wait(struct cpumask *flush_mask,
+				  struct bau_control *bcp,
+				  struct bau_desc *bau_desc)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -1212,8 +1193,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
  * Search the message queue for any 'other' unprocessed message with the
  * same software acknowledge resource bit vector as the 'msg' message.
  */
-struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-					   struct bau_control *bcp)
+static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+						  struct bau_control *bcp)
 {
 	struct bau_pq_entry *msg_next = msg + 1;
 	unsigned char swack_vec = msg->swack_vec;
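The change itself is purely about linkage. A minimal sketch of the pattern, with hypothetical names rather than code from tlb_uv.c: a helper that is only called inside one translation unit gets the static keyword, which gives it internal linkage, silences the sparse/smatch "was not declared" warning, and lets the compiler flag the helper if it ever becomes unused.

/* file-local helper: internal linkage, no header declaration needed */
static int desc_is_busy(unsigned long status)
{
	/* the mask and "busy" value are invented for this sketch */
	return (status & 0x7) == 0x1;
}

int poll_descriptor(unsigned long status)
{
	/* only this .c file can reference desc_is_busy() */
	return desc_is_busy(status) ? -1 : 0;
}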
From 99c13b8c8896d7bcb92753bf0c63a8de4326e78d Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Tue, 4 Jul 2017 19:04:23 -0400
Subject: [PATCH 2/5] x86/mm/pat: Don't report PAT on CPUs that don't support it

The pat_enabled() logic is broken on CPUs which do not support PAT and
where the initialization code fails to call pat_init(). Due to that the
enabled flag stays true and pat_enabled() returns true wrongfully.

As a consequence the mappings, e.g. for Xorg, are set up with the wrong
caching mode and the required MTRR setups are omitted.

To cure this the following changes are required:

  1) Make pat_enabled() return true only if PAT initialization was
     invoked and successful.

  2) Invoke init_cache_modes() unconditionally in setup_arch() and
     remove the extra callsites in pat_disable() and the pat disabled
     code path in pat_init().

Also rename __pat_enabled to pat_disabled to reflect the real purpose of
this variable.

Fixes: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled")
Signed-off-by: Mikulas Patocka
Signed-off-by: Thomas Gleixner
Cc: Bernhard Held
Cc: Denys Vlasenko
Cc: Peter Zijlstra
Cc: Brian Gerst
Cc: "Luis R. Rodriguez"
Cc: Borislav Petkov
Cc: Andy Lutomirski
Cc: Josh Poimboeuf
Cc: Andrew Morton
Cc: Linus Torvalds
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1707041749300.3456@file01.intranet.prod.int.rdu2.redhat.com
---
 arch/x86/include/asm/pat.h |  1 +
 arch/x86/kernel/setup.c    |  7 +++++++
 arch/x86/mm/pat.c          | 28 ++++++++++++----------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 0b1ff4c1c14e..fffb2794dd89 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -7,6 +7,7 @@
 bool pat_enabled(void);
 void pat_disable(const char *reason);
 extern void pat_init(void);
+extern void init_cache_modes(void);
 
 extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 65622f07e633..3486d0498800 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1075,6 +1075,13 @@ void __init setup_arch(char **cmdline_p)
 
 	max_possible_pfn = max_pfn;
 
+	/*
+	 * This call is required when the CPU does not support PAT. If
+	 * mtrr_bp_init() invoked it already via pat_init() the call has no
+	 * effect.
+	 */
+	init_cache_modes();
+
 	/*
 	 * Define random base addresses for memory sections after max_pfn is
 	 * defined and before each memory section base is used.
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9b78685b66e6..45979502f64b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -37,14 +37,14 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "" fmt
 
-static bool boot_cpu_done;
-
-static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
-static void init_cache_modes(void);
+static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __read_mostly pat_initialized;
+static bool __read_mostly init_cm_done;
 
 void pat_disable(const char *reason)
 {
-	if (!__pat_enabled)
+	if (pat_disabled)
 		return;
 
 	if (boot_cpu_done) {
@@ -52,10 +52,8 @@ void pat_disable(const char *reason)
 		return;
 	}
 
-	__pat_enabled = 0;
+	pat_disabled = true;
 	pr_info("x86/PAT: %s\n", reason);
-
-	init_cache_modes();
 }
 
 static int __init nopat(char *str)
@@ -67,7 +65,7 @@ early_param("nopat", nopat);
 
 bool pat_enabled(void)
 {
-	return !!__pat_enabled;
+	return pat_initialized;
 }
 EXPORT_SYMBOL_GPL(pat_enabled);
 
@@ -205,6 +203,8 @@ static void __init_cache_modes(u64 pat)
 		update_cache_mode_entry(i, cache);
 	}
 	pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
+
+	init_cm_done = true;
 }
 
 #define PAT(x, y)	((u64)PAT_ ## y << ((x)*8))
@@ -225,6 +225,7 @@ static void pat_bsp_init(u64 pat)
 	}
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
+	pat_initialized = true;
 
 	__init_cache_modes(pat);
 }
@@ -242,10 +243,9 @@ static void pat_ap_init(u64 pat)
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-static void init_cache_modes(void)
+void init_cache_modes(void)
 {
 	u64 pat = 0;
-	static int init_cm_done;
 
 	if (init_cm_done)
 		return;
@@ -287,8 +287,6 @@ static void init_cache_modes(void)
 	}
 
 	__init_cache_modes(pat);
-
-	init_cm_done = 1;
 }
 
 /**
@@ -306,10 +304,8 @@ void pat_init(void)
 	u64 pat;
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	if (!pat_enabled()) {
-		init_cache_modes();
+	if (pat_disabled)
 		return;
-	}
 
 	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
 	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
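The essence of the fix is that "PAT is not disabled" and "PAT has actually been initialized" are two different states, and only the latter may make pat_enabled() return true. A small standalone model of that ordering, simplified and with hypothetical control flow; the real logic lives in arch/x86/mm/pat.c and is driven from mtrr_bp_init():

#include <stdbool.h>
#include <stdio.h>

static bool pat_disabled;      /* set by "nopat" or pat_disable()            */
static bool pat_initialized;   /* set only after the PAT MSR was programmed  */
static bool init_cm_done;      /* cache-mode tables derived at least once    */

static bool pat_enabled(void)
{
	return pat_initialized;         /* not merely "not disabled" */
}

static void init_cache_modes(void)
{
	if (init_cm_done)
		return;
	/* derive the cache-mode translation tables from a boot-time PAT value */
	init_cm_done = true;
}

static void pat_init(bool cpu_has_pat)
{
	if (pat_disabled || !cpu_has_pat)
		return;                 /* the PAT MSR is never written here */
	/* wrmsrl(MSR_IA32_CR_PAT, pat) would happen at this point */
	pat_initialized = true;
	init_cache_modes();
}

int main(void)
{
	pat_init(false);        /* CPU without PAT: initialization is skipped */
	init_cache_modes();     /* ...so setup_arch() must call this anyway   */
	printf("pat_enabled() = %d\n", pat_enabled());  /* prints 0 */
	return 0;
}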
From b7a67e02cd2b0d632114dcfb4bfb9b1d85dee325 Mon Sep 17 00:00:00 2001
From: Chen Yu
Date: Mon, 3 Jul 2017 01:06:28 +0800
Subject: [PATCH 3/5] x86/boot/e820: Avoid overwriting e820_table_firmware

The following commit in 2013:

  77ea8c948953 ("x86: Reserve setup_data ranges late after parsing memmap cmdline")

fixed the issue of losing setup_data information by deferring the
e820_reserve_setup_data() call until the early params have been parsed.

But this also introduced a new problem: during early params parsing the
kexec kernel might fake an mptable and save it into the
e820_table_firmware[] table (without saving the mptable to the
e820_table[]). The subsequent invocation of e820_reserve_setup_data()
then overwrites the e820_table_firmware[] with the contents of the
e820_table[], and the fake mptable information is lost.

Fix this issue by updating the e820_table_firmware[] according to the
setup_data information, but without overwriting it.

Signed-off-by: Chen Yu
Cc: Dave Young
Cc: Len Brown
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rafael J. Wysocki
Cc: Thomas Gleixner
Cc: Xunlei Pang
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/e820.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d78a586ba8dc..c87223e5b1aa 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -669,6 +669,8 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
 	__append_e820_table(extmap, entries);
 	e820__update_table(e820_table);
 
+	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+
 	early_memunmap(sdata, data_len);
 
 	pr_info("e820: extended physical RAM map:\n");
 	e820__print_table("extended");
@@ -923,13 +925,13 @@ void __init e820__reserve_setup_data(void)
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+		e820__range_update_firmware(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 		pa_data = data->next;
 		early_memunmap(data, sizeof(*data));
 	}
 
 	e820__update_table(e820_table);
-
-	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+	e820__update_table(e820_table_firmware);
 
 	pr_info("extended physical RAM map:\n");
 	e820__print_table("reserve setup_data");
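The shape of the fix is worth spelling out: instead of rebuilding e820_table_firmware[] with a memcpy() from e820_table[], which drops entries that exist only in the firmware copy (such as a faked mptable range added for kexec), the same range update is applied to each table independently. A schematic sketch with simplified, hypothetical types, not the kernel's e820 API:

#include <stddef.h>

enum range_type { TYPE_RAM, TYPE_RESERVED_KERN };

struct range { unsigned long long start, size; enum range_type type; };
struct table { struct range entries[128]; size_t nr; };

/* retype a matching range in one table (simplified: exact match only) */
static void range_update(struct table *t, unsigned long long start,
			 unsigned long long size, enum range_type new_type)
{
	size_t i;

	for (i = 0; i < t->nr; i++) {
		if (t->entries[i].start == start && t->entries[i].size == size)
			t->entries[i].type = new_type;
	}
}

void reserve_setup_data(struct table *table, struct table *firmware,
			unsigned long long pa, unsigned long long len)
{
	/*
	 * Before the fix: 'firmware' was overwritten with a copy of 'table',
	 * losing any entry that existed only in 'firmware'.
	 * After the fix: both tables receive the same in-place update.
	 */
	range_update(table, pa, len, TYPE_RESERVED_KERN);
	range_update(firmware, pa, len, TYPE_RESERVED_KERN);
}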
From a09bae0f8aa08d4d76d0ebece26062a49ec51ac9 Mon Sep 17 00:00:00 2001
From: Chen Yu
Date: Mon, 3 Jul 2017 01:07:12 +0800
Subject: [PATCH 4/5] x86/boot/e820: Rename the e820_table_firmware to e820_table_kexec

Currently the e820_table_firmware[] table is mainly used by kexec, and
it is not what its name suggests: despite being called a 'firmware'
table, it might be modified by the kernel. So change its name to
e820_table_kexec[]. In the next patch we will introduce the real
e820_table_firmware[] table.

No functional change.

Signed-off-by: Chen Yu
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Xunlei Pang
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/e820/api.h   |  2 +-
 arch/x86/kernel/e820.c            | 42 +++++++++++++++----------------
 arch/x86/kernel/kexec-bzimage64.c |  4 +--
 arch/x86/power/hibernate_64.c     |  4 +--
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 8e0f8b85b209..a688095421ab 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -4,7 +4,7 @@
 #include
 
 extern struct e820_table *e820_table;
-extern struct e820_table *e820_table_firmware;
+extern struct e820_table *e820_table_kexec;
 
 extern unsigned long pci_mem_start;
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c87223e5b1aa..591019031e23 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -22,7 +22,7 @@
 /*
  * We organize the E820 table into two main data structures:
  *
- * - 'e820_table_firmware': the original firmware version passed to us by the
+ * - 'e820_table_kexec': the original firmware version passed to us by the
  *   bootloader - not modified by the kernel. We use this to:
  *
  *   - inform the user about the firmware's notion of memory layout
@@ -46,10 +46,10 @@
  *   specific memory layout data during early bootup.
  */
 static struct e820_table e820_table_init		__initdata;
-static struct e820_table e820_table_firmware_init	__initdata;
+static struct e820_table e820_table_kexec_init		__initdata;
 
 struct e820_table *e820_table __refdata			= &e820_table_init;
-struct e820_table *e820_table_firmware __refdata	= &e820_table_firmware_init;
+struct e820_table *e820_table_kexec __refdata		= &e820_table_kexec_init;
 
 /* For PCI or other memory-mapped resources */
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -470,9 +470,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
 	return __e820__range_update(e820_table, start, size, old_type, new_type);
 }
 
-static u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
+static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
 {
-	return __e820__range_update(e820_table_firmware, start, size, old_type, new_type);
+	return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
 }
 
 /* Remove a range of memory from the E820 table: */
@@ -546,9 +546,9 @@ void __init e820__update_table_print(void)
 	e820__print_table("modified");
 }
 
-static void __init e820__update_table_firmware(void)
+static void __init e820__update_table_kexec(void)
 {
-	e820__update_table(e820_table_firmware);
+	e820__update_table(e820_table_kexec);
 }
 
 #define MAX_GAP_END 0x100000000ull
@@ -623,7 +623,7 @@ __init void e820__setup_pci_gap(void)
 /*
  * Called late during init, in free_initmem().
  *
- * Initial e820_table and e820_table_firmware are largish __initdata arrays.
+ * Initial e820_table and e820_table_kexec are largish __initdata arrays.
  *
  * Copy them to a (usually much smaller) dynamically allocated area that is
  * sized precisely after the number of e820 entries.
@@ -643,11 +643,11 @@ __init void e820__reallocate_tables(void)
 	memcpy(n, e820_table, size);
 	e820_table = n;
 
-	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
+	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
 	n = kmalloc(size, GFP_KERNEL);
 	BUG_ON(!n);
-	memcpy(n, e820_table_firmware, size);
-	e820_table_firmware = n;
+	memcpy(n, e820_table_kexec, size);
+	e820_table_kexec = n;
 }
 
 /*
@@ -669,7 +669,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
 	__append_e820_table(extmap, entries);
 	e820__update_table(e820_table);
 
-	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
 
 	early_memunmap(sdata, data_len);
 
 	pr_info("e820: extended physical RAM map:\n");
@@ -729,7 +729,7 @@ core_initcall(e820__register_nvs_regions);
 /*
  * Allocate the requested number of bytes with the requsted alignment
  * and return (the physical address) to the caller. Also register this
- * range in the 'firmware' E820 table as a reserved range.
+ * range in the 'kexec' E820 table as a reserved range.
  *
  * This allows kexec to fake a new mptable, as if it came from the real
  * system.
@@ -740,9 +740,9 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
 	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 
 	if (addr) {
-		e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
-		pr_info("e820: update e820_table_firmware for e820__memblock_alloc_reserved()\n");
-		e820__update_table_firmware();
+		e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+		pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
+		e820__update_table_kexec();
 	}
 
 	return addr;
@@ -925,13 +925,13 @@ void __init e820__reserve_setup_data(void)
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-		e820__range_update_firmware(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+		e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 		pa_data = data->next;
 		early_memunmap(data, sizeof(*data));
 	}
 
 	e820__update_table(e820_table);
-	e820__update_table(e820_table_firmware);
+	e820__update_table(e820_table_kexec);
 
 	pr_info("extended physical RAM map:\n");
 	e820__print_table("reserve setup_data");
@@ -1064,8 +1064,8 @@ void __init e820__reserve_resources(void)
 		res++;
 	}
 
-	for (i = 0; i < e820_table_firmware->nr_entries; i++) {
-		struct e820_entry *entry = e820_table_firmware->entries + i;
+	for (i = 0; i < e820_table_kexec->nr_entries; i++) {
+		struct e820_entry *entry = e820_table_kexec->entries + i;
 
 		firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry));
 	}
@@ -1177,7 +1177,7 @@ void __init e820__memory_setup(void)
 
 	who = x86_init.resources.memory_setup();
 
-	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
 
 	pr_info("e820: BIOS-provided physical RAM map:\n");
 	e820__print_table(who);
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 9d7fd5e6689a..fb095ba0c02f 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -100,14 +100,14 @@ static int setup_e820_entries(struct boot_params *params)
 {
 	unsigned int nr_e820_entries;
 
-	nr_e820_entries = e820_table_firmware->nr_entries;
+	nr_e820_entries = e820_table_kexec->nr_entries;
 
 	/* TODO: Pass entries more than E820_MAX_ENTRIES_ZEROPAGE in bootparams setup data */
 	if (nr_e820_entries > E820_MAX_ENTRIES_ZEROPAGE)
 		nr_e820_entries = E820_MAX_ENTRIES_ZEROPAGE;
 
 	params->e820_entries = nr_e820_entries;
-	memcpy(&params->e820_table, &e820_table_firmware->entries, nr_e820_entries*sizeof(struct e820_entry));
+	memcpy(&params->e820_table, &e820_table_kexec->entries, nr_e820_entries*sizeof(struct e820_entry));
 
 	return 0;
 }
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index e3e62c8a8e70..3ba161a08d6e 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -251,7 +251,7 @@ static int get_e820_md5(struct e820_table *table, void *buf)
 
 static void hibernation_e820_save(void *buf)
 {
-	get_e820_md5(e820_table_firmware, buf);
+	get_e820_md5(e820_table_kexec, buf);
 }
 
 static bool hibernation_e820_mismatch(void *buf)
@@ -264,7 +264,7 @@ static bool hibernation_e820_mismatch(void *buf)
 	if (!memcmp(result, buf, MD5_DIGEST_SIZE))
 		return false;
 
-	ret = get_e820_md5(e820_table_firmware, result);
+	ret = get_e820_md5(e820_table_kexec, result);
 	if (ret)
 		return true;
From 12df216c61c89e31e27e74146115a9728880ca6f Mon Sep 17 00:00:00 2001
From: Chen Yu
Date: Mon, 3 Jul 2017 01:07:32 +0800
Subject: [PATCH 5/5] x86/boot/e820: Introduce the bootloader provided e820_table_firmware[] table

Add the real e820_table_firmware[] table, which will not be modified by
the kernel or the EFI boot stub under any circumstance. In addition,
modify the code so that e820_table_firmware[] is exposed via sysfs to
represent the real firmware memory layout, rather than exposing the
e820_table_kexec[] table.

This fixes a hibernation bug/warning in the code that uses
e820_table_kexec[] to check RAM layout consistency across
hibernation/resume:

The suspend kernel:
 [    0.000000] e820: update [mem 0x76671018-0x76679457] usable ==> usable

The resume kernel:
 [    0.000000] e820: update [mem 0x7666f018-0x76677457] usable ==> usable
 ...
 [   15.752088] PM: Using 3 thread(s) for decompression.
 [   15.752088] PM: Loading and decompressing image data (471870 pages)...
 [   15.764971] Hibernate inconsistent memory map detected!
 [   15.770833] PM: Image mismatch: architecture specific data

It is actually safe to restore these pages, because E820_TYPE_RAM and
E820_TYPE_RESERVED_KERN are treated the same during hibernation, so the
original e820 table provided by the bootloader is now used for the
hibernation MD5 fingerprint check.

The side effect is that this newly introduced table slightly increases
the kernel size at compile time.

Suggested-by: Ingo Molnar
Signed-off-by: Chen Yu
Cc: Dave Young
Cc: Len Brown
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rafael J. Wysocki
Cc: Thomas Gleixner
Cc: Xunlei Pang
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/e820/api.h |  1 +
 arch/x86/kernel/e820.c          | 31 ++++++++++++++++++++++++++-----
 arch/x86/power/hibernate_64.c   |  4 ++--
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index a688095421ab..a504adc661a4 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -5,6 +5,7 @@
 
 extern struct e820_table *e820_table;
 extern struct e820_table *e820_table_kexec;
+extern struct e820_table *e820_table_firmware;
 
 extern unsigned long pci_mem_start;
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 591019031e23..532da61d605c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -20,10 +20,12 @@
 #include
 
 /*
- * We organize the E820 table into two main data structures:
+ * We organize the E820 table into three main data structures:
  *
- * - 'e820_table_kexec': the original firmware version passed to us by the
- *   bootloader - not modified by the kernel. We use this to:
+ * - 'e820_table_firmware': the original firmware version passed to us by the
+ *   bootloader - not modified by the kernel. It is composed of two parts:
+ *   the first 128 E820 memory entries in boot_params.e820_table and the remaining
+ *   (if any) entries of the SETUP_E820_EXT nodes. We use this to:
  *
  *   - inform the user about the firmware's notion of memory layout
  *     via /sys/firmware/memmap
@@ -31,6 +33,14 @@
  *   - the hibernation code uses it to generate a kernel-independent MD5
  *     fingerprint of the physical memory layout of a system.
 *
+ * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
+ *   passed to us by the bootloader - the major difference between
+ *   e820_table_firmware[] and this one is that, the latter marks the setup_data
+ *   list created by the EFI boot stub as reserved, so that kexec can reuse the
+ *   setup_data information in the second kernel. Besides, e820_table_kexec[]
+ *   might also be modified by the kexec itself to fake a mptable.
+ *   We use this to:
+ *
  *   - kexec, which is a bootloader in disguise, uses the original E820
  *     layout to pass to the kexec-ed kernel. This way the original kernel
  *     can have a restricted E820 map while the kexec()-ed kexec-kernel
@@ -47,9 +57,11 @@
  */
 static struct e820_table e820_table_init		__initdata;
 static struct e820_table e820_table_kexec_init		__initdata;
+static struct e820_table e820_table_firmware_init	__initdata;
 
 struct e820_table *e820_table __refdata			= &e820_table_init;
 struct e820_table *e820_table_kexec __refdata		= &e820_table_kexec_init;
+struct e820_table *e820_table_firmware __refdata	= &e820_table_firmware_init;
 
 /* For PCI or other memory-mapped resources */
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -648,6 +660,12 @@ __init void e820__reallocate_tables(void)
 	BUG_ON(!n);
 	memcpy(n, e820_table_kexec, size);
 	e820_table_kexec = n;
+
+	size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
+	n = kmalloc(size, GFP_KERNEL);
+	BUG_ON(!n);
+	memcpy(n, e820_table_firmware, size);
+	e820_table_firmware = n;
 }
 
 /*
@@ -670,6 +688,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
 	e820__update_table(e820_table);
 
 	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
+	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
 	early_memunmap(sdata, data_len);
 
 	pr_info("e820: extended physical RAM map:\n");
@@ -1064,8 +1083,9 @@ void __init e820__reserve_resources(void)
 		res++;
 	}
 
-	for (i = 0; i < e820_table_kexec->nr_entries; i++) {
-		struct e820_entry *entry = e820_table_kexec->entries + i;
+	/* Expose the bootloader-provided memory layout to the sysfs. */
+	for (i = 0; i < e820_table_firmware->nr_entries; i++) {
+		struct e820_entry *entry = e820_table_firmware->entries + i;
 
 		firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry));
 	}
@@ -1178,6 +1198,7 @@ void __init e820__memory_setup(void)
 	who = x86_init.resources.memory_setup();
 
 	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
+	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
 	pr_info("e820: BIOS-provided physical RAM map:\n");
 	e820__print_table(who);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 3ba161a08d6e..e3e62c8a8e70 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -251,7 +251,7 @@ static int get_e820_md5(struct e820_table *table, void *buf)
 
 static void hibernation_e820_save(void *buf)
 {
-	get_e820_md5(e820_table_kexec, buf);
+	get_e820_md5(e820_table_firmware, buf);
 }
 
 static bool hibernation_e820_mismatch(void *buf)
@@ -264,7 +264,7 @@ static bool hibernation_e820_mismatch(void *buf)
 	if (!memcmp(result, buf, MD5_DIGEST_SIZE))
 		return false;
 
-	ret = get_e820_md5(e820_table_kexec, result);
+	ret = get_e820_md5(e820_table_firmware, result);
 	if (ret)
 		return true;
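The hibernation angle only works because both the suspend and the resume kernel now hash a table that neither of them modifies, which is exactly what the new e820_table_firmware[] provides. A toy model of that consistency check, using a plain FNV-1a checksum instead of the kernel's MD5 code and an invented snapshot layout:

#include <stddef.h>
#include <stdint.h>

struct e820_range { uint64_t addr, size; uint32_t type; };
struct e820_snapshot { uint32_t nr; struct e820_range entries[32]; };

/* stand-in for the MD5 digest the kernel stores in the hibernation image */
static uint64_t fingerprint(const struct e820_snapshot *t)
{
	const uint8_t *p = (const uint8_t *)t->entries;
	size_t i, len = t->nr * sizeof(t->entries[0]);
	uint64_t h = 1469598103934665603ull;		/* FNV-1a offset basis */

	for (i = 0; i < len; i++) {
		h ^= p[i];
		h *= 1099511628211ull;			/* FNV-1a prime */
	}
	return h;
}

/* suspend side: hash the bootloader-provided table, not the kexec copy */
uint64_t hibernation_save(const struct e820_snapshot *firmware)
{
	return fingerprint(firmware);
}

/* resume side: a mismatch means the physical memory layout really changed */
int hibernation_mismatch(const struct e820_snapshot *firmware, uint64_t saved)
{
	return fingerprint(firmware) != saved;
}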