x86: kdump: move reserve_crashkernel[_low]() into crash_core.c

From: Chen Zhou <chenzhou10@huawei.com>
Link: https://lkml.org/lkml/2021/1/30/53
Link: 8cb8686864

Make the functions reserve_crashkernel[_low]() as generic.
Arm64 will use these to reimplement crashkernel=X.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Tested-by: John Donnelly <John.p.donnelly@oracle.com>
Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Bin Lai <robinlai@tencent.com>
This commit is contained in:
Kairui Song 2022-06-06 13:50:12 +08:00 committed by Jianping Liu
parent e5eac006f6
commit 3177fa46ec
6 changed files with 189 additions and 171 deletions

View File

@ -21,6 +21,27 @@
/* 16M alignment for crash kernel regions */
#define CRASH_ALIGN SZ_16M
/*
* Keep the crash kernel below this limit.
*
* Earlier 32-bits kernels would limit the kernel to the low 512 MB range
* due to mapping restrictions.
*
* 64-bit kdump kernels need to be restricted to be under 64 TB, which is
* the upper limit of system RAM in 4-level paging mode. Since the kdump
* jump could be from 5-level paging to 4-level paging, the jump will fail if
* the kernel is put above 64 TB, and during the 1st kernel bootup there's
* no good way to detect the paging mode of the target kernel which will be
* loaded for dumping.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
# define CRASH_ADDR_LOW_MAX SZ_4G
# define CRASH_ADDR_HIGH_MAX SZ_64T
#endif
#ifndef __ASSEMBLY__
#include <linux/string.h>
@ -210,6 +231,10 @@ typedef void crash_vmclear_fn(void);
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_KEXEC_CORE
extern void __init reserve_crashkernel(void);
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */

View File

@ -107,6 +107,7 @@
#include <asm/paravirt.h>
#include <asm/hypervisor.h>
#include <asm/kexec.h>
#include <asm/olpc_ofw.h>
#include <asm/percpu.h>
@ -443,158 +444,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
}
}
/*
* --------- Crashkernel reservation ------------------------------
*/
#ifdef CONFIG_KEXEC_CORE
/*
* Keep the crash kernel below this limit.
*
* On 32 bits earlier kernels would limit the kernel to the low 512 MiB
* due to mapping restrictions.
*
* On 64bit, kdump kernel need be restricted to be under 64TB, which is
* the upper limit of system RAM in 4-level paing mode. Since the kdump
* jumping could be from 5-level to 4-level, the jumping will fail if
* kernel is put above 64TB, and there's no way to detect the paging mode
* of the kernel which will be loaded for dumping during the 1st kernel
* bootup.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
# define CRASH_ADDR_LOW_MAX SZ_4G
# define CRASH_ADDR_HIGH_MAX SZ_64T
#endif
static int __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0;
unsigned long total_low_mem;
int ret;
total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
/* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
if (ret) {
/*
* two parts from kernel/dma/swiotlb.c:
* -swiotlb size: user-specified with swiotlb= or default.
*
* -swiotlb overflow buffer: now hardcoded to 32k. We round it
* to 8M for other buffers that may need to stay low too. Also
* make sure we allocate enough extra low memory so that we
* don't run out of DMA buffers for 32-bit devices.
*/
low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
} else {
/* passed with crashkernel=0,low ? */
if (!low_size)
return 0;
}
low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, low_size, CRASH_ALIGN);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20));
return -ENOMEM;
}
ret = memblock_reserve(low_base, low_size);
if (ret) {
pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
return ret;
}
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
(unsigned long)(total_low_mem >> 20));
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
#endif
return 0;
}
static void __init reserve_crashkernel(void)
{
unsigned long long crash_size, crash_base, total_mem;
bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
/* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0) {
/* crashkernel=X,high */
ret = parse_crashkernel_high(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret != 0 || crash_size <= 0)
return;
high = true;
}
/* 0 means: find the address automatically */
if (!crash_base) {
/*
* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX,
* also allocates 256M extra low memory for DMA buffers
* and swiotlb.
* But the extra memory is not required for all machines.
* So try low memory first and fall back to high memory
* unless "crashkernel=size[KMG],high" is specified.
*/
if (!high)
crash_base = memblock_find_in_range(CRASH_ALIGN,
CRASH_ADDR_LOW_MAX,
crash_size, CRASH_ALIGN);
if (!crash_base)
crash_base = memblock_find_in_range(CRASH_ALIGN,
CRASH_ADDR_HIGH_MAX,
crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
unsigned long long start;
start = memblock_find_in_range(crash_base,
crash_base + crash_size,
crash_size, CRASH_ALIGN);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
ret = memblock_reserve(crash_base, crash_size);
if (ret) {
pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
return;
}
if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size);
return;
}
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
(unsigned long)(crash_size >> 20),
(unsigned long)(crash_base >> 20),
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
}
#else
#ifndef CONFIG_KEXEC_CORE
static void __init reserve_crashkernel(void)
{
}

View File

@ -64,6 +64,9 @@ extern unsigned char *vmcoreinfo_data;
extern size_t vmcoreinfo_size;
extern u32 *vmcoreinfo_note;
extern struct resource crashk_res;
extern struct resource crashk_low_res;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
void final_note(Elf_Word *buf);

View File

@ -335,8 +335,6 @@ extern int kexec_load_disabled;
/* Location of a reserved region to hold the crash kernel.
*/
extern struct resource crashk_res;
extern struct resource crashk_low_res;
extern note_buf_t __percpu *crash_notes;
/* flag to track if kexec reboot is in progress */

View File

@ -7,6 +7,12 @@
#include <linux/crash_core.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/swiotlb.h>
#ifdef CONFIG_KEXEC_CORE
#include <asm/kexec.h>
#endif
#include <asm/page.h>
#include <asm/sections.h>
@ -19,6 +25,22 @@ u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};
/*
* parsing the "crashkernel" commandline
*
@ -292,6 +314,143 @@ int __init parse_crashkernel_low(char *cmdline,
"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}
/*
* --------- Crashkernel reservation ------------------------------
*/
#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_X86
static int __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0;
unsigned long low_mem_limit;
int ret;
low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
/* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
if (ret) {
/*
* two parts from kernel/dma/swiotlb.c:
* -swiotlb size: user-specified with swiotlb= or default.
*
* -swiotlb overflow buffer: now hardcoded to 32k. We round it
* to 8M for other buffers that may need to stay low too. Also
* make sure we allocate enough extra low memory so that we
* don't run out of DMA buffers for 32-bit devices.
*/
low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
} else {
/* passed with crashkernel=0,low ? */
if (!low_size)
return 0;
}
low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_LOW_MAX);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20));
return -ENOMEM;
}
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
(unsigned long)(low_mem_limit >> 20));
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
#endif
return 0;
}
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
* This function reserves memory area given in "crashkernel=" kernel command
* line parameter. The memory reserved is used by dump capture kernel when
* primary kernel is crashing.
*/
void __init reserve_crashkernel(void)
{
unsigned long long crash_size, crash_base, total_mem;
bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
/* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0) {
/* crashkernel=X,high */
ret = parse_crashkernel_high(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret != 0 || crash_size <= 0)
return;
high = true;
}
/* 0 means: find the address automatically */
if (!crash_base) {
/*
* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX,
* also allocates 256M extra low memory for DMA buffers
* and swiotlb.
* But the extra memory is not required for all machines.
* So try low memory first and fall back to high memory
* unless "crashkernel=size[KMG],high" is specified.
*/
if (!high)
crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_LOW_MAX);
if (!crash_base)
crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_HIGH_MAX);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
/* User specifies base address explicitly. */
unsigned long long start;
if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) {
pr_warn("cannot reserve crashkernel: base address is not %ldMB aligned\n",
(unsigned long)CRASH_ALIGN >> 20);
return;
}
start = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base,
crash_base + crash_size);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size);
return;
}
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
(unsigned long)(crash_size >> 20),
(unsigned long)(crash_base >> 20),
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
}
#endif /* CONFIG_X86 */
#endif /* CONFIG_KEXEC_CORE */
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
{

View File

@ -53,23 +53,6 @@ note_buf_t __percpu *crash_notes;
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};
int kexec_should_crash(struct task_struct *p)
{
/*