2012-02-16 09:14:22 +08:00
|
|
|
/*
|
|
|
|
* Firmware Assisted dump header file.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
*
|
|
|
|
* Copyright 2011 IBM Corporation
|
|
|
|
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __PPC64_FA_DUMP_H__
|
|
|
|
#define __PPC64_FA_DUMP_H__
|
|
|
|
|
|
|
|
#ifdef CONFIG_FA_DUMP
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The RMA region will be saved for later dumping when kernel crashes.
|
|
|
|
* RMA is Real Mode Area, the first block of logical memory address owned
|
|
|
|
* by logical partition, containing the storage that may be accessed with
|
|
|
|
* translate off.
|
|
|
|
*/
|
|
|
|
#define RMA_START	0x0
#define RMA_END		(ppc64_rma_size)

/*
 * On some Power systems where RMO is 128MB, the kernel still requires a
 * minimum of 256MB to boot successfully. When the kdump infrastructure is
 * configured to save vmcore over the network, we run into OOM issues while
 * loading the modules related to network setup. Hence we need an additional
 * 64MB of memory to avoid them.
 *
 * MIN_BOOT_MEM therefore evaluates to max(RMA_END, 256MB) + 64MB.
 * RMA_END expands to the variable ppc64_rma_size, so this is a run-time
 * expression, not a compile-time constant.
 */
#define MIN_BOOT_MEM	(((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
			+ (0x1UL << 26))
|
|
|
|
|
powerpc/fadump: Set an upper limit for boot memory size
By default, 5% of system RAM is reserved for preserving boot memory.
Alternatively, a user can specify the amount of memory to reserve.
See Documentation/powerpc/firmware-assisted-dump.txt for details. In
addition to the memory reserved for preserving boot memory, some more
memory is reserved, to save HPTE region, CPU state data and ELF core
headers.
Memory Reservation during first kernel looks like below:
Low memory Top of memory
0 boot memory size |
| | |<--Reserved dump area -->|
V V | Permanent Reservation V
+-----------+----------/ /----------+---+----+-----------+----+
| | |CPU|HPTE| DUMP |ELF |
+-----------+----------/ /----------+---+----+-----------+----+
| ^
| |
\ /
-------------------------------------------
Boot memory content gets transferred to
reserved area by firmware at the time of
crash
This implicitly means that the sum of the sizes of boot memory, CPU
state data, HPTE region, DUMP preserving area and ELF core headers
can't be greater than the total memory size. But currently, a user is
allowed to specify any value as boot memory size. So, the above rule
is violated when a boot memory size around 50% of the total available
memory is specified. As the kernel is not handling this currently, it
may lead to undefined behavior. Fix it by setting an upper limit for
boot memory size to 25% of the total available memory. Also, instead
of using memblock_end_of_DRAM(), which doesn't take the holes, if any,
in the memory layout into account, use memblock_phys_mem_size() to
calculate the percentage of total available memory.
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-06-02 15:30:27 +08:00
|
|
|
/*
 * Upper limit for a user specified boot memory size, stored as the
 * denominator of the allowed fraction of memory: 4 means 1/4, i.e. 25%.
 */
#define MAX_BOOT_MEM_RATIO	4
|
|
|
|
|
2012-02-16 09:14:37 +08:00
|
|
|
/*
 * Expands to the cnt member of the memblock sub-object selected by the
 * argument, i.e. memblock.<type>.cnt. The argument is a member name, not
 * an expression, so it cannot be parenthesized.
 */
#define memblock_num_regions(type)	(memblock.type.cnt)
|
|
|
|
|
2012-02-16 09:14:22 +08:00
|
|
|
/*
 * Firmware provided dump sections.
 * NOTE(review): presumably the values carried in
 * fadump_section.source_data_type - confirm against the users.
 */
#define FADUMP_CPU_STATE_DATA	0x0001
#define FADUMP_HPTE_REGION	0x0002
#define FADUMP_REAL_MODE_REGION	0x0011
|
|
|
|
|
2012-02-20 10:15:03 +08:00
|
|
|
/* Dump request flag */
#define FADUMP_REQUEST_FLAG	0x00000001
|
|
|
|
|
|
|
|
/* FAD commands */
#define FADUMP_REGISTER		1
#define FADUMP_UNREGISTER	2
#define FADUMP_INVALIDATE	3
|
|
|
|
|
2012-02-16 09:14:37 +08:00
|
|
|
/* Dump status flag */
#define FADUMP_ERROR_FLAG	0x2000
|
|
|
|
|
2012-02-16 09:14:45 +08:00
|
|
|
/* Mask selecting the low 32 bits of an unsigned long value. */
#define FADUMP_CPU_ID_MASK	0xffffffffUL
|
|
|
|
|
|
|
|
/* All-ones u32 value. */
#define CPU_UNKNOWN	((u32)~0U)
|
|
|
|
|
|
|
|
/* Utility macros */
|
2014-10-01 15:02:30 +08:00
|
|
|
/*
 * Advance reg_entry past the rest of the current CPU's register list:
 * step forward until the entry whose reg_id equals REG_ID("CPUEND") is
 * reached, then move one entry further so reg_entry points just beyond it.
 *
 * reg_entry must be a modifiable pointer lvalue; it is evaluated several
 * times and modified in place. The walk has no bound of its own: it only
 * terminates when a "CPUEND" entry is found.
 */
#define SKIP_TO_NEXT_CPU(reg_entry)					\
({									\
	while (be64_to_cpu((reg_entry)->reg_id) != REG_ID("CPUEND"))	\
		(reg_entry)++;						\
	(reg_entry)++;							\
})
|
|
|
|
|
2017-05-09 06:56:24 +08:00
|
|
|
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably the id of the CPU that crashed - confirm at the
 * definition.
 */
extern int crashing_cpu;
|
|
|
|
|
2012-02-20 10:15:03 +08:00
|
|
|
/* Kernel Dump section info */
|
|
|
|
struct fadump_section {
|
2014-10-01 15:02:30 +08:00
|
|
|
__be32 request_flag;
|
|
|
|
__be16 source_data_type;
|
|
|
|
__be16 error_flags;
|
|
|
|
__be64 source_address;
|
|
|
|
__be64 source_len;
|
|
|
|
__be64 bytes_dumped;
|
|
|
|
__be64 destination_address;
|
2012-02-20 10:15:03 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* ibm,configure-kernel-dump header. */
|
|
|
|
struct fadump_section_header {
|
2014-10-01 15:02:30 +08:00
|
|
|
__be32 dump_format_version;
|
|
|
|
__be16 dump_num_sections;
|
|
|
|
__be16 dump_status_flag;
|
|
|
|
__be32 offset_first_dump_section;
|
2012-02-20 10:15:03 +08:00
|
|
|
|
|
|
|
/* Fields for disk dump option. */
|
2014-10-01 15:02:30 +08:00
|
|
|
__be32 dd_block_size;
|
|
|
|
__be64 dd_block_offset;
|
|
|
|
__be64 dd_num_blocks;
|
|
|
|
__be32 dd_offset_disk_path;
|
2012-02-20 10:15:03 +08:00
|
|
|
|
|
|
|
/* Maximum time allowed to prevent an automatic dump-reboot. */
|
2014-10-01 15:02:30 +08:00
|
|
|
__be32 max_time_auto;
|
2012-02-20 10:15:03 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Firmware Assisted dump memory structure. This structure is required for
|
|
|
|
* registering future kernel dump with power firmware through rtas call.
|
|
|
|
*
|
|
|
|
* No disk dump option. Hence disk dump path string section is not included.
|
|
|
|
*/
|
|
|
|
struct fadump_mem_struct {
|
|
|
|
struct fadump_section_header header;
|
|
|
|
|
|
|
|
/* Kernel dump sections */
|
|
|
|
struct fadump_section cpu_state_data;
|
|
|
|
struct fadump_section hpte_region;
|
|
|
|
struct fadump_section rmr_region;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Firmware-assisted dump configuration details. */
struct fw_dump {
	unsigned long	cpu_state_data_size;
	unsigned long	hpte_region_size;
	unsigned long	boot_memory_size;
	unsigned long	reserve_dump_area_start;
	unsigned long	reserve_dump_area_size;
	/* cmd line option during boot */
	unsigned long	reserve_bootvar;

	unsigned long	fadumphdr_addr;
	unsigned long	cpu_notes_buf;
	unsigned long	cpu_notes_buf_size;

	/*
	 * NOTE(review): presumably the RTAS token for
	 * ibm,configure-kernel-dump - confirm where it is assigned.
	 */
	int		ibm_configure_kernel_dump;

	/* Single-bit state flags. */
	unsigned long	fadump_enabled:1;
	unsigned long	fadump_supported:1;
	unsigned long	dump_active:1;
	unsigned long	dump_registered:1;
};
|
|
|
|
|
2012-02-16 09:14:37 +08:00
|
|
|
/*
 * Pack the ASCII values of up to the first 8 characters of a string into a
 * u64, with the first character in the most significant byte.
 *
 * A string shorter than 8 characters leaves the remaining low-order bytes
 * zero; characters beyond the 8th are ignored.
 * e.g.
 *  The string "FADMPINF" will be converted into 0x4641444d50494e46
 *  The string "REGSAVE"  will be converted into 0x5245475341564500
 *
 * str must point to a NUL-terminated string (it is dereferenced
 * unconditionally).
 */
static inline u64 str_to_u64(const char *str)
{
	u64 val = 0;
	unsigned int i;

	for (i = 0; i < sizeof(val); i++) {
		val <<= 8;
		/*
		 * Stop consuming at the terminator so the remaining bytes
		 * stay zero. Go through unsigned char so a byte >= 0x80 is
		 * not sign-extended over the bytes already packed when plain
		 * char is signed.
		 */
		if (*str)
			val |= (unsigned char)*str++;
	}
	return val;
}
|
|
|
|
/*
 * Both names are aliases for str_to_u64(). They expand to a function call,
 * so the results are not integer constant expressions.
 */
#define STR_TO_HEX(x)	str_to_u64(x)
#define REG_ID(x)	str_to_u64(x)

#define FADUMP_CRASH_INFO_MAGIC		STR_TO_HEX("FADMPINF")
#define REGSAVE_AREA_MAGIC		STR_TO_HEX("REGSAVE")
|
|
|
|
|
|
|
|
/* The firmware-assisted dump format.
|
|
|
|
*
|
|
|
|
* The register save area is an area in the partition's memory used to preserve
|
|
|
|
* the register contents (CPU state data) for the active CPUs during a firmware
|
|
|
|
* assisted dump. The dump format contains register save area header followed
|
|
|
|
* by register entries. Each list of registers for a CPU starts with
|
|
|
|
* "CPUSTRT" and ends with "CPUEND".
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Register save area header. */
|
|
|
|
struct fadump_reg_save_area_header {
|
2014-10-01 15:02:30 +08:00
|
|
|
__be64 magic_number;
|
|
|
|
__be32 version;
|
|
|
|
__be32 num_cpu_offset;
|
2012-02-16 09:14:45 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Register entry. */
|
|
|
|
struct fadump_reg_entry {
|
2014-10-01 15:02:30 +08:00
|
|
|
__be64 reg_id;
|
|
|
|
__be64 reg_value;
|
2012-02-16 09:14:45 +08:00
|
|
|
};
|
2012-02-16 09:14:37 +08:00
|
|
|
|
|
|
|
/* fadump crash info structure */
|
|
|
|
struct fadump_crash_info_header {
|
|
|
|
u64 magic_number;
|
|
|
|
u64 elfcorehdr_addr;
|
2012-02-16 09:14:45 +08:00
|
|
|
u32 crashing_cpu;
|
|
|
|
struct pt_regs regs;
|
powerpc/fadump: rename cpu_online_mask member of struct fadump_crash_info_header
The four cpumasks cpu_{possible,online,present,active}_bits are exposed
readonly via the corresponding const variables cpu_xyz_mask. But they are
also accessible for arbitrary writing via the exposed functions
set_cpu_xyz. There's quite a bit of code throughout the kernel which
iterates over or otherwise accesses these bitmaps, and having the access
go via the cpu_xyz_mask variables is nowadays [1] simply a useless
indirection.
It may be that any problem in CS can be solved by an extra level of
indirection, but that doesn't mean every extra indirection solves a
problem. In this case, it even necessitates some minor ugliness (see
4/6).
Patch 1/6 is new in v2, and fixes a build failure on ppc by renaming a
struct member, to avoid problems when the identifier cpu_online_mask
becomes a macro later in the series. The next four patches eliminate the
cpu_xyz_mask variables by simply exposing the actual bitmaps, after
renaming them to discourage direct access - that still happens through
cpu_xyz_mask, which are now simply macros with the same type and value as
they used to have.
After that, there's no longer any reason to have the setter functions be
out-of-line: The boolean parameter is almost always a literal true or
false, so by making them static inlines they will usually compile to one
or two instructions.
For a defconfig build on x86_64, bloat-o-meter says we save ~3000 bytes.
We also save a little stack (stackdelta says 127 functions have a 16 byte
smaller stack frame, while two grow by that amount). Mostly because, when
iterating over the mask, gcc typically loads the value of cpu_xyz_mask
into a callee-saved register and from there into %rdi before each
find_next_bit call - now it can just load the appropriate immediate
address into %rdi before each call.
[1] See Rusty's kind explanation
http://thread.gmane.org/gmane.linux.kernel/2047078/focus=2047722 for
some historic context.
This patch (of 6):
As preparation for eliminating the indirect access to the various global
cpu_*_bits bitmaps via the pointer variables cpu_*_mask, rename the
cpu_online_mask member of struct fadump_crash_info_header to simply
online_mask, thus allowing cpu_online_mask to become a macro.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 07:00:13 +08:00
|
|
|
struct cpumask online_mask;
|
2012-02-16 09:14:37 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Crash memory ranges: two more than INIT_MEMBLOCK_REGIONS. */
#define INIT_CRASHMEM_RANGES	(INIT_MEMBLOCK_REGIONS + 2)
|
|
|
|
|
|
|
|
/* One crash memory range, described by its base and its size. */
struct fad_crash_memory_ranges {
	unsigned long long	base;
	unsigned long long	size;
};
|
|
|
|
|
2017-06-02 01:21:26 +08:00
|
|
|
/* Interface available when CONFIG_FA_DUMP is set; defined elsewhere. */
extern int is_fadump_boot_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
		const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
extern void crash_fadump(struct pt_regs *regs, const char *str);
extern void fadump_cleanup(void);
|
|
|
|
|
2012-02-20 10:15:03 +08:00
|
|
|
#else /* CONFIG_FA_DUMP */
|
|
|
|
/* Stub for builds without CONFIG_FA_DUMP: always reports 0. */
static inline int is_fadump_active(void)
{
	return 0;
}
|
2012-02-16 09:14:45 +08:00
|
|
|
/* Stub for builds without CONFIG_FA_DUMP: does nothing. */
static inline void crash_fadump(struct pt_regs *regs, const char *str)
{
}
|
2012-02-16 09:14:22 +08:00
|
|
|
#endif
|
|
|
|
#endif
|