2019-05-27 14:55:01 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2010-07-12 12:36:09 +08:00
|
|
|
#ifndef _LINUX_MEMBLOCK_H
|
|
|
|
#define _LINUX_MEMBLOCK_H
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Logical memory blocks.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2001 Peter Bergner, IBM Corp.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/mm.h>
|
2018-10-31 06:09:49 +08:00
|
|
|
#include <asm/dma.h>
|
|
|
|
|
|
|
|
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;

/*
 * highest page
 */
extern unsigned long max_pfn;
/*
 * highest possible page
 */
extern unsigned long long max_possible_pfn;
|
2010-07-12 12:36:09 +08:00
|
|
|
|
2018-06-30 22:55:04 +08:00
|
|
|
/**
 * enum memblock_flags - definition of memory region attributes
 * @MEMBLOCK_NONE: no special request
 * @MEMBLOCK_HOTPLUG: memory region indicated in the firmware-provided memory
 * map during early boot as hot(un)pluggable system RAM (e.g., memory range
 * that might get hotunplugged later). With "movable_node" set on the kernel
 * commandline, try keeping this memory region hotunpluggable. Does not apply
 * to memblocks added ("hotplugged") after early boot.
 * @MEMBLOCK_MIRROR: mirrored region
 * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as
 * reserved in the memory map; refer to memblock_mark_nomap() description
 * for further details
 * @MEMBLOCK_DRIVER_MANAGED: memory region that is always detected and added
 * via a driver, and never indicated in the firmware-provided memory map as
 * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
 * kernel resource tree.
 *
 * Every attribute other than %MEMBLOCK_NONE occupies its own bit, so
 * attributes can be combined with a bitwise OR.
 */
enum memblock_flags {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hot(un)pluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
};
|
2014-01-22 07:49:23 +08:00
|
|
|
|
2018-06-30 22:55:04 +08:00
|
|
|
/**
|
|
|
|
* struct memblock_region - represents a memory region
|
2020-06-04 07:03:28 +08:00
|
|
|
* @base: base address of the region
|
2018-06-30 22:55:04 +08:00
|
|
|
* @size: size of the region
|
|
|
|
* @flags: memory region attributes
|
|
|
|
* @nid: NUMA node id
|
|
|
|
*/
|
2010-08-04 12:06:41 +08:00
|
|
|
struct memblock_region {
|
2010-08-04 11:34:42 +08:00
|
|
|
phys_addr_t base;
|
|
|
|
phys_addr_t size;
|
2018-06-30 22:55:01 +08:00
|
|
|
enum memblock_flags flags;
|
2021-06-29 10:43:01 +08:00
|
|
|
#ifdef CONFIG_NUMA
|
2011-07-14 17:43:42 +08:00
|
|
|
int nid;
|
|
|
|
#endif
|
2010-07-12 12:36:09 +08:00
|
|
|
};
|
|
|
|
|
2018-06-30 22:55:04 +08:00
|
|
|
/**
|
|
|
|
* struct memblock_type - collection of memory regions of certain type
|
|
|
|
* @cnt: number of regions
|
|
|
|
* @max: size of the allocated array
|
|
|
|
* @total_size: size of all regions
|
|
|
|
* @regions: array of regions
|
|
|
|
* @name: the memory type symbolic name
|
|
|
|
*/
|
2010-08-04 12:06:41 +08:00
|
|
|
struct memblock_type {
|
2018-06-30 22:55:04 +08:00
|
|
|
unsigned long cnt;
|
|
|
|
unsigned long max;
|
|
|
|
phys_addr_t total_size;
|
2010-07-07 06:39:06 +08:00
|
|
|
struct memblock_region *regions;
|
2017-02-25 06:55:59 +08:00
|
|
|
char *name;
|
2010-07-12 12:36:09 +08:00
|
|
|
};
|
|
|
|
|
2018-06-30 22:55:04 +08:00
|
|
|
/**
|
|
|
|
* struct memblock - memblock allocator metadata
|
|
|
|
* @bottom_up: is bottom up direction?
|
|
|
|
* @current_limit: physical address of the current allocation limit
|
2020-06-04 07:03:28 +08:00
|
|
|
* @memory: usable memory regions
|
2018-06-30 22:55:04 +08:00
|
|
|
* @reserved: reserved memory regions
|
|
|
|
*/
|
2010-07-12 12:36:09 +08:00
|
|
|
struct memblock {
|
2013-11-13 07:07:59 +08:00
|
|
|
bool bottom_up; /* is bottom up direction? */
|
2010-08-04 11:34:42 +08:00
|
|
|
phys_addr_t current_limit;
|
2010-08-04 12:06:41 +08:00
|
|
|
struct memblock_type memory;
|
|
|
|
struct memblock_type reserved;
|
2010-07-12 12:36:09 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
extern struct memblock memblock;
|
2010-07-28 13:07:21 +08:00
|
|
|
|
2019-05-14 08:22:59 +08:00
|
|
|
/*
 * When CONFIG_ARCH_KEEP_MEMBLOCK is not set, __init_memblock and
 * __initdata_memblock expand to __meminit and __meminitdata, and
 * memblock_discard() is an out-of-line function. When it is set, both
 * annotations expand to nothing and memblock_discard() is an empty inline
 * stub.
 */
#ifndef CONFIG_ARCH_KEEP_MEMBLOCK
#define __init_memblock __meminit
#define __initdata_memblock __meminitdata
void memblock_discard(void);
#else
#define __init_memblock
#define __initdata_memblock
static inline void memblock_discard(void) {}
#endif
|
|
|
|
|
2011-12-09 02:22:08 +08:00
|
|
|
void memblock_allow_resize(void);
|
2021-11-06 04:44:49 +08:00
|
|
|
int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid,
|
|
|
|
enum memblock_flags flags);
|
2011-12-09 02:22:06 +08:00
|
|
|
int memblock_add(phys_addr_t base, phys_addr_t size);
|
|
|
|
int memblock_remove(phys_addr_t base, phys_addr_t size);
|
2021-11-06 04:43:19 +08:00
|
|
|
int memblock_phys_free(phys_addr_t base, phys_addr_t size);
|
2011-12-09 02:22:06 +08:00
|
|
|
int memblock_reserve(phys_addr_t base, phys_addr_t size);
|
2020-01-31 14:14:20 +08:00
|
|
|
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
|
|
|
|
int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
|
|
|
|
#endif
|
2012-10-23 07:35:18 +08:00
|
|
|
void memblock_trim_memory(phys_addr_t align);
|
mem-hotplug: handle node hole when initializing numa_meminfo.
When parsing SRAT, all memory ranges are added into numa_meminfo. In
numa_init(), before entering numa_cleanup_meminfo(), all possible memory
ranges are in numa_meminfo. And numa_cleanup_meminfo() removes all
ranges over max_pfn or empty.
But, this only works if the nodes are continuous. Let's have a look at
the following example:
We have an SRAT like this:
SRAT: Node 0 PXM 0 [mem 0x00000000-0x5fffffff]
SRAT: Node 0 PXM 0 [mem 0x100000000-0x1ffffffffff]
SRAT: Node 1 PXM 1 [mem 0x20000000000-0x3ffffffffff]
SRAT: Node 4 PXM 2 [mem 0x40000000000-0x5ffffffffff] hotplug
SRAT: Node 5 PXM 3 [mem 0x60000000000-0x7ffffffffff] hotplug
SRAT: Node 2 PXM 4 [mem 0x80000000000-0x9ffffffffff] hotplug
SRAT: Node 3 PXM 5 [mem 0xa0000000000-0xbffffffffff] hotplug
SRAT: Node 6 PXM 6 [mem 0xc0000000000-0xdffffffffff] hotplug
SRAT: Node 7 PXM 7 [mem 0xe0000000000-0xfffffffffff] hotplug
On boot, only node 0,1,2,3 exist.
And the numa_meminfo will look like this:
numa_meminfo.nr_blks = 9
1. on node 0: [0, 60000000]
2. on node 0: [100000000, 20000000000]
3. on node 1: [20000000000, 40000000000]
4. on node 4: [40000000000, 60000000000]
5. on node 5: [60000000000, 80000000000]
6. on node 2: [80000000000, a0000000000]
7. on node 3: [a0000000000, a0800000000]
8. on node 6: [c0000000000, a0800000000]
9. on node 7: [e0000000000, a0800000000]
And numa_cleanup_meminfo() will merge 1 and 2, and remove 8,9 because the
end address is over max_pfn, which is a0800000000. But 4 and 5 are not
removed because their end addresses are less than max_pfn. But in fact,
node 4 and 5 don't exist.
In a word, numa_cleanup_meminfo() is not able to handle holes between nodes.
Since memory ranges in node 4 and 5 are in numa_meminfo, in
numa_register_memblks(), node 4 and 5 will be mistakenly set to online.
If you run lscpu, it will show:
NUMA node0 CPU(s): 0-14,128-142
NUMA node1 CPU(s): 15-29,143-157
NUMA node2 CPU(s):
NUMA node3 CPU(s):
NUMA node4 CPU(s): 62-76,190-204
NUMA node5 CPU(s): 78-92,206-220
In this patch, we use memblock_overlaps_region() to check if ranges in
numa_meminfo overlap with ranges in memory_block. Since memory_block
contains all available memory at boot time, if they overlap, it means the
ranges exist. If not, then remove them from numa_meminfo.
After this patch, lscpu will show:
NUMA node0 CPU(s): 0-14,128-142
NUMA node1 CPU(s): 15-29,143-157
NUMA node4 CPU(s): 62-76,190-204
NUMA node5 CPU(s): 78-92,206-220
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Vladimir Murzin <vladimir.murzin@arm.com>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-09 06:02:03 +08:00
|
|
|
bool memblock_overlaps_region(struct memblock_type *type,
|
|
|
|
phys_addr_t base, phys_addr_t size);
|
2014-01-22 07:49:23 +08:00
|
|
|
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
|
|
|
|
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
|
2015-06-25 07:58:12 +08:00
|
|
|
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
|
2015-11-30 20:28:15 +08:00
|
|
|
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
|
2017-04-03 10:23:54 +08:00
|
|
|
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
|
2014-01-30 01:16:01 +08:00
|
|
|
|
2021-01-14 15:08:17 +08:00
|
|
|
void memblock_free_all(void);
|
2021-11-06 04:43:22 +08:00
|
|
|
void memblock_free(void *ptr, size_t size);
|
2018-10-31 06:09:49 +08:00
|
|
|
void reset_node_managed_pages(pg_data_t *pgdat);
|
|
|
|
void reset_all_zones_managed_pages(void);
|
|
|
|
|
2014-01-30 01:16:01 +08:00
|
|
|
/* Low level functions */
|
2018-06-30 22:55:01 +08:00
|
|
|
void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
|
2015-06-25 07:58:09 +08:00
|
|
|
struct memblock_type *type_a,
|
2014-01-30 01:16:01 +08:00
|
|
|
struct memblock_type *type_b, phys_addr_t *out_start,
|
|
|
|
phys_addr_t *out_end, int *out_nid);
|
|
|
|
|
2018-06-30 22:55:01 +08:00
|
|
|
void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags,
|
2015-06-25 07:58:09 +08:00
|
|
|
struct memblock_type *type_a,
|
2014-01-30 01:16:01 +08:00
|
|
|
struct memblock_type *type_b, phys_addr_t *out_start,
|
|
|
|
phys_addr_t *out_end, int *out_nid);
|
|
|
|
|
2021-11-06 04:43:16 +08:00
|
|
|
void memblock_free_late(phys_addr_t base, phys_addr_t size);
|
2017-08-19 06:16:05 +08:00
|
|
|
|
2020-07-01 22:18:29 +08:00
|
|
|
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
|
|
|
|
static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
|
|
|
|
phys_addr_t *out_start,
|
|
|
|
phys_addr_t *out_end)
|
|
|
|
{
|
|
|
|
extern struct memblock_type physmem;
|
|
|
|
|
|
|
|
__next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type,
|
|
|
|
out_start, out_end, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * for_each_physmem_range - iterate through physmem areas not included in type.
 * @i: u64 used as loop variable
 * @type: ptr to memblock_type which excludes from the iteration, can be %NULL
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 *
 * The loop stops once @i equals (u64)ULLONG_MAX. All arguments are expanded
 * more than once, so they must not have side effects.
 */
#define for_each_physmem_range(i, type, p_start, p_end)			\
	for (i = 0, __next_physmem_range(&i, type, p_start, p_end);	\
	     i != (u64)ULLONG_MAX;					\
	     __next_physmem_range(&i, type, p_start, p_end))
|
|
|
|
#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */
|
|
|
|
|
2014-01-30 01:16:01 +08:00
|
|
|
/**
 * __for_each_mem_range - iterate through memblock areas from type_a and not
 * included in type_b. Or just type_a if type_b is NULL.
 * @i: u64 used as loop variable
 * @type_a: ptr to memblock_type to iterate
 * @type_b: ptr to memblock_type which excludes from the iteration
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @p_nid: ptr to int for nid of the range, can be %NULL
 *
 * Starts with @i set to 0 and stops once @i equals (u64)ULLONG_MAX. All
 * arguments are expanded more than once, so they must not have side effects.
 */
#define __for_each_mem_range(i, type_a, type_b, nid, flags,		\
			   p_start, p_end, p_nid)			\
	for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b,	\
				     p_start, p_end, p_nid);		\
	     i != (u64)ULLONG_MAX;					\
	     __next_mem_range(&i, nid, flags, type_a, type_b,		\
			      p_start, p_end, p_nid))
|
|
|
|
|
|
|
|
/**
 * __for_each_mem_range_rev - reverse iterate through memblock areas from
 * type_a and not included in type_b. Or just type_a if type_b is NULL.
 * @i: u64 used as loop variable
 * @type_a: ptr to memblock_type to iterate
 * @type_b: ptr to memblock_type which excludes from the iteration
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @p_nid: ptr to int for nid of the range, can be %NULL
 *
 * Starts with @i set to (u64)ULLONG_MAX and stops once the step function
 * leaves @i at (u64)ULLONG_MAX again. All arguments are expanded more than
 * once, so they must not have side effects.
 */
#define __for_each_mem_range_rev(i, type_a, type_b, nid, flags,		\
				 p_start, p_end, p_nid)			\
	for (i = (u64)ULLONG_MAX,					\
		     __next_mem_range_rev(&i, nid, flags, type_a, type_b, \
					  p_start, p_end, p_nid);	\
	     i != (u64)ULLONG_MAX;					\
	     __next_mem_range_rev(&i, nid, flags, type_a, type_b,	\
				  p_start, p_end, p_nid))
|
|
|
|
|
2020-10-14 07:57:59 +08:00
|
|
|
/**
 * for_each_mem_range - iterate through memory areas.
 * @i: u64 used as loop variable
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 *
 * Walks memblock.memory on all nodes with nothing excluded.
 * %MEMBLOCK_HOTPLUG and %MEMBLOCK_DRIVER_MANAGED are passed as flags so that
 * regions carrying those attributes are treated like ordinary memory regions
 * by this iterator.
 */
#define for_each_mem_range(i, p_start, p_end) \
	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,	\
			     MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED, \
			     p_start, p_end, NULL)
|
2020-10-14 07:57:59 +08:00
|
|
|
|
|
|
|
/**
 * for_each_mem_range_rev - reverse iterate through memory areas.
 * @i: u64 used as loop variable
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 *
 * Reverse counterpart of for_each_mem_range(): walks memblock.memory on all
 * nodes with nothing excluded, passing %MEMBLOCK_HOTPLUG and
 * %MEMBLOCK_DRIVER_MANAGED as flags.
 */
#define for_each_mem_range_rev(i, p_start, p_end)			\
	__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
				 MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED,\
				 p_start, p_end, NULL)
|
2020-10-14 07:57:59 +08:00
|
|
|
|
memblock: introduce a for_each_reserved_mem_region iterator
Struct page initialisation had been identified as one of the reasons why
large machines take a long time to boot. Patches were posted a long time ago
to defer initialisation until they were first used. This was rejected on
the grounds it should not be necessary to hurt the fast paths. This series
reuses much of the work from that time but defers the initialisation of
memory to kswapd so that one thread per node initialises memory local to
that node.
After applying the series and setting the appropriate Kconfig variable I
see this in the boot log on a 64G machine
[ 7.383764] kswapd 0 initialised deferred memory in 188ms
[ 7.404253] kswapd 1 initialised deferred memory in 208ms
[ 7.411044] kswapd 3 initialised deferred memory in 216ms
[ 7.411551] kswapd 2 initialised deferred memory in 216ms
On a 1TB machine, I see
[ 8.406511] kswapd 3 initialised deferred memory in 1116ms
[ 8.428518] kswapd 1 initialised deferred memory in 1140ms
[ 8.435977] kswapd 0 initialised deferred memory in 1148ms
[ 8.437416] kswapd 2 initialised deferred memory in 1148ms
Once booted the machine appears to work as normal. Boot times were measured
from the time shutdown was called until ssh was available again. In the
64G case, the boot time savings are negligible. On the 1TB machine, the
savings were 16 seconds.
Nate Zimmer said:
: On an older 8 TB box with lots and lots of cpus the boot time, as
: measure from grub to login prompt, the boot time improved from 1484
: seconds to exactly 1000 seconds.
Waiman Long said:
: I ran a bootup timing test on a 12-TB 16-socket IvyBridge-EX system. From
: grub menu to ssh login, the bootup time was 453s before the patch and 265s
: after the patch - a saving of 188s (42%).
Daniel Blueman said:
: On a 7TB, 1728-core NumaConnect system with 108 NUMA nodes, we're seeing
: stock 4.0 boot in 7136s. This drops to 2159s, or a 70% reduction with
: this patchset. Non-temporal PMD init (https://lkml.org/lkml/2015/4/23/350)
: drops this to 1045s.
This patch (of 13):
As part of initializing struct page's in 2MiB chunks, we noticed that at
the end of free_all_bootmem(), there was nothing which had forced the
reserved/allocated 4KiB pages to be initialized.
This helper function will be used for that expansion.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nate Zimmer <nzimmer@sgi.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-07-01 05:56:41 +08:00
|
|
|
/**
|
2020-10-14 07:58:25 +08:00
|
|
|
* for_each_reserved_mem_range - iterate over all reserved memblock areas
|
memblock: introduce a for_each_reserved_mem_region iterator
Struct page initialisation had been identified as one of the reasons why
large machines take a long time to boot. Patches were posted a long time ago
to defer initialisation until they were first used. This was rejected on
the grounds it should not be necessary to hurt the fast paths. This series
reuses much of the work from that time but defers the initialisation of
memory to kswapd so that one thread per node initialises memory local to
that node.
After applying the series and setting the appropriate Kconfig variable I
see this in the boot log on a 64G machine
[ 7.383764] kswapd 0 initialised deferred memory in 188ms
[ 7.404253] kswapd 1 initialised deferred memory in 208ms
[ 7.411044] kswapd 3 initialised deferred memory in 216ms
[ 7.411551] kswapd 2 initialised deferred memory in 216ms
On a 1TB machine, I see
[ 8.406511] kswapd 3 initialised deferred memory in 1116ms
[ 8.428518] kswapd 1 initialised deferred memory in 1140ms
[ 8.435977] kswapd 0 initialised deferred memory in 1148ms
[ 8.437416] kswapd 2 initialised deferred memory in 1148ms
Once booted the machine appears to work as normal. Boot times were measured
from the time shutdown was called until ssh was available again. In the
64G case, the boot time savings are negligible. On the 1TB machine, the
savings were 16 seconds.
Nate Zimmer said:
: On an older 8 TB box with lots and lots of cpus the boot time, as
: measure from grub to login prompt, the boot time improved from 1484
: seconds to exactly 1000 seconds.
Waiman Long said:
: I ran a bootup timing test on a 12-TB 16-socket IvyBridge-EX system. From
: grub menu to ssh login, the bootup time was 453s before the patch and 265s
: after the patch - a saving of 188s (42%).
Daniel Blueman said:
: On a 7TB, 1728-core NumaConnect system with 108 NUMA nodes, we're seeing
: stock 4.0 boot in 7136s. This drops to 2159s, or a 70% reduction with
: this patchset. Non-temporal PMD init (https://lkml.org/lkml/2015/4/23/350)
: drops this to 1045s.
This patch (of 13):
As part of initializing struct page's in 2MiB chunks, we noticed that at
the end of free_all_bootmem(), there was nothing which had forced the
reserved/allocated 4KiB pages to be initialized.
This helper function will be used for that expansion.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nate Zimmer <nzimmer@sgi.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-07-01 05:56:41 +08:00
|
|
|
* @i: u64 used as loop variable
|
|
|
|
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
|
|
|
|
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
|
|
|
|
*
|
|
|
|
* Walks over reserved areas of memblock. Available as soon as memblock
|
|
|
|
* is initialized.
|
|
|
|
*/
|
2020-10-14 07:58:25 +08:00
|
|
|
#define for_each_reserved_mem_range(i, p_start, p_end) \
|
|
|
|
__for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \
|
|
|
|
MEMBLOCK_NONE, p_start, p_end, NULL)
|
memblock: introduce a for_each_reserved_mem_region iterator
Struct page initialisation had been identified as one of the reasons why
large machines take a long time to boot. Patches were posted a long time ago
to defer initialisation until they were first used. This was rejected on
the grounds it should not be necessary to hurt the fast paths. This series
reuses much of the work from that time but defers the initialisation of
memory to kswapd so that one thread per node initialises memory local to
that node.
After applying the series and setting the appropriate Kconfig variable I
see this in the boot log on a 64G machine
[ 7.383764] kswapd 0 initialised deferred memory in 188ms
[ 7.404253] kswapd 1 initialised deferred memory in 208ms
[ 7.411044] kswapd 3 initialised deferred memory in 216ms
[ 7.411551] kswapd 2 initialised deferred memory in 216ms
On a 1TB machine, I see
[ 8.406511] kswapd 3 initialised deferred memory in 1116ms
[ 8.428518] kswapd 1 initialised deferred memory in 1140ms
[ 8.435977] kswapd 0 initialised deferred memory in 1148ms
[ 8.437416] kswapd 2 initialised deferred memory in 1148ms
Once booted the machine appears to work as normal. Boot times were measured
from the time shutdown was called until ssh was available again. In the
64G case, the boot time savings are negligible. On the 1TB machine, the
savings were 16 seconds.
Nate Zimmer said:
: On an older 8 TB box with lots and lots of cpus the boot time, as
: measure from grub to login prompt, the boot time improved from 1484
: seconds to exactly 1000 seconds.
Waiman Long said:
: I ran a bootup timing test on a 12-TB 16-socket IvyBridge-EX system. From
: grub menu to ssh login, the bootup time was 453s before the patch and 265s
: after the patch - a saving of 188s (42%).
Daniel Blueman said:
: On a 7TB, 1728-core NumaConnect system with 108 NUMA nodes, we're seeing
: stock 4.0 boot in 7136s. This drops to 2159s, or a 70% reduction with
: this patchset. Non-temporal PMD init (https://lkml.org/lkml/2015/4/23/350)
: drops this to 1045s.
This patch (of 13):
As part of initializing struct page's in 2MiB chunks, we noticed that at
the end of free_all_bootmem(), there was nothing which had forced the
reserved/allocated 4KiB pages to be initialized.
This helper function will be used for that expansion.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nate Zimmer <nzimmer@sgi.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-07-01 05:56:41 +08:00
|
|
|
|
2014-01-22 07:49:35 +08:00
|
|
|
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
|
|
|
|
{
|
|
|
|
return m->flags & MEMBLOCK_HOTPLUG;
|
|
|
|
}
|
|
|
|
|
2015-06-25 07:58:12 +08:00
|
|
|
static inline bool memblock_is_mirror(struct memblock_region *m)
|
|
|
|
{
|
|
|
|
return m->flags & MEMBLOCK_MIRROR;
|
|
|
|
}
|
|
|
|
|
2015-11-30 20:28:15 +08:00
|
|
|
static inline bool memblock_is_nomap(struct memblock_region *m)
|
|
|
|
{
|
|
|
|
return m->flags & MEMBLOCK_NOMAP;
|
|
|
|
}
|
|
|
|
|
memblock: add MEMBLOCK_DRIVER_MANAGED to mimic IORESOURCE_SYSRAM_DRIVER_MANAGED
Let's add a flag that corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED,
indicating that we're dealing with a memory region that is never
indicated in the firmware-provided memory map, but always detected and
added by a driver.
Similar to MEMBLOCK_HOTPLUG, most infrastructure has to treat such
memory regions like ordinary MEMBLOCK_NONE memory regions -- for
example, when selecting memory regions to add to the vmcore for dumping
in the crashkernel via for_each_mem_range().
However, especially kexec_file is not supposed to select such memblocks
via for_each_free_mem_range() / for_each_free_mem_range_reverse() to
place kexec images, similar to how we handle
IORESOURCE_SYSRAM_DRIVER_MANAGED without CONFIG_ARCH_KEEP_MEMBLOCK.
We'll make sure that memory hotplug code sets the flag where applicable
(IORESOURCE_SYSRAM_DRIVER_MANAGED) next. This prepares architectures
that need CONFIG_ARCH_KEEP_MEMBLOCK, such as arm64, for virtio-mem
support.
Note that kexec *must not* indicate this memory to the second kernel and
*must not* place kexec-images on this memory. Let's add a comment to
kexec_walk_memblock(), documenting how we handle MEMBLOCK_DRIVER_MANAGED
now just like using IORESOURCE_SYSRAM_DRIVER_MANAGED in
locate_mem_hole_callback() for kexec_walk_resources().
Also note that MEMBLOCK_HOTPLUG cannot be reused due to different
semantics:
MEMBLOCK_HOTPLUG: memory is indicated as "System RAM" in the
firmware-provided memory map and added to the system early during
boot; kexec *has to* indicate this memory to the second kernel and
can place kexec-images on this memory. After memory hotunplug,
kexec has to be re-armed. We mostly ignore this flag when
"movable_node" is not set on the kernel command line, because
then we're told to not care about hotunpluggability of such
memory regions.
MEMBLOCK_DRIVER_MANAGED: memory is not indicated as "System RAM" in
the firmware-provided memory map; this memory is always detected
and added to the system by a driver; memory might not actually be
physically hotunpluggable. kexec *must not* indicate this memory to
the second kernel and *must not* place kexec-images on this memory.
Link: https://lkml.kernel.org/r/20211004093605.5830-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jianyong Wu <Jianyong.Wu@arm.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shahab Vahedi <shahab@synopsys.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-11-06 04:44:53 +08:00
|
|
|
static inline bool memblock_is_driver_managed(struct memblock_region *m)
|
|
|
|
{
|
|
|
|
return m->flags & MEMBLOCK_DRIVER_MANAGED;
|
|
|
|
}
|
|
|
|
|
2013-09-12 05:22:17 +08:00
|
|
|
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
|
|
|
|
unsigned long *end_pfn);
|
2011-12-09 02:22:09 +08:00
|
|
|
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
|
|
|
|
unsigned long *out_end_pfn, int *out_nid);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* for_each_mem_pfn_range - early memory pfn range iterator
|
|
|
|
* @i: an integer used as loop variable
|
|
|
|
* @nid: node selector, %MAX_NUMNODES for all nodes
|
|
|
|
* @p_start: ptr to ulong for start pfn of the range, can be %NULL
|
|
|
|
* @p_end: ptr to ulong for end pfn of the range, can be %NULL
|
|
|
|
* @p_nid: ptr to int for nid of the range, can be %NULL
|
|
|
|
*
|
2012-10-09 07:32:24 +08:00
|
|
|
* Walks over configured memory ranges.
|
2011-12-09 02:22:09 +08:00
|
|
|
*/
|
|
|
|
#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
|
|
|
|
for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
|
|
|
|
i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
|
|
|
|
|
2019-05-14 08:21:17 +08:00
|
|
|
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
|
|
|
void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
|
|
|
|
unsigned long *out_spfn,
|
|
|
|
unsigned long *out_epfn);
|
|
|
|
/**
|
2021-01-14 16:04:44 +08:00
|
|
|
* for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
|
2019-05-14 08:21:17 +08:00
|
|
|
* memblock areas
|
|
|
|
* @i: u64 used as loop variable
|
|
|
|
* @zone: zone in which all of the memory blocks reside
|
|
|
|
* @p_start: ptr to ulong for start pfn of the range, can be %NULL
|
|
|
|
* @p_end: ptr to ulong for end pfn of the range, can be %NULL
|
|
|
|
*
|
|
|
|
* Walks over free (memory && !reserved) areas of memblock in a specific
|
|
|
|
* zone. Available once memblock and an empty zone are initialized. The main
|
|
|
|
* assumption is that the zone start, end, and pgdat have been associated.
|
|
|
|
* This way we can use the zone to determine NUMA node, and if a given part
|
|
|
|
* of the memblock is valid for the zone.
|
|
|
|
*/
|
|
|
|
#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
|
|
|
|
for (i = 0, \
|
|
|
|
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
|
|
|
|
i != U64_MAX; \
|
|
|
|
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
|
2019-05-14 08:21:20 +08:00
|
|
|
|
|
|
|
/**
|
2021-01-14 16:04:44 +08:00
|
|
|
* for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
|
2019-05-14 08:21:20 +08:00
|
|
|
* free memblock areas from a given point
|
|
|
|
* @i: u64 used as loop variable
|
|
|
|
* @zone: zone in which all of the memory blocks reside
|
|
|
|
* @p_start: ptr to ulong for start pfn of the range, can be %NULL
|
|
|
|
* @p_end: ptr to ulong for end pfn of the range, can be %NULL
|
|
|
|
*
|
|
|
|
* Walks over free (memory && !reserved) areas of memblock in a specific
|
|
|
|
* zone, continuing from current position. Available as soon as memblock is
|
|
|
|
* initialized.
|
|
|
|
*/
|
|
|
|
#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \
|
|
|
|
for (; i != U64_MAX; \
|
|
|
|
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
|
2020-06-04 06:59:55 +08:00
|
|
|
|
|
|
|
int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
|
|
|
|
|
2019-05-14 08:21:17 +08:00
|
|
|
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
|
|
|
|
|
2011-07-12 17:15:59 +08:00
|
|
|
/**
|
|
|
|
* for_each_free_mem_range - iterate through free memblock areas
|
|
|
|
* @i: u64 used as loop variable
|
2014-01-22 07:50:16 +08:00
|
|
|
* @nid: node selector, %NUMA_NO_NODE for all nodes
|
2016-01-15 07:22:04 +08:00
|
|
|
* @flags: pick from blocks based on memory attributes
|
2011-07-12 17:15:59 +08:00
|
|
|
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
|
|
|
|
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
|
|
|
|
* @p_nid: ptr to int for nid of the range, can be %NULL
|
|
|
|
*
|
|
|
|
* Walks over free (memory && !reserved) areas of memblock. Available as
|
|
|
|
* soon as memblock is initialized.
|
|
|
|
*/
|
2015-06-25 07:58:09 +08:00
|
|
|
#define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \
|
2020-10-14 07:57:59 +08:00
|
|
|
__for_each_mem_range(i, &memblock.memory, &memblock.reserved, \
|
|
|
|
nid, flags, p_start, p_end, p_nid)
|
2011-12-09 02:22:09 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* for_each_free_mem_range_reverse - rev-iterate through free memblock areas
|
|
|
|
* @i: u64 used as loop variable
|
2014-01-22 07:50:16 +08:00
|
|
|
* @nid: node selector, %NUMA_NO_NODE for all nodes
|
2016-01-15 07:22:04 +08:00
|
|
|
* @flags: pick from blocks based on memory attributes
|
2011-12-09 02:22:09 +08:00
|
|
|
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
|
|
|
|
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
|
|
|
|
* @p_nid: ptr to int for nid of the range, can be %NULL
|
|
|
|
*
|
|
|
|
* Walks over free (memory && !reserved) areas of memblock in reverse
|
|
|
|
* order. Available as soon as memblock is initialized.
|
|
|
|
*/
|
2015-06-25 07:58:09 +08:00
|
|
|
#define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end, \
|
|
|
|
p_nid) \
|
2020-10-14 07:57:59 +08:00
|
|
|
__for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \
|
|
|
|
nid, flags, p_start, p_end, p_nid)
|
2011-12-09 02:22:09 +08:00
|
|
|
|
2014-01-22 07:49:26 +08:00
|
|
|
int memblock_set_node(phys_addr_t base, phys_addr_t size,
|
|
|
|
struct memblock_type *type, int nid);
|
2011-07-14 17:43:42 +08:00
|
|
|
|
2021-06-29 10:43:01 +08:00
|
|
|
#ifdef CONFIG_NUMA
/* NUMA builds: the node id is kept in the region's @nid field. */
static inline void memblock_set_region_node(struct memblock_region *r, int nid)
{
	r->nid = nid;
}

static inline int memblock_get_region_node(const struct memblock_region *r)
{
	const int node = r->nid;

	return node;
}
#else
/* !NUMA builds: setting the node is a no-op and every region reports 0. */
static inline void memblock_set_region_node(struct memblock_region *r, int nid)
{
	/* Nothing to record without CONFIG_NUMA. */
}

static inline int memblock_get_region_node(const struct memblock_region *r)
{
	return 0;
}
#endif /* CONFIG_NUMA */
|
2011-07-14 17:43:42 +08:00
|
|
|
|
2018-10-31 06:09:49 +08:00
|
|
|
/* Flags for memblock allocation APIs */
|
|
|
|
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
|
|
|
|
#define MEMBLOCK_ALLOC_ACCESSIBLE 0
|
2021-11-05 23:05:09 +08:00
|
|
|
#define MEMBLOCK_ALLOC_NOLEAKTRACE 1
|
2018-10-31 06:09:49 +08:00
|
|
|
|
|
|
|
/* We are using top down, so it is safe to use 0 here */
|
|
|
|
#define MEMBLOCK_LOW_LIMIT 0
|
|
|
|
|
|
|
|
#ifndef ARCH_LOW_ADDRESS_LIMIT
|
|
|
|
#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
|
|
|
|
#endif
|
|
|
|
|
2019-03-12 14:29:16 +08:00
|
|
|
phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
|
|
|
|
phys_addr_t start, phys_addr_t end);
|
2020-04-11 05:32:42 +08:00
|
|
|
phys_addr_t memblock_alloc_range_nid(phys_addr_t size,
|
|
|
|
phys_addr_t align, phys_addr_t start,
|
|
|
|
phys_addr_t end, int nid, bool exact_nid);
|
memblock: rename memblock_alloc{_nid,_try_nid} to memblock_phys_alloc*
Make it explicit that the caller gets a physical address rather than a
virtual one.
This will also allow using meblock_alloc prefix for memblock allocations
returning virtual address, which is done in the following patches.
The conversion is done using the following semantic patch:
@@
expression e1, e2, e3;
@@
(
- memblock_alloc(e1, e2)
+ memblock_phys_alloc(e1, e2)
|
- memblock_alloc_nid(e1, e2, e3)
+ memblock_phys_alloc_nid(e1, e2, e3)
|
- memblock_alloc_try_nid(e1, e2, e3)
+ memblock_phys_alloc_try_nid(e1, e2, e3)
)
Link: http://lkml.kernel.org/r/1536927045-23536-7-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:07:59 +08:00
|
|
|
phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
|
2010-07-07 06:39:17 +08:00
|
|
|
|
2021-12-17 10:07:54 +08:00
|
|
|
/**
 * memblock_phys_alloc - allocate a physical memory range
 * @size: size of the range to allocate
 * @align: alignment of the range
 *
 * Forwards to memblock_phys_alloc_range() with a start of 0 and
 * %MEMBLOCK_ALLOC_ACCESSIBLE as the end.
 *
 * Return: whatever memblock_phys_alloc_range() returns for that request.
 */
static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
							phys_addr_t align)
{
	const phys_addr_t start = 0;
	const phys_addr_t end = MEMBLOCK_ALLOC_ACCESSIBLE;

	return memblock_phys_alloc_range(size, align, start, end);
}
|
2010-07-07 06:39:01 +08:00
|
|
|
|
2019-12-01 09:56:27 +08:00
|
|
|
void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
|
|
|
|
phys_addr_t min_addr, phys_addr_t max_addr,
|
|
|
|
int nid);
|
2018-10-31 06:09:49 +08:00
|
|
|
void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
|
|
|
|
phys_addr_t min_addr, phys_addr_t max_addr,
|
|
|
|
int nid);
|
|
|
|
void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
|
|
|
|
phys_addr_t min_addr, phys_addr_t max_addr,
|
|
|
|
int nid);
|
|
|
|
|
2020-11-16 19:35:37 +08:00
|
|
|
/**
 * memblock_alloc - allocate boot memory with the default address limits
 * @size: size of the memory block to allocate
 * @align: alignment of the block
 *
 * Forwards to memblock_alloc_try_nid() with %MEMBLOCK_LOW_LIMIT as the
 * minimum address, %MEMBLOCK_ALLOC_ACCESSIBLE as the maximum address and
 * %NUMA_NO_NODE as the node.
 *
 * Return: the pointer returned by memblock_alloc_try_nid().
 */
static __always_inline void *memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	const phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	const phys_addr_t max_addr = MEMBLOCK_ALLOC_ACCESSIBLE;

	return memblock_alloc_try_nid(size, align, min_addr, max_addr,
				      NUMA_NO_NODE);
}
|
|
|
|
|
2020-11-16 19:35:37 +08:00
|
|
|
/**
 * memblock_alloc_raw - "raw" variant of memblock_alloc()
 * @size: size of the memory block to allocate
 * @align: alignment of the block
 *
 * Forwards to memblock_alloc_try_nid_raw() with %MEMBLOCK_LOW_LIMIT as the
 * minimum address, %MEMBLOCK_ALLOC_ACCESSIBLE as the maximum address and
 * %NUMA_NO_NODE as the node.
 *
 * Return: the pointer returned by memblock_alloc_try_nid_raw().
 */
static inline void *memblock_alloc_raw(phys_addr_t size,
				       phys_addr_t align)
{
	const phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	const phys_addr_t max_addr = MEMBLOCK_ALLOC_ACCESSIBLE;

	return memblock_alloc_try_nid_raw(size, align, min_addr, max_addr,
					  NUMA_NO_NODE);
}
|
|
|
|
|
2020-11-16 19:35:37 +08:00
|
|
|
/**
 * memblock_alloc_from - allocate boot memory with a caller-chosen lower bound
 * @size: size of the memory block to allocate
 * @align: alignment of the block
 * @min_addr: minimum address passed on to memblock_alloc_try_nid()
 *
 * Forwards to memblock_alloc_try_nid() with @min_addr as the minimum
 * address, %MEMBLOCK_ALLOC_ACCESSIBLE as the maximum address and
 * %NUMA_NO_NODE as the node.
 *
 * Return: the pointer returned by memblock_alloc_try_nid().
 */
static inline void *memblock_alloc_from(phys_addr_t size,
					phys_addr_t align,
					phys_addr_t min_addr)
{
	const phys_addr_t max_addr = MEMBLOCK_ALLOC_ACCESSIBLE;

	return memblock_alloc_try_nid(size, align, min_addr, max_addr,
				      NUMA_NO_NODE);
}
|
|
|
|
|
2020-11-16 19:35:37 +08:00
|
|
|
/**
 * memblock_alloc_low - allocate boot memory below %ARCH_LOW_ADDRESS_LIMIT
 * @size: size of the memory block to allocate
 * @align: alignment of the block
 *
 * Forwards to memblock_alloc_try_nid() with %MEMBLOCK_LOW_LIMIT as the
 * minimum address, %ARCH_LOW_ADDRESS_LIMIT as the maximum address and
 * %NUMA_NO_NODE as the node. %ARCH_LOW_ADDRESS_LIMIT falls back to
 * 0xffffffffUL when it has not been defined before this header.
 *
 * Return: the pointer returned by memblock_alloc_try_nid().
 */
static inline void *memblock_alloc_low(phys_addr_t size,
				       phys_addr_t align)
{
	const phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	const phys_addr_t max_addr = ARCH_LOW_ADDRESS_LIMIT;

	return memblock_alloc_try_nid(size, align, min_addr, max_addr,
				      NUMA_NO_NODE);
}
|
|
|
|
|
2020-11-16 19:35:37 +08:00
|
|
|
/**
 * memblock_alloc_node - allocate boot memory for a given node
 * @size: size of the memory block to allocate
 * @align: alignment of the block
 * @nid: node id passed on to memblock_alloc_try_nid()
 *
 * Forwards to memblock_alloc_try_nid() with %MEMBLOCK_LOW_LIMIT as the
 * minimum address, %MEMBLOCK_ALLOC_ACCESSIBLE as the maximum address and
 * @nid as the node.
 *
 * Return: the pointer returned by memblock_alloc_try_nid().
 */
static inline void *memblock_alloc_node(phys_addr_t size,
					phys_addr_t align, int nid)
{
	const phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	const phys_addr_t max_addr = MEMBLOCK_ALLOC_ACCESSIBLE;

	return memblock_alloc_try_nid(size, align, min_addr, max_addr, nid);
}
|
|
|
|
|
2013-11-13 07:07:59 +08:00
|
|
|
/*
|
|
|
|
* Set the allocation direction to bottom-up or top-down.
|
|
|
|
*/
|
2021-03-25 12:37:50 +08:00
|
|
|
static inline __init_memblock void memblock_set_bottom_up(bool enable)
{
	/* Record the requested direction in the global memblock state. */
	memblock.bottom_up = enable;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if the allocation direction is bottom-up or not.
|
|
|
|
* If this returns true, memblock allocates memory
|
|
|
|
* in bottom-up direction.
|
|
|
|
*/
|
2021-03-25 12:37:50 +08:00
|
|
|
static inline __init_memblock bool memblock_bottom_up(void)
{
	/* Report the direction currently stored in the global memblock state. */
	const bool bottom_up = memblock.bottom_up;

	return bottom_up;
}
|
|
|
|
|
2011-12-09 02:22:06 +08:00
|
|
|
phys_addr_t memblock_phys_mem_size(void);
|
2016-10-08 07:59:18 +08:00
|
|
|
phys_addr_t memblock_reserved_size(void);
|
2011-12-09 02:22:06 +08:00
|
|
|
phys_addr_t memblock_start_of_DRAM(void);
|
|
|
|
phys_addr_t memblock_end_of_DRAM(void);
|
|
|
|
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
|
2017-04-03 10:23:55 +08:00
|
|
|
void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
|
2016-07-29 06:48:26 +08:00
|
|
|
void memblock_mem_limit_remove_map(phys_addr_t limit);
|
2016-01-15 07:18:54 +08:00
|
|
|
bool memblock_is_memory(phys_addr_t addr);
|
2018-02-07 07:41:18 +08:00
|
|
|
bool memblock_is_map_memory(phys_addr_t addr);
|
|
|
|
bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
|
2016-01-15 07:18:54 +08:00
|
|
|
bool memblock_is_reserved(phys_addr_t addr);
|
2015-09-09 06:02:00 +08:00
|
|
|
bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
|
2011-12-09 02:22:06 +08:00
|
|
|
|
2020-10-14 07:57:54 +08:00
|
|
|
void memblock_dump_all(void);
|
2010-07-12 12:36:09 +08:00
|
|
|
|
2010-07-07 06:39:01 +08:00
|
|
|
/**
|
|
|
|
* memblock_set_current_limit - Set the current allocation limit to allow
|
|
|
|
* limiting allocations to what is currently
|
|
|
|
* accessible during boot
|
|
|
|
* @limit: New limit value (physical address)
|
|
|
|
*/
|
2011-12-09 02:22:06 +08:00
|
|
|
void memblock_set_current_limit(phys_addr_t limit);
|
2010-07-07 06:39:01 +08:00
|
|
|
|
2010-07-07 06:38:58 +08:00
|
|
|
|
2014-02-27 08:23:43 +08:00
|
|
|
phys_addr_t memblock_get_current_limit(void);
|
|
|
|
|
2010-08-04 11:40:38 +08:00
|
|
|
/*
|
|
|
|
* pfn conversion functions
|
|
|
|
*
|
|
|
|
* While the memory MEMBLOCKs should always be page aligned, the reserved
|
|
|
|
* MEMBLOCKs may not be. These accessors attempt to provide a very clear
|
|
|
|
* idea of what they return for such non aligned MEMBLOCKs.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
2018-06-30 22:55:02 +08:00
|
|
|
* memblock_region_memory_base_pfn - get the lowest pfn of the memory region
|
2010-08-04 11:40:38 +08:00
|
|
|
* @reg: memblock_region structure
|
2018-06-30 22:55:02 +08:00
|
|
|
*
|
|
|
|
* Return: the lowest pfn intersecting with the memory region
|
2010-08-04 11:40:38 +08:00
|
|
|
*/
|
2010-10-13 05:07:09 +08:00
|
|
|
static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg)
|
2010-08-04 11:40:38 +08:00
|
|
|
{
|
2010-10-13 05:07:09 +08:00
|
|
|
return PFN_UP(reg->base);
|
2010-08-04 11:40:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2018-06-30 22:55:02 +08:00
|
|
|
* memblock_region_memory_end_pfn - get the end pfn of the memory region
|
2010-08-04 11:40:38 +08:00
|
|
|
* @reg: memblock_region structure
|
2018-06-30 22:55:02 +08:00
|
|
|
*
|
|
|
|
* Return: the end_pfn of the memory region
|
2010-08-04 11:40:38 +08:00
|
|
|
*/
|
2010-10-13 05:07:09 +08:00
|
|
|
static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg)
|
2010-08-04 11:40:38 +08:00
|
|
|
{
|
2010-10-13 05:07:09 +08:00
|
|
|
return PFN_DOWN(reg->base + reg->size);
|
2010-08-04 11:40:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2018-06-30 22:55:02 +08:00
|
|
|
* memblock_region_reserved_base_pfn - get the lowest pfn of the reserved region
|
2010-08-04 11:40:38 +08:00
|
|
|
* @reg: memblock_region structure
|
2018-06-30 22:55:02 +08:00
|
|
|
*
|
|
|
|
* Return: the lowest pfn intersecting with the reserved region
|
2010-08-04 11:40:38 +08:00
|
|
|
*/
|
2010-10-13 05:07:09 +08:00
|
|
|
static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg)
|
2010-08-04 11:40:38 +08:00
|
|
|
{
|
2010-10-13 05:07:09 +08:00
|
|
|
return PFN_DOWN(reg->base);
|
2010-08-04 11:40:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2018-06-30 22:55:02 +08:00
|
|
|
* memblock_region_reserved_end_pfn - get the end pfn of the reserved region
|
2010-08-04 11:40:38 +08:00
|
|
|
* @reg: memblock_region structure
|
2018-06-30 22:55:02 +08:00
|
|
|
*
|
|
|
|
* Return: the end_pfn of the reserved region
|
2010-08-04 11:40:38 +08:00
|
|
|
*/
|
2010-10-13 05:07:09 +08:00
|
|
|
static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg)
|
2010-08-04 11:40:38 +08:00
|
|
|
{
|
2010-10-13 05:07:09 +08:00
|
|
|
return PFN_UP(reg->base + reg->size);
|
2010-08-04 11:40:38 +08:00
|
|
|
}
|
|
|
|
|
2020-10-14 07:58:30 +08:00
|
|
|
/**
|
|
|
|
* for_each_mem_region - iterate over memory regions
|
|
|
|
* @region: loop variable
|
|
|
|
*/
|
|
|
|
#define for_each_mem_region(region) \
|
|
|
|
for (region = memblock.memory.regions; \
|
|
|
|
region < (memblock.memory.regions + memblock.memory.cnt); \
|
|
|
|
region++)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* for_each_reserved_mem_region - iterate over reserved memory regions
|
|
|
|
* @region: loop variable
|
|
|
|
*/
|
|
|
|
#define for_each_reserved_mem_region(region) \
|
|
|
|
for (region = memblock.reserved.regions; \
|
|
|
|
region < (memblock.reserved.regions + memblock.reserved.cnt); \
|
2010-08-04 11:40:38 +08:00
|
|
|
region++)
|
|
|
|
|
2018-10-31 06:09:49 +08:00
|
|
|
extern void *alloc_large_system_hash(const char *tablename,
|
|
|
|
unsigned long bucketsize,
|
|
|
|
unsigned long numentries,
|
|
|
|
int scale,
|
|
|
|
int flags,
|
|
|
|
unsigned int *_hash_shift,
|
|
|
|
unsigned int *_hash_mask,
|
|
|
|
unsigned long low_limit,
|
|
|
|
unsigned long high_limit);
|
|
|
|
|
|
|
|
#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
|
|
|
|
#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min
|
|
|
|
* shift passed via *_hash_shift */
|
|
|
|
#define HASH_ZERO 0x00000004 /* Zero allocated hash table */
|
|
|
|
|
|
|
|
/* Only NUMA needs hash distribution. 64bit NUMA architectures have
|
|
|
|
* sufficient vmalloc space.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT)
|
|
|
|
extern int hashdist; /* Distribute hashes across NUMA nodes? */
|
|
|
|
#else
|
|
|
|
#define hashdist (0)
|
|
|
|
#endif
|
|
|
|
|
2015-04-15 06:48:27 +08:00
|
|
|
#ifdef CONFIG_MEMTEST
|
2015-04-15 06:48:30 +08:00
|
|
|
extern void early_memtest(phys_addr_t start, phys_addr_t end);
|
2015-04-15 06:48:27 +08:00
|
|
|
#else
|
2015-04-15 06:48:30 +08:00
|
|
|
static inline void early_memtest(phys_addr_t start, phys_addr_t end)
|
2015-04-15 06:48:27 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
2010-07-28 13:28:21 +08:00
|
|
|
|
2010-07-12 12:36:09 +08:00
|
|
|
|
|
|
|
#endif /* _LINUX_MEMBLOCK_H */
|