mm/memory_hotplug: embed vmem_altmap details in memory block
With memmap on memory, some architectures need more details w.r.t. the altmap, such as base_pfn, end_pfn, etc., to unmap vmemmap memory. Instead of computing them again when we remove a memory block, embed the vmem_altmap details in struct memory_block if we are using the memmap-on-memory feature. [yangyingliang@huawei.com: fix error return code in add_memory_resource()] Link: https://lkml.kernel.org/r/20230809081552.1351184-1-yangyingliang@huawei.com Link: https://lkml.kernel.org/r/20230808091501.287660-7-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
603fd64dfa
commit
1a8c64e110
|
@ -105,7 +105,8 @@ EXPORT_SYMBOL(unregister_memory_notifier);
|
||||||
static void memory_block_release(struct device *dev)
|
static void memory_block_release(struct device *dev)
|
||||||
{
|
{
|
||||||
struct memory_block *mem = to_memory_block(dev);
|
struct memory_block *mem = to_memory_block(dev);
|
||||||
|
/* Verify that the altmap is freed */
|
||||||
|
WARN_ON(mem->altmap);
|
||||||
kfree(mem);
|
kfree(mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,7 +184,7 @@ static int memory_block_online(struct memory_block *mem)
|
||||||
{
|
{
|
||||||
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||||
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
||||||
unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
|
unsigned long nr_vmemmap_pages = 0;
|
||||||
struct zone *zone;
|
struct zone *zone;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -200,6 +201,9 @@ static int memory_block_online(struct memory_block *mem)
|
||||||
* stage helps to keep accounting easier to follow - e.g vmemmaps
|
* stage helps to keep accounting easier to follow - e.g vmemmaps
|
||||||
* belong to the same zone as the memory they backed.
|
* belong to the same zone as the memory they backed.
|
||||||
*/
|
*/
|
||||||
|
if (mem->altmap)
|
||||||
|
nr_vmemmap_pages = mem->altmap->free;
|
||||||
|
|
||||||
if (nr_vmemmap_pages) {
|
if (nr_vmemmap_pages) {
|
||||||
ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
|
ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
|
||||||
if (ret)
|
if (ret)
|
||||||
|
@ -230,7 +234,7 @@ static int memory_block_offline(struct memory_block *mem)
|
||||||
{
|
{
|
||||||
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||||
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
||||||
unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
|
unsigned long nr_vmemmap_pages = 0;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!mem->zone)
|
if (!mem->zone)
|
||||||
|
@ -240,6 +244,9 @@ static int memory_block_offline(struct memory_block *mem)
|
||||||
* Unaccount before offlining, such that unpopulated zone and kthreads
|
* Unaccount before offlining, such that unpopulated zone and kthreads
|
||||||
* can properly be torn down in offline_pages().
|
* can properly be torn down in offline_pages().
|
||||||
*/
|
*/
|
||||||
|
if (mem->altmap)
|
||||||
|
nr_vmemmap_pages = mem->altmap->free;
|
||||||
|
|
||||||
if (nr_vmemmap_pages)
|
if (nr_vmemmap_pages)
|
||||||
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
|
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
|
||||||
-nr_vmemmap_pages);
|
-nr_vmemmap_pages);
|
||||||
|
@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int add_memory_block(unsigned long block_id, unsigned long state,
|
static int add_memory_block(unsigned long block_id, unsigned long state,
|
||||||
unsigned long nr_vmemmap_pages,
|
struct vmem_altmap *altmap,
|
||||||
struct memory_group *group)
|
struct memory_group *group)
|
||||||
{
|
{
|
||||||
struct memory_block *mem;
|
struct memory_block *mem;
|
||||||
|
@ -744,7 +751,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
|
||||||
mem->start_section_nr = block_id * sections_per_block;
|
mem->start_section_nr = block_id * sections_per_block;
|
||||||
mem->state = state;
|
mem->state = state;
|
||||||
mem->nid = NUMA_NO_NODE;
|
mem->nid = NUMA_NO_NODE;
|
||||||
mem->nr_vmemmap_pages = nr_vmemmap_pages;
|
mem->altmap = altmap;
|
||||||
INIT_LIST_HEAD(&mem->group_next);
|
INIT_LIST_HEAD(&mem->group_next);
|
||||||
|
|
||||||
#ifndef CONFIG_NUMA
|
#ifndef CONFIG_NUMA
|
||||||
|
@ -783,14 +790,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr)
|
||||||
if (section_count == 0)
|
if (section_count == 0)
|
||||||
return 0;
|
return 0;
|
||||||
return add_memory_block(memory_block_id(base_section_nr),
|
return add_memory_block(memory_block_id(base_section_nr),
|
||||||
MEM_ONLINE, 0, NULL);
|
MEM_ONLINE, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int add_hotplug_memory_block(unsigned long block_id,
|
static int add_hotplug_memory_block(unsigned long block_id,
|
||||||
unsigned long nr_vmemmap_pages,
|
struct vmem_altmap *altmap,
|
||||||
struct memory_group *group)
|
struct memory_group *group)
|
||||||
{
|
{
|
||||||
return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
|
return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remove_memory_block(struct memory_block *memory)
|
static void remove_memory_block(struct memory_block *memory)
|
||||||
|
@ -818,7 +825,7 @@ static void remove_memory_block(struct memory_block *memory)
|
||||||
* Called under device_hotplug_lock.
|
* Called under device_hotplug_lock.
|
||||||
*/
|
*/
|
||||||
int create_memory_block_devices(unsigned long start, unsigned long size,
|
int create_memory_block_devices(unsigned long start, unsigned long size,
|
||||||
unsigned long vmemmap_pages,
|
struct vmem_altmap *altmap,
|
||||||
struct memory_group *group)
|
struct memory_group *group)
|
||||||
{
|
{
|
||||||
const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
|
const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
|
||||||
|
@ -832,7 +839,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
|
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
|
||||||
ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
|
ret = add_hotplug_memory_block(block_id, altmap, group);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,11 +77,7 @@ struct memory_block {
|
||||||
*/
|
*/
|
||||||
struct zone *zone;
|
struct zone *zone;
|
||||||
struct device dev;
|
struct device dev;
|
||||||
/*
|
struct vmem_altmap *altmap;
|
||||||
* Number of vmemmap pages. These pages
|
|
||||||
* lay at the beginning of the memory block.
|
|
||||||
*/
|
|
||||||
unsigned long nr_vmemmap_pages;
|
|
||||||
struct memory_group *group; /* group (if any) for this block */
|
struct memory_group *group; /* group (if any) for this block */
|
||||||
struct list_head group_next; /* next block inside memory group */
|
struct list_head group_next; /* next block inside memory group */
|
||||||
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
|
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
|
||||||
|
@ -147,7 +143,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
|
||||||
extern int register_memory_notifier(struct notifier_block *nb);
|
extern int register_memory_notifier(struct notifier_block *nb);
|
||||||
extern void unregister_memory_notifier(struct notifier_block *nb);
|
extern void unregister_memory_notifier(struct notifier_block *nb);
|
||||||
int create_memory_block_devices(unsigned long start, unsigned long size,
|
int create_memory_block_devices(unsigned long start, unsigned long size,
|
||||||
unsigned long vmemmap_pages,
|
struct vmem_altmap *altmap,
|
||||||
struct memory_group *group);
|
struct memory_group *group);
|
||||||
void remove_memory_block_devices(unsigned long start, unsigned long size);
|
void remove_memory_block_devices(unsigned long start, unsigned long size);
|
||||||
extern void memory_dev_init(void);
|
extern void memory_dev_init(void);
|
||||||
|
|
|
@ -1439,7 +1439,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
|
||||||
if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
|
if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
|
||||||
if (mhp_supports_memmap_on_memory(size)) {
|
if (mhp_supports_memmap_on_memory(size)) {
|
||||||
mhp_altmap.free = memory_block_memmap_on_memory_pages();
|
mhp_altmap.free = memory_block_memmap_on_memory_pages();
|
||||||
params.altmap = &mhp_altmap;
|
params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
|
||||||
|
if (!params.altmap) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap));
|
||||||
}
|
}
|
||||||
/* fallback to not using altmap */
|
/* fallback to not using altmap */
|
||||||
}
|
}
|
||||||
|
@ -1447,13 +1453,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
|
||||||
/* call arch's memory hotadd */
|
/* call arch's memory hotadd */
|
||||||
ret = arch_add_memory(nid, start, size, &params);
|
ret = arch_add_memory(nid, start, size, &params);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto error;
|
goto error_free;
|
||||||
|
|
||||||
/* create memory block devices after memory was added */
|
/* create memory block devices after memory was added */
|
||||||
ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
|
ret = create_memory_block_devices(start, size, params.altmap, group);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
arch_remove_memory(start, size, NULL);
|
arch_remove_memory(start, size, NULL);
|
||||||
goto error;
|
goto error_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_node) {
|
if (new_node) {
|
||||||
|
@ -1490,6 +1496,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
|
||||||
walk_memory_blocks(start, size, NULL, online_memory_block);
|
walk_memory_blocks(start, size, NULL, online_memory_block);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
error_free:
|
||||||
|
kfree(params.altmap);
|
||||||
error:
|
error:
|
||||||
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
|
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
|
||||||
memblock_remove(start, size);
|
memblock_remove(start, size);
|
||||||
|
@ -2056,12 +2064,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
|
static int test_has_altmap_cb(struct memory_block *mem, void *arg)
|
||||||
{
|
{
|
||||||
|
struct memory_block **mem_ptr = (struct memory_block **)arg;
|
||||||
/*
|
/*
|
||||||
* If not set, continue with the next block.
|
* return the memblock if we have altmap
|
||||||
|
* and break callback.
|
||||||
*/
|
*/
|
||||||
return mem->nr_vmemmap_pages;
|
if (mem->altmap) {
|
||||||
|
*mem_ptr = mem;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_cpu_on_node(int nid)
|
static int check_cpu_on_node(int nid)
|
||||||
|
@ -2136,10 +2150,9 @@ EXPORT_SYMBOL(try_offline_node);
|
||||||
|
|
||||||
static int __ref try_remove_memory(u64 start, u64 size)
|
static int __ref try_remove_memory(u64 start, u64 size)
|
||||||
{
|
{
|
||||||
struct vmem_altmap mhp_altmap = {};
|
struct memory_block *mem;
|
||||||
struct vmem_altmap *altmap = NULL;
|
|
||||||
unsigned long nr_vmemmap_pages;
|
|
||||||
int rc = 0, nid = NUMA_NO_NODE;
|
int rc = 0, nid = NUMA_NO_NODE;
|
||||||
|
struct vmem_altmap *altmap = NULL;
|
||||||
|
|
||||||
BUG_ON(check_hotplug_memory_range(start, size));
|
BUG_ON(check_hotplug_memory_range(start, size));
|
||||||
|
|
||||||
|
@ -2161,25 +2174,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
|
||||||
* the same granularity it was added - a single memory block.
|
* the same granularity it was added - a single memory block.
|
||||||
*/
|
*/
|
||||||
if (mhp_memmap_on_memory()) {
|
if (mhp_memmap_on_memory()) {
|
||||||
nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
|
rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
|
||||||
get_nr_vmemmap_pages_cb);
|
if (rc) {
|
||||||
if (nr_vmemmap_pages) {
|
|
||||||
if (size != memory_block_size_bytes()) {
|
if (size != memory_block_size_bytes()) {
|
||||||
pr_warn("Refuse to remove %#llx - %#llx,"
|
pr_warn("Refuse to remove %#llx - %#llx,"
|
||||||
"wrong granularity\n",
|
"wrong granularity\n",
|
||||||
start, start + size);
|
start, start + size);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
altmap = mem->altmap;
|
||||||
/*
|
/*
|
||||||
* Let remove_pmd_table->free_hugepage_table do the
|
* Mark altmap NULL so that we can add a debug
|
||||||
* right thing if we used vmem_altmap when hot-adding
|
* check on memblock free.
|
||||||
* the range.
|
|
||||||
*/
|
*/
|
||||||
mhp_altmap.base_pfn = PHYS_PFN(start);
|
mem->altmap = NULL;
|
||||||
mhp_altmap.free = nr_vmemmap_pages;
|
|
||||||
mhp_altmap.alloc = nr_vmemmap_pages;
|
|
||||||
altmap = &mhp_altmap;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2196,6 +2204,12 @@ static int __ref try_remove_memory(u64 start, u64 size)
|
||||||
|
|
||||||
arch_remove_memory(start, size, altmap);
|
arch_remove_memory(start, size, altmap);
|
||||||
|
|
||||||
|
/* Verify that all vmemmap pages have actually been freed. */
|
||||||
|
if (altmap) {
|
||||||
|
WARN(altmap->alloc, "Altmap not fully unmapped");
|
||||||
|
kfree(altmap);
|
||||||
|
}
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
|
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
|
||||||
memblock_phys_free(start, size);
|
memblock_phys_free(start, size);
|
||||||
memblock_remove(start, size);
|
memblock_remove(start, size);
|
||||||
|
|
Loading…
Reference in New Issue