memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
For removing the memmap region of sparse-vmemmap, which is allocated from bootmem, the memmap region of sparse-vmemmap needs to be registered by get_page_bootmem().  So the patch searches the pages of the virtual mapping and registers them by get_page_bootmem().

NOTE: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390, and sparc.  So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and make register_page_bootmem_info_node() a no-op when the platform doesn't support it.

This is implemented by adding a new Kconfig option named CONFIG_HAVE_BOOTMEM_INFO_NODE, which is automatically selected by architectures that fully support the memory-hotplug feature (currently only x86_64).

Since we have two config options, MEMORY_HOTPLUG and MEMORY_HOTREMOVE, used for memory hot-add and hot-remove separately, and the code in register_page_bootmem_info_node() is only used for collecting information for hot-remove, move it under MEMORY_HOTREMOVE.  Likewise, page_isolation.c, selected by MEMORY_ISOLATION under MEMORY_HOTPLUG, is only needed for hot-remove, so move that select too.

[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE]
[linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()]
[mhocko@suse.cz: remove the arch specific functions without any implementation]
[linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed]
[rientjes@google.com: fix defined but not used warning]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wu Jianguo <wujianguo@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 46723bfa54
parent 24d335ca36
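For orientation before the diff: "registering" a page here means tagging the struct page behind every bootmem-allocated piece of section metadata (the vmemmap pages themselves, the usemap, and the page-table pages that map them) with its section number and an info type, and taking a reference on it.  Hot-remove later drops those references through put_page_bootmem() and may only hand the memory back once nothing is pinned.  The snippet below is a simplified, stand-alone sketch of that bookkeeping, not the kernel code: the field names and the plain integer refcount are illustrative stand-ins for what the kernel keeps in struct page (page->lru.next, page_private() and the page reference count).

    /* Illustrative sketch only -- not the kernel implementation. */
    #include <assert.h>

    enum bootmem_info_type { SECTION_INFO, MIX_SECTION_INFO };

    struct meta_page {
            unsigned long section_nr;      /* which memory section owns this metadata */
            enum bootmem_info_type type;   /* memmap data page vs. mixed (usemap, page table) */
            unsigned int refcount;         /* how many registrations pin it */
    };

    /* Models get_page_bootmem(): tag one bootmem-allocated metadata page. */
    static void sketch_get_page_bootmem(unsigned long section_nr,
                                        struct meta_page *page,
                                        enum bootmem_info_type type)
    {
            page->type = type;
            page->section_nr = section_nr;
            page->refcount++;
    }

    /* Models put_page_bootmem(): drop a registration; returns 1 when the
     * page is no longer pinned and hot-remove may free it. */
    static int sketch_put_page_bootmem(struct meta_page *page)
    {
            assert(page->refcount > 0);
            return --page->refcount == 0;
    }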
@@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page,
 {
 	return vmemmap_populate_basepages(start_page, size, node);
 }
 #endif

@@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page,
 
 	return 0;
 }
 
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */

@@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
 
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
arch/x86/mm/init_64.c:

@@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned int nr_pages;
+	struct page *page;
+
+	for (; addr < end; addr = next) {
+		pte_t *pte = NULL;
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+		if (!cpu_has_pse) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 MIX_SECTION_INFO);
+
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte))
+				continue;
+			get_page_bootmem(section_nr, pte_page(*pte),
+					 SECTION_INFO);
+		} else {
+			next = pmd_addr_end(addr, end);
+
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+
+			nr_pages = 1 << (get_order(PMD_SIZE));
+			page = pmd_page(*pmd);
+			while (nr_pages--)
+				get_page_bootmem(section_nr, page++,
+						 SECTION_INFO);
+		}
+	}
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
 	if (p_start) {
include/linux/memory_hotplug.h:

@@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
+#else
 static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
 }
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
-extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
-extern void put_page_bootmem(struct page *page);
 #endif
+extern void put_page_bootmem(struct page *page);
+extern void get_page_bootmem(unsigned long ingo, struct page *page,
+			     unsigned long type);
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
include/linux/mm.h:

@@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 				unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+				  unsigned long size);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
mm/Kconfig:

@@ -162,10 +162,16 @@ config MOVABLE_NODE
 	  Say Y here if you want to hotplug a whole node.
 	  Say N here if you want kernel to use memory on all nodes evenly.
 
+#
+# Only be set on architectures that have completely implemented memory hotplug
+# feature. If you are not sure, don't touch it.
+#
+config HAVE_BOOTMEM_INFO_NODE
+	def_bool n
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
-	select MEMORY_ISOLATION
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
@@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE
 
 config MEMORY_HOTREMOVE
 	bool "Allow for memory hot remove"
+	select MEMORY_ISOLATION
+	select HAVE_BOOTMEM_INFO_NODE if X86_64
 	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
 	depends on MIGRATION
 
mm/memory_hotplug.c:

@@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info, struct page *page,
-			     unsigned long type)
+void get_page_bootmem(unsigned long info, struct page *page,
+		      unsigned long type)
 {
 	page->lru.next = (struct list_head *) type;
 	SetPagePrivate(page);
@@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page)
 
 }
 
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
@@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 	get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
+#else /* CONFIG_SPARSEMEM_VMEMMAP */
+static void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+	unsigned long *usemap, mapsize, section_nr, i;
+	struct mem_section *ms;
+	struct page *page, *memmap;
+
+	if (!pfn_valid(start_pfn))
+		return;
+
+	section_nr = pfn_to_section_nr(start_pfn);
+	ms = __nr_to_section(section_nr);
+
+	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
+
+	usemap = __nr_to_section(section_nr)->pageblock_flags;
+	page = virt_to_page(usemap);
+
+	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
+
+	for (i = 0; i < mapsize; i++, page++)
+		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 		register_page_bootmem_info_section(pfn);
 	}
 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 			   unsigned long end_pfn)
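For completeness, the consumer side of CONFIG_HAVE_BOOTMEM_INFO_NODE is boot-time architecture code that registers every online node's pgdat, so all of the bootmem-backed metadata is tagged before any hot-remove request can arrive.  That call site is not part of this patch; the snippet below is only a sketch of how such a caller would look, assuming the usual for_each_online_node()/NODE_DATA() helpers.

    /* Sketch of a caller, not taken from this patch: register bootmem info
     * for every online node once at boot. */
    static void __init sketch_register_all_bootmem_info(void)
    {
            int nid;

            for_each_online_node(nid)
                    register_page_bootmem_info_node(NODE_DATA(nid));
    }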