parisc: Switch from DISCONTIGMEM to SPARSEMEM

The commit 1c30844d2d ("mm: reclaim small amounts of memory when an
external fragmentation event occurs") breaks memory management on a
parisc c8000 workstation with this memory layout:

	0) Start 0x0000000000000000 End 0x000000003fffffff Size   1024 MB
	1) Start 0x0000000100000000 End 0x00000001bfdfffff Size   3070 MB
	2) Start 0x0000004040000000 End 0x00000040ffffffff Size   3072 MB

With the patch 1c30844d2d, the kernel will incorrectly reclaim the
first zone when it fills up, ignoring the fact that there are two
completely free zones. Basically, it limits cache size to 1GiB.

The parisc kernel is currently using the DISCONTIGMEM implementation,
but isn't NUMA. Avoid this issue or strange work-arounds by switching to
the more commonly used SPARSEMEM implementation.

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1c30844d2d ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Signed-off-by: Helge Deller <deller@gmx.de>
This commit is contained in:
Helge Deller 2019-04-09 21:52:35 +02:00
parent 6b1370ae39
commit dbdf076099
6 changed files with 68 additions and 128 deletions

View File

@ -36,6 +36,7 @@ config PARISC
select GENERIC_STRNCPY_FROM_USER
select SYSCTL_ARCH_UNALIGN_ALLOW
select SYSCTL_EXCEPTION_TRACE
select ARCH_DISCARD_MEMBLOCK
select HAVE_MOD_ARCH_SPECIFIC
select VIRT_TO_BUS
select MODULES_USE_ELF_RELA
@ -314,21 +315,16 @@ config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on 64BIT
config ARCH_DISCONTIGMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on 64BIT
config ARCH_FLATMEM_ENABLE
def_bool y
config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on ARCH_DISCONTIGMEM_ENABLE
config NODES_SHIFT
int
default "3"
depends on NEED_MULTIPLE_NODES
depends on ARCH_SPARSEMEM_ENABLE
source "kernel/Kconfig.hz"

View File

@ -2,62 +2,6 @@
#ifndef _PARISC_MMZONE_H
#define _PARISC_MMZONE_H
#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */
#ifdef CONFIG_DISCONTIGMEM
extern int npmem_ranges;
struct node_map_data {
pg_data_t pg_data;
};
extern struct node_map_data node_data[];
#define NODE_DATA(nid) (&node_data[nid].pg_data)
/* We have these possible memory map layouts:
* Astro: 0-3.75, 67.75-68, 4-64
* zx1: 0-1, 257-260, 4-256
* Stretch (N-class): 0-2, 4-32, 34-xxx
*/
/* Since each 1GB can only belong to one region (node), we can create
* an index table for pfn to nid lookup; each entry in pfnnid_map
* represents 1GB, and contains the node that the memory belongs to. */
#define PFNNID_SHIFT (30 - PAGE_SHIFT)
#define PFNNID_MAP_MAX 512 /* support 512GB */
extern signed char pfnnid_map[PFNNID_MAP_MAX];
#ifndef CONFIG_64BIT
#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
#else
/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
#endif
static inline int pfn_to_nid(unsigned long pfn)
{
unsigned int i;
if (unlikely(pfn_is_io(pfn)))
return 0;
i = pfn >> PFNNID_SHIFT;
BUG_ON(i >= ARRAY_SIZE(pfnnid_map));
return pfnnid_map[i];
}
static inline int pfn_valid(int pfn)
{
int nid = pfn_to_nid(pfn);
if (nid >= 0)
return (pfn < node_end_pfn(nid));
return 0;
}
#endif
#endif /* _PARISC_MMZONE_H */

View File

@ -147,9 +147,9 @@ extern int npmem_ranges;
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#ifndef CONFIG_DISCONTIGMEM
#ifndef CONFIG_SPARSEMEM
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_DISCONTIGMEM */
#endif
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */

View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Sparse memory model parameters for parisc. */
#ifndef ASM_PARISC_SPARSEMEM_H
#define ASM_PARISC_SPARSEMEM_H
/* We have these possible memory map layouts:
* Astro: 0-3.75, 67.75-68, 4-64
* zx1: 0-1, 257-260, 4-256
* Stretch (N-class): 0-2, 4-32, 34-xxx
*/
/* Number of physical address bits covered: 1 << 39 bytes = 512 GB. */
#define MAX_PHYSMEM_BITS 39 /* 512 GB */
/* log2 of the size of one memory section: 1 << 27 bytes = 128 MB. */
#define SECTION_SIZE_BITS 27 /* 128 MB */
#endif /* ASM_PARISC_SPARSEMEM_H */

View File

@ -138,12 +138,6 @@ extern void $$dyncall(void);
EXPORT_SYMBOL($$dyncall);
#endif
#ifdef CONFIG_DISCONTIGMEM
#include <asm/mmzone.h>
EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(pfnnid_map);
#endif
#ifdef CONFIG_FUNCTION_TRACER
extern void _mcount(void);
EXPORT_SYMBOL(_mcount);

View File

@ -32,6 +32,7 @@
#include <asm/mmzone.h>
#include <asm/sections.h>
#include <asm/msgbuf.h>
#include <asm/sparsemem.h>
extern int data_start;
extern void parisc_kernel_start(void); /* Kernel entry point in head.S */
@ -48,11 +49,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE)));
pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE)));
#ifdef CONFIG_DISCONTIGMEM
struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
#endif
static struct resource data_resource = {
.name = "Kernel data",
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
@ -76,11 +72,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly;
* information retrieved in kernel/inventory.c.
*/
physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
int npmem_ranges __read_mostly;
physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata;
int npmem_ranges __initdata;
#ifdef CONFIG_64BIT
#define MAX_MEM (~0UL)
#define MAX_MEM (1UL << MAX_PHYSMEM_BITS)
#else /* !CONFIG_64BIT */
#define MAX_MEM (3584U*1024U*1024U)
#endif /* !CONFIG_64BIT */
@ -119,7 +115,7 @@ static void __init mem_limit_func(void)
static void __init setup_bootmem(void)
{
unsigned long mem_max;
#ifndef CONFIG_DISCONTIGMEM
#ifndef CONFIG_SPARSEMEM
physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1];
int npmem_holes;
#endif
@ -137,23 +133,20 @@ static void __init setup_bootmem(void)
int j;
for (j = i; j > 0; j--) {
unsigned long tmp;
physmem_range_t tmp;
if (pmem_ranges[j-1].start_pfn <
pmem_ranges[j].start_pfn) {
break;
}
tmp = pmem_ranges[j-1].start_pfn;
pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn;
pmem_ranges[j].start_pfn = tmp;
tmp = pmem_ranges[j-1].pages;
pmem_ranges[j-1].pages = pmem_ranges[j].pages;
pmem_ranges[j].pages = tmp;
tmp = pmem_ranges[j-1];
pmem_ranges[j-1] = pmem_ranges[j];
pmem_ranges[j] = tmp;
}
}
#ifndef CONFIG_DISCONTIGMEM
#ifndef CONFIG_SPARSEMEM
/*
* Throw out ranges that are too far apart (controlled by
* MAX_GAP).
@ -165,7 +158,7 @@ static void __init setup_bootmem(void)
pmem_ranges[i-1].pages) > MAX_GAP) {
npmem_ranges = i;
printk("Large gap in memory detected (%ld pages). "
"Consider turning on CONFIG_DISCONTIGMEM\n",
"Consider turning on CONFIG_SPARSEMEM\n",
pmem_ranges[i].start_pfn -
(pmem_ranges[i-1].start_pfn +
pmem_ranges[i-1].pages));
@ -230,9 +223,8 @@ static void __init setup_bootmem(void)
printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20);
#ifndef CONFIG_DISCONTIGMEM
#ifndef CONFIG_SPARSEMEM
/* Merge the ranges, keeping track of the holes */
{
unsigned long end_pfn;
unsigned long hole_pages;
@ -255,18 +247,6 @@ static void __init setup_bootmem(void)
}
#endif
#ifdef CONFIG_DISCONTIGMEM
for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
memset(NODE_DATA(i), 0, sizeof(pg_data_t));
}
memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
for (i = 0; i < npmem_ranges; i++) {
node_set_state(i, N_NORMAL_MEMORY);
node_set_online(i);
}
#endif
/*
* Initialize and free the full range of memory in each range.
*/
@ -314,7 +294,7 @@ static void __init setup_bootmem(void)
memblock_reserve(__pa(KERNEL_BINARY_TEXT_START),
(unsigned long)(_end - KERNEL_BINARY_TEXT_START));
#ifndef CONFIG_DISCONTIGMEM
#ifndef CONFIG_SPARSEMEM
/* reserve the holes */
@ -360,6 +340,9 @@ static void __init setup_bootmem(void)
/* Initialize Page Deallocation Table (PDT) and check for bad memory. */
pdc_pdt_init();
memblock_allow_resize();
memblock_dump_all();
}
static int __init parisc_text_address(unsigned long vaddr)
@ -713,37 +696,46 @@ static void __init gateway_init(void)
PAGE_SIZE, PAGE_GATEWAY, 1);
}
void __init paging_init(void)
/*
 * Describe all physical memory ranges as one span and pass it to
 * free_area_init_node() for node 0.
 *
 * Walks pmem_ranges[0 .. npmem_ranges) to find the lowest start pfn, the
 * highest end pfn and the total number of pages present.  Slot 0 of
 * zones_size receives the full span (end - start); slot 0 of holes_size
 * receives that span minus the pages present, i.e. the gaps between
 * ranges.  All other slots stay zero.
 *
 * NOTE(review): if npmem_ranges is 0 the initial sentinels below are never
 * replaced and the span subtraction wraps; presumably at least one range
 * is always registered before this runs -- confirm against the caller.
 */
static void __init parisc_bootmem_free(void)
{
unsigned long zones_size[MAX_NR_ZONES] = { 0, };
unsigned long holes_size[MAX_NR_ZONES] = { 0, };
/* start at "max"/"min" sentinels so the first range always replaces them */
unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;
int i;
for (i = 0; i < npmem_ranges; i++) {
unsigned long start = pmem_ranges[i].start_pfn;
unsigned long size = pmem_ranges[i].pages;
unsigned long end = start + size;
/* track lowest start and highest end over all ranges */
if (mem_start_pfn > start)
mem_start_pfn = start;
if (mem_end_pfn < end)
mem_end_pfn = end;
/* pages actually present, excluding gaps between ranges */
mem_size_pfn += size;
}
/* span of the whole layout, holes included */
zones_size[0] = mem_end_pfn - mem_start_pfn;
/* whatever part of the span is not backed by a range is a hole */
holes_size[0] = zones_size[0] - mem_size_pfn;
free_area_init_node(0, zones_size, mem_start_pfn, holes_size);
}
void __init paging_init(void)
{
setup_bootmem();
pagetable_init();
gateway_init();
flush_cache_all_local(); /* start with known state */
flush_tlb_all_local(NULL);
for (i = 0; i < npmem_ranges; i++) {
unsigned long zones_size[MAX_NR_ZONES] = { 0, };
zones_size[ZONE_NORMAL] = pmem_ranges[i].pages;
#ifdef CONFIG_DISCONTIGMEM
/* Need to initialize the pfnnid_map before we can initialize
the zone */
{
int j;
for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT);
j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT);
j++) {
pfnnid_map[j] = i;
}
}
#endif
free_area_init_node(i, zones_size,
pmem_ranges[i].start_pfn, NULL);
}
/*
* Mark all memblocks as present for sparsemem using
* memory_present() and then initialize sparsemem.
*/
memblocks_present();
sparse_init();
parisc_bootmem_free();
}
#ifdef CONFIG_PA20