From a3f5c338b9f30f328276739d9589beae19254936 Mon Sep 17 00:00:00 2001 From: Zou Nan hai Date: Tue, 20 Mar 2007 13:41:57 -0700 Subject: [PATCH 1/5] [IA64] min_low_pfn and max_low_pfn calculation fix We have seen bad_pte_print when testing crashdump on an SN machine in recent 2.6.20 kernel. There are tons of bad pte print (pfn < max_low_pfn) reports when the crash kernel boots up, all those reported bad pages are inside initmem range; That is because if the crash kernel code and data happens to be at the beginning of the 1st node. build_node_maps in discontig.c will bypass reserved regions with filter_rsvd_memory. Since min_low_pfn is calculated in build_node_map, so in this case, min_low_pfn will be greater than kernel code and data. Because pages inside initmem are freed and reused later, we saw pfn_valid check fail on those pages. I think this theoretically happen on a normal kernel. When I check min_low_pfn and max_low_pfn calculation in contig.c and discontig.c. I found more issues than this. 1. min_low_pfn and max_low_pfn calculation is inconsistent between contig.c and discontig.c, min_low_pfn is calculated as the first page number of boot memmap in contig.c (Why? Though this may work at the most of the time, I don't think it is the right logic). It is calculated as the lowest physical memory page number bypass reserved regions in discontig.c. max_low_pfn is calculated include reserved regions in contig.c. It is calculated exclude reserved regions in discontig.c. 2. If kernel code and data region is happen to be at the begin or the end of physical memory, when min_low_pfn and max_low_pfn calculation is bypassed kernel code and data, pages in initmem will report bad. 3. initrd is also in reserved regions, if it is at the begin or at the end of physical memory, kernel will refuse to reuse the memory. Because the virt_addr_valid check in free_initrd_mem. So it is better to fix and clean up those issues. Calculate min_low_pfn and max_low_pfn in a consistent way. Signed-off-by: Zou Nan hai Acked-by: Jay Lan Signed-off-by: Tony Luck --- arch/ia64/mm/contig.c | 30 ++++++------------------------ arch/ia64/mm/discontig.c | 4 +--- arch/ia64/mm/init.c | 16 ++++++++++++++++ include/asm-ia64/meminit.h | 1 + 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index fb0f4698f5d0..44ce5ed9444c 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -96,26 +96,6 @@ void show_mem(void) /* physical address where the bootmem map is located */ unsigned long bootmap_start; -/** - * find_max_pfn - adjust the maximum page number callback - * @start: start of range - * @end: end of range - * @arg: address of pointer to global max_pfn variable - * - * Passed as a callback function to efi_memmap_walk() to determine the highest - * available page frame number in the system. - */ -int -find_max_pfn (unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfnp = arg, pfn; - - pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT; - if (pfn > *max_pfnp) - *max_pfnp = pfn; - return 0; -} - /** * find_bootmap_location - callback to find a memory area for the bootmap * @start: start of region @@ -177,9 +157,10 @@ find_memory (void) reserve_memory(); /* first find highest page frame number */ - max_pfn = 0; - efi_memmap_walk(find_max_pfn, &max_pfn); - + min_low_pfn = ~0UL; + max_low_pfn = 0; + efi_memmap_walk(find_max_min_low_pfn, NULL); + max_pfn = max_low_pfn; /* how many bytes to cover all the pages */ bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; @@ -189,7 +170,8 @@ find_memory (void) if (bootmap_start == ~0UL) panic("Cannot find %ld bytes for bootmap\n", bootmap_size); - bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + bootmap_size = init_bootmem_node(NODE_DATA(0), + (bootmap_start >> PAGE_SHIFT), 0, max_pfn); /* Free all available memory, then mark bootmem-map as being in use. */ efi_memmap_walk(filter_rsvd_memory, free_bootmem); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 11a2d8825d89..872da7a2accd 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -88,9 +88,6 @@ static int __init build_node_maps(unsigned long start, unsigned long len, bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); } - min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT); - max_low_pfn = max(max_low_pfn, bdp->node_low_pfn); - return 0; } @@ -438,6 +435,7 @@ void __init find_memory(void) /* These actually end up getting called by call_pernode_memory() */ efi_memmap_walk(filter_rsvd_memory, build_node_maps); efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + efi_memmap_walk(find_max_min_low_pfn, NULL); for_each_online_node(node) if (mem_data[node].bootmem_data.node_low_pfn) { diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index f225dd72968b..c8da621aab17 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -648,6 +648,22 @@ count_reserved_pages (u64 start, u64 end, void *arg) return 0; } +int +find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg) +{ + unsigned long pfn_start, pfn_end; +#ifdef CONFIG_FLATMEM + pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT; + pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT; +#else + pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT; + pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT; +#endif + min_low_pfn = min(min_low_pfn, pfn_start); + max_low_pfn = max(max_low_pfn, pfn_end); + return 0; +} + /* * Boot command-line option "nolwsys" can be used to disable the use of any light-weight * system call handler. When this option is in effect, all fsyscalls will end up bubbling diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h index 21ec5f3d23de..3a62878e84f3 100644 --- a/include/asm-ia64/meminit.h +++ b/include/asm-ia64/meminit.h @@ -36,6 +36,7 @@ extern void reserve_memory (void); extern void find_initrd (void); extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg); extern void efi_memmap_init(unsigned long *, unsigned long *); +extern int find_max_min_low_pfn (unsigned long , unsigned long, void *); extern unsigned long vmcore_find_descriptor_size(unsigned long address); extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end); From c5e83e3f42938c0a84047e125edb98d6513f985b Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 12 Mar 2007 08:07:49 -0500 Subject: [PATCH 2/5] [IA64] Fix get_model_name() for mixed cpu type systems If a system consists of mixed processor types, kmalloc() can be called before the per-cpu data page is initialized. If the slab contains sufficient memory, then kmalloc() works ok. However, if the slabs are empty, slab calls the memory allocator. This requires per-cpu data (NODE_DATA()) & the cpu dies. Also noted by Russ Anderson who had a very similar patch. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 339e8a54c2f1..69b9bb3fd7c5 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -692,12 +692,15 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; -static char brandname[128]; +#define MAX_BRANDS 8 +static char brandname[MAX_BRANDS][128]; static char * __cpuinit get_model_name(__u8 family, __u8 model) { + static int overflow; char brand[128]; + int i; memcpy(brand, "Unknown", 8); if (ia64_pal_get_brand_info(brand)) { @@ -709,12 +712,17 @@ get_model_name(__u8 family, __u8 model) case 2: memcpy(brand, "Madison up to 9M cache", 23); break; } } - if (brandname[0] == '\0') - return strcpy(brandname, brand); - else if (strcmp(brandname, brand) == 0) - return brandname; - else - return kstrdup(brand, GFP_KERNEL); + for (i = 0; i < MAX_BRANDS; i++) + if (strcmp(brandname[i], brand) == 0) + return brandname[i]; + for (i = 0; i < MAX_BRANDS; i++) + if (brandname[i][0] == '\0') + return strcpy(brandname[i], brand); + if (overflow++ == 0) + printk(KERN_ERR + "%s: Table overflow. Some processor model information will be missing\n", + __FUNCTION__); + return "Unknown"; } static void __cpuinit From 60b548dfe4ad178dbf0fa8c2a50e36aaa42d603a Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Tue, 20 Mar 2007 13:47:47 -0700 Subject: [PATCH 3/5] [IA64] Fix typo/thinko in crash.c Clearly should be checking for "val == DIE_INIT_SLAVE_ENTER". Signed-off-by: Jay Lan Acked-by: Simon Horman Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 7d1bbb4403ba..80a94e707827 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -164,7 +164,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) nd = (struct ia64_mca_notify_die *)args->err; /* Reason code 1 means machine check rendezous*/ - if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) && + if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) && nd->sos->rv_rc == 1) return NOTIFY_DONE; From 0bdfc1900705dadef6286d69de7da0feef841734 Mon Sep 17 00:00:00 2001 From: John Keller Date: Tue, 20 Mar 2007 13:50:10 -0500 Subject: [PATCH 4/5] [IA64] Altix: ioremap vga_console_iobase When booting an SN system without specifing a console (i.e., no "console=" on boot line), the system will hang during boot at the point where /sbin/init is run. The problem is that vga_console_iobase is not converted to a virtual address before storing in io_space[0].mmio_base. The conversion was happening in sn_scan_pcdp(), but not in setup_vga_console(). Signed-off-by: John Keller Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index bd5373d593e1..a9bed5ca2ed8 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -348,8 +348,7 @@ sn_scan_pcdp(void) continue; /* not PCI interconnect */ if (if_pci.translation & PCDP_PCI_TRANS_IOPORT) - vga_console_iobase = - if_pci.ioport_tra | __IA64_UNCACHED_OFFSET; + vga_console_iobase = if_pci.ioport_tra; if (if_pci.translation & PCDP_PCI_TRANS_MMIO) vga_console_membase = @@ -429,7 +428,8 @@ void __init sn_setup(char **cmdline_p) * bus containing the VGA console. */ if (vga_console_iobase) { - io_space[0].mmio_base = vga_console_iobase; + io_space[0].mmio_base = + (unsigned long) ioremap(vga_console_iobase, 0); io_space[0].sparse = 0; } From 58a69c367c02a165004a5ce5dd55ce03b59ba43d Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 14 Mar 2007 19:07:47 +0100 Subject: [PATCH 5/5] [IA64] Fix wrong /proc/iomem on SGI Altix In sn_io_slot_fixup(), the parent is re-set from the bus to io(port|mem)_resource because the address is changed in a way that it's not child of the bus any more. However, only the root is set but not the parent/child/sibling relationship in the resource tree which causes 'cat /proc/iomem' to stop after this memory area. Depding on the poition in the tree the iomem may be nearly completely empty. Signed-off-by: Bernhard Walle Acked-by: John Keller Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 600be3ebae05..6b10e5d28488 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -247,10 +247,18 @@ sn_io_slot_fixup(struct pci_dev *dev) addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; dev->resource[idx].start = addr; dev->resource[idx].end = addr + size; + + /* + * if it's already in the device structure, remove it before + * inserting + */ + if (dev->resource[idx].parent && dev->resource[idx].parent->child) + release_resource(&dev->resource[idx]); + if (dev->resource[idx].flags & IORESOURCE_IO) - dev->resource[idx].parent = &ioport_resource; + insert_resource(&ioport_resource, &dev->resource[idx]); else - dev->resource[idx].parent = &iomem_resource; + insert_resource(&iomem_resource, &dev->resource[idx]); /* If ROM, mark as shadowed in PROM */ if (idx == PCI_ROM_RESOURCE) dev->resource[idx].flags |= IORESOURCE_ROM_BIOS_COPY;