From 1c80b990a3411733890eff10817e388d5e25e2dd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Sep 2010 23:09:51 +0200 Subject: [PATCH 1/5] HWPOISON: Improve comments in memory-failure.c Clean up and improve the overview comment in memory-failure.c Tidy some grammar issues in other comments. Signed-off-by: Andi Kleen --- mm/memory-failure.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 757f6b0accfe..eebb9d8efae4 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -7,21 +7,26 @@ * Free Software Foundation. * * High level machine check handler. Handles pages reported by the - * hardware as being corrupted usually due to a 2bit ECC memory or cache + * hardware as being corrupted usually due to a multi-bit ECC memory or cache * failure. + * + * In addition there is a "soft offline" entry point that allows stop using + * not-yet-corrupted-by-suspicious pages without killing anything. * * Handles page cache pages in various states. The tricky part - * here is that we can access any page asynchronous to other VM - * users, because memory failures could happen anytime and anywhere, - * possibly violating some of their assumptions. This is why this code - * has to be extremely careful. Generally it tries to use normal locking - * rules, as in get the standard locks, even if that means the - * error handling takes potentially a long time. - * - * The operation to map back from RMAP chains to processes has to walk - * the complete process list and has non linear complexity with the number - * mappings. In short it can be quite slow. But since memory corruptions - * are rare we hope to get away with this. + * here is that we can access any page asynchronously in respect to + * other VM users, because memory failures could happen anytime and + * anywhere. This could violate some of their assumptions. This is why + * this code has to be extremely careful. Generally it tries to use + * normal locking rules, as in get the standard locks, even if that means + * the error handling takes potentially a long time. + * + * There are several operations here with exponential complexity because + * of unsuitable VM data structures. For example the operation to map back + * from RMAP chains to processes has to walk the complete process list and + * has non linear complexity with the number. But since memory corruptions + * are rare we hope to get away with this. This avoids impacting the core + * VM. */ /* @@ -78,7 +83,7 @@ static int hwpoison_filter_dev(struct page *p) return 0; /* - * page_mapping() does not accept slab page + * page_mapping() does not accept slab pages. */ if (PageSlab(p)) return -EINVAL; From fb46e73520940bfc426152cfe5e4a9f1ae3f00b6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Sep 2010 23:31:30 +0200 Subject: [PATCH 2/5] HWPOISON: Convert pr_debugs to pr_info Convert a lot of pr_debugs in memory-failure.c that are generally useful to pr_info. It's reasonable to print at least one message why offlining succeeded or failed by default. Signed-off-by: Andi Kleen --- mm/memory-failure.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eebb9d8efae4..0a2ed9a17e8c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -314,7 +314,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, * a SIGKILL because the error is not contained anymore. */ if (tk->addr == -EFAULT) { - pr_debug("MCE: Unable to find user space address %lx in %s\n", + pr_info("MCE: Unable to find user space address %lx in %s\n", page_to_pfn(p), tsk->comm); tk->addr_valid = 0; } @@ -582,7 +582,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) pfn, err); } else if (page_has_private(p) && !try_to_release_page(p, GFP_NOIO)) { - pr_debug("MCE %#lx: failed to release buffers\n", pfn); + pr_info("MCE %#lx: failed to release buffers\n", pfn); } else { ret = RECOVERED; } @@ -1152,7 +1152,7 @@ int unpoison_memory(unsigned long pfn) page = compound_head(p); if (!PageHWPoison(p)) { - pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn); + pr_info("MCE: Page was already unpoisoned %#lx\n", pfn); return 0; } @@ -1161,7 +1161,7 @@ int unpoison_memory(unsigned long pfn) if (!get_page_unless_zero(page)) { if (TestClearPageHWPoison(p)) atomic_long_sub(nr_pages, &mce_bad_pages); - pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn); + pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); return 0; } @@ -1173,7 +1173,7 @@ int unpoison_memory(unsigned long pfn) * the free buddy page pool. */ if (TestClearPageHWPoison(page)) { - pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn); + pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); atomic_long_sub(nr_pages, &mce_bad_pages); freeit = 1; } @@ -1222,12 +1222,12 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) set_migratetype_isolate(p); if (!get_page_unless_zero(compound_head(p))) { if (is_free_buddy_page(p)) { - pr_debug("get_any_page: %#lx free buddy page\n", pfn); + pr_info("get_any_page: %#lx free buddy page\n", pfn); /* Set hwpoison bit while page is still isolated */ SetPageHWPoison(p); ret = 0; } else { - pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n", + pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", pfn, p->flags); ret = -EIO; } @@ -1293,7 +1293,7 @@ int soft_offline_page(struct page *page, int flags) goto done; } if (!PageLRU(page)) { - pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n", + pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", pfn, page->flags); return -EIO; } @@ -1307,7 +1307,7 @@ int soft_offline_page(struct page *page, int flags) if (PageHWPoison(page)) { unlock_page(page); put_page(page); - pr_debug("soft offline: %#lx page already poisoned\n", pfn); + pr_info("soft offline: %#lx page already poisoned\n", pfn); return -EBUSY; } @@ -1328,7 +1328,7 @@ int soft_offline_page(struct page *page, int flags) put_page(page); if (ret == 1) { ret = 0; - pr_debug("soft_offline: %#lx: invalidated\n", pfn); + pr_info("soft_offline: %#lx: invalidated\n", pfn); goto done; } @@ -1344,13 +1344,13 @@ int soft_offline_page(struct page *page, int flags) list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); if (ret) { - pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", + pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); if (ret > 0) ret = -EIO; } } else { - pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", + pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", pfn, ret, page_count(page), page->flags); } if (ret) From 898e70d1e526d7814bd2f64c907706b83ffca9af Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Sep 2010 23:33:29 +0200 Subject: [PATCH 3/5] HWPOISON: Disable DEBUG by default Now that only a few obscure messages are left as pr_debug disable outputting of pr_debug in memory-failure.c by default. Signed-off-by: Andi Kleen --- mm/memory-failure.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0a2ed9a17e8c..77b3e79528f0 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -35,7 +35,6 @@ * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages * - pass bad pages to kdump next kernel */ -#define DEBUG 1 /* remove me in 2.6.34 */ #include #include #include From 9033ae16407f46ae06f559f9374281f6e9d89efc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Sep 2010 23:36:05 +0200 Subject: [PATCH 4/5] HWPOISON: Turn addr_valid from bitfield into char The addr_valid flag is the only flag in "to_kill" and it's slightly more efficient to have it as char instead of a bitfield. Signed-off-by: Andi Kleen --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 77b3e79528f0..88653c93e4ce 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -272,7 +272,7 @@ struct to_kill { struct list_head nd; struct task_struct *tsk; unsigned long addr; - unsigned addr_valid:1; + char addr_valid; }; /* From a08c80ebb621a6dc277c91e029acb725f2f20254 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Sep 2010 23:39:30 +0200 Subject: [PATCH 5/5] HWPOISON: Remove retry loop for try_to_unmap We don't reply in other temporary failure cases and there were no reports of replies happening. I think the original reason it was added was also just an early bug, not an observation of the race. So remove the loop for now, but keep a warning message. Signed-off-by: Andi Kleen --- mm/memory-failure.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 88653c93e4ce..2044fe8920c2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -840,8 +840,6 @@ static int page_action(struct page_state *ps, struct page *p, return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; } -#define N_UNMAP_TRIES 5 - /* * Do all that is necessary to remove user space mappings. Unmap * the pages and send SIGBUS to the processes if the data was dirty. @@ -853,7 +851,6 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, struct address_space *mapping; LIST_HEAD(tokill); int ret; - int i; int kill = 1; struct page *hpage = compound_head(p); @@ -907,17 +904,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, if (kill) collect_procs(hpage, &tokill); - /* - * try_to_unmap can fail temporarily due to races. - * Try a few times (RED-PEN better strategy?) - */ - for (i = 0; i < N_UNMAP_TRIES; i++) { - ret = try_to_unmap(hpage, ttu); - if (ret == SWAP_SUCCESS) - break; - pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); - } - + ret = try_to_unmap(hpage, ttu); if (ret != SWAP_SUCCESS) printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", pfn, page_mapcount(hpage));