mm: help __GFP_NOFAIL allocations which do not trigger OOM killer

Now that __GFP_NOFAIL doesn't override decisions to skip the oom killer
we are left with requests which have to loop inside the allocator
without invoking the oom killer (e.g. GFP_NOFS|__GFP_NOFAIL used by fs
code) and so they might, in very unlikely situations, loop forever -
e.g. other parallel requests could starve them.
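
For illustration, such a non-failing fs allocation typically looks like
this (a hypothetical call site - 'struct foo' is made up, but kmalloc
and the gfp flags are real):

	/*
	 * GFP_NOFS forbids recursing into fs reclaim, __GFP_NOFAIL forbids
	 * returning NULL, so the allocator has to loop internally until
	 * the request succeeds - without the oom killer helping it along.
	 */
	struct foo *f = kmalloc(sizeof(*f), GFP_NOFS | __GFP_NOFAIL);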

This patch tries to limit the likelihood of such a lockup by giving
these __GFP_NOFAIL requests a chance to move on by consuming a small
part of the memory reserves.  We are using ALLOC_HARDER, which should be
enough to prevent starvation by regular allocation requests, yet it
shouldn't consume enough of the reserves to disrupt high priority
requests (ALLOC_HIGH).
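
For context, the watermark check relaxes the min watermark differently
for the two flags - roughly like this (a simplified sketch of the
__zone_watermark_ok() logic; the exact code differs between kernel
versions):

	long min = mark;

	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;		/* may dip half way into the reserve */
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;		/* shaves off only another quarter */

	return free_pages > min + z->lowmem_reserve[classzone_idx];

so an ALLOC_HARDER retry is granted a smaller share of the reserves
than ALLOC_HIGH users get.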

While we are at it, let's introduce a helper, __alloc_pages_cpuset_fallback,
which enforces the cpusets but allows falling back to ignoring them if the
first attempt fails.  __GFP_NOFAIL requests can be considered important
enough to allow a cpuset runaway in order for the system to move on.  It
is highly unlikely that any of these will be GFP_USER anyway.

Link: http://lkml.kernel.org/r/20161220134904.21023-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 6c18ba7a18 (parent 06ad276ac1)
Author: Michal Hocko, 2017-02-22 15:46:25 -08:00
Committed by: Linus Torvalds
1 changed file with 36 additions and 10 deletions

@@ -3055,6 +3055,26 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 	warn_alloc_show_mem(gfp_mask, nm);
 }
 
+static inline struct page *
+__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
+			      unsigned int alloc_flags,
+			      const struct alloc_context *ac)
+{
+	struct page *page;
+
+	page = get_page_from_freelist(gfp_mask, order,
+			alloc_flags|ALLOC_CPUSET, ac);
+	/*
+	 * fallback to ignore cpuset restriction if our nodes
+	 * are depleted
+	 */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order,
+				alloc_flags, ac);
+
+	return page;
+}
+
 static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	const struct alloc_context *ac, unsigned long *did_some_progress)
@@ -3119,18 +3139,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
 		*did_some_progress = 1;
 
-		if (gfp_mask & __GFP_NOFAIL) {
-			page = get_page_from_freelist(gfp_mask, order,
-					ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
-			/*
-			 * fallback to ignore cpuset restriction if our nodes
-			 * are depleted
-			 */
-			if (!page)
-				page = get_page_from_freelist(gfp_mask, order,
-					ALLOC_NO_WATERMARKS, ac);
-		}
+		/*
+		 * Help non-failing allocations by giving them access to memory
+		 * reserves
+		 */
+		if (gfp_mask & __GFP_NOFAIL)
+			page = __alloc_pages_cpuset_fallback(gfp_mask, order,
+					ALLOC_NO_WATERMARKS, ac);
 	}
 out:
 	mutex_unlock(&oom_lock);
 	return page;
@@ -3785,6 +3801,16 @@ nopage:
 	 */
 	WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
 
+	/*
+	 * Help non-failing allocations by giving them access to memory
+	 * reserves but do not use ALLOC_NO_WATERMARKS because this
+	 * could deplete whole memory reserves which would just make
+	 * the situation worse
+	 */
+	page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+	if (page)
+		goto got_pg;
+
 	cond_resched();
 	goto retry;
 }