mm/page_alloc: integrate classzone_idx and high_zoneidx

classzone_idx is now just a different name for high_zoneidx.  So, integrate
them and add a comment to struct alloc_context in order to reduce
future confusion about the meaning of this variable.

The accessor, ac_classzone_idx(), is also removed since it isn't needed
after the integration.

In addition to the integration, this patch also renames high_zoneidx to
highest_zoneidx since the new name conveys the meaning more precisely.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Baoquan He <bhe@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Ye Xiaolong <xiaolong.ye@intel.com>
Link: http://lkml.kernel.org/r/1587095923-7515-3-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Joonsoo Kim 2020-06-03 15:59:01 -07:00 committed by Linus Torvalds
parent 3334a45eb9
commit 97a225e69a
12 changed files with 175 additions and 150 deletions

View File

@ -97,7 +97,7 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
struct page **page); struct page **page);
extern void reset_isolation_suitable(pg_data_t *pgdat); extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order, extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int classzone_idx); unsigned int alloc_flags, int highest_zoneidx);
extern void defer_compaction(struct zone *zone, int order); extern void defer_compaction(struct zone *zone, int order);
extern bool compaction_deferred(struct zone *zone, int order); extern bool compaction_deferred(struct zone *zone, int order);
@ -182,7 +182,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
extern int kcompactd_run(int nid); extern int kcompactd_run(int nid);
extern void kcompactd_stop(int nid); extern void kcompactd_stop(int nid);
extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx);
#else #else
static inline void reset_isolation_suitable(pg_data_t *pgdat) static inline void reset_isolation_suitable(pg_data_t *pgdat)
@ -190,7 +190,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
} }
static inline enum compact_result compaction_suitable(struct zone *zone, int order, static inline enum compact_result compaction_suitable(struct zone *zone, int order,
int alloc_flags, int classzone_idx) int alloc_flags, int highest_zoneidx)
{ {
return COMPACT_SKIPPED; return COMPACT_SKIPPED;
} }
@ -232,7 +232,8 @@ static inline void kcompactd_stop(int nid)
{ {
} }
static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) static inline void wakeup_kcompactd(pg_data_t *pgdat,
int order, int highest_zoneidx)
{ {
} }

View File

@ -699,13 +699,13 @@ typedef struct pglist_data {
struct task_struct *kswapd; /* Protected by struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */ mem_hotplug_begin/end() */
int kswapd_order; int kswapd_order;
enum zone_type kswapd_classzone_idx; enum zone_type kswapd_highest_zoneidx;
int kswapd_failures; /* Number of 'reclaimed == 0' runs */ int kswapd_failures; /* Number of 'reclaimed == 0' runs */
#ifdef CONFIG_COMPACTION #ifdef CONFIG_COMPACTION
int kcompactd_max_order; int kcompactd_max_order;
enum zone_type kcompactd_classzone_idx; enum zone_type kcompactd_highest_zoneidx;
wait_queue_head_t kcompactd_wait; wait_queue_head_t kcompactd_wait;
struct task_struct *kcompactd; struct task_struct *kcompactd;
#endif #endif
@ -783,15 +783,15 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
void build_all_zonelists(pg_data_t *pgdat); void build_all_zonelists(pg_data_t *pgdat);
void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
enum zone_type classzone_idx); enum zone_type highest_zoneidx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, unsigned int alloc_flags, int highest_zoneidx, unsigned int alloc_flags,
long free_pages); long free_pages);
bool zone_watermark_ok(struct zone *z, unsigned int order, bool zone_watermark_ok(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, unsigned long mark, int highest_zoneidx,
unsigned int alloc_flags); unsigned int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, unsigned int order, bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx); unsigned long mark, int highest_zoneidx);
enum memmap_context { enum memmap_context {
MEMMAP_EARLY, MEMMAP_EARLY,
MEMMAP_HOTPLUG, MEMMAP_HOTPLUG,

View File

@ -314,40 +314,44 @@ TRACE_EVENT(mm_compaction_kcompactd_sleep,
DECLARE_EVENT_CLASS(kcompactd_wake_template, DECLARE_EVENT_CLASS(kcompactd_wake_template,
TP_PROTO(int nid, int order, enum zone_type classzone_idx), TP_PROTO(int nid, int order, enum zone_type highest_zoneidx),
TP_ARGS(nid, order, classzone_idx), TP_ARGS(nid, order, highest_zoneidx),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(int, nid) __field(int, nid)
__field(int, order) __field(int, order)
__field(enum zone_type, classzone_idx) __field(enum zone_type, highest_zoneidx)
), ),
TP_fast_assign( TP_fast_assign(
__entry->nid = nid; __entry->nid = nid;
__entry->order = order; __entry->order = order;
__entry->classzone_idx = classzone_idx; __entry->highest_zoneidx = highest_zoneidx;
), ),
/*
* classzone_idx is the previous name of highest_zoneidx.
* The old name is kept in the output string because the tracepoint
* format is subject to the ABI requirement.
*/
TP_printk("nid=%d order=%d classzone_idx=%-8s", TP_printk("nid=%d order=%d classzone_idx=%-8s",
__entry->nid, __entry->nid,
__entry->order, __entry->order,
__print_symbolic(__entry->classzone_idx, ZONE_TYPE)) __print_symbolic(__entry->highest_zoneidx, ZONE_TYPE))
); );
DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd, DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd,
TP_PROTO(int nid, int order, enum zone_type classzone_idx), TP_PROTO(int nid, int order, enum zone_type highest_zoneidx),
TP_ARGS(nid, order, classzone_idx) TP_ARGS(nid, order, highest_zoneidx)
); );
DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake, DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake,
TP_PROTO(int nid, int order, enum zone_type classzone_idx), TP_PROTO(int nid, int order, enum zone_type highest_zoneidx),
TP_ARGS(nid, order, classzone_idx) TP_ARGS(nid, order, highest_zoneidx)
); );
#endif #endif

View File

@ -265,7 +265,7 @@ TRACE_EVENT(mm_shrink_slab_end,
); );
TRACE_EVENT(mm_vmscan_lru_isolate, TRACE_EVENT(mm_vmscan_lru_isolate,
TP_PROTO(int classzone_idx, TP_PROTO(int highest_zoneidx,
int order, int order,
unsigned long nr_requested, unsigned long nr_requested,
unsigned long nr_scanned, unsigned long nr_scanned,
@ -274,10 +274,10 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
isolate_mode_t isolate_mode, isolate_mode_t isolate_mode,
int lru), int lru),
TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(int, classzone_idx) __field(int, highest_zoneidx)
__field(int, order) __field(int, order)
__field(unsigned long, nr_requested) __field(unsigned long, nr_requested)
__field(unsigned long, nr_scanned) __field(unsigned long, nr_scanned)
@ -288,7 +288,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
), ),
TP_fast_assign( TP_fast_assign(
__entry->classzone_idx = classzone_idx; __entry->highest_zoneidx = highest_zoneidx;
__entry->order = order; __entry->order = order;
__entry->nr_requested = nr_requested; __entry->nr_requested = nr_requested;
__entry->nr_scanned = nr_scanned; __entry->nr_scanned = nr_scanned;
@ -298,9 +298,13 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
__entry->lru = lru; __entry->lru = lru;
), ),
/*
* classzone is the previous name of highest_zoneidx.
* The old name is kept in the output string because the tracepoint
* format is subject to the ABI requirement.
*/
TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
__entry->isolate_mode, __entry->isolate_mode,
__entry->classzone_idx, __entry->highest_zoneidx,
__entry->order, __entry->order,
__entry->nr_requested, __entry->nr_requested,
__entry->nr_scanned, __entry->nr_scanned,

View File

@ -1968,7 +1968,7 @@ static enum compact_result compact_finished(struct compact_control *cc)
*/ */
static enum compact_result __compaction_suitable(struct zone *zone, int order, static enum compact_result __compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, unsigned int alloc_flags,
int classzone_idx, int highest_zoneidx,
unsigned long wmark_target) unsigned long wmark_target)
{ {
unsigned long watermark; unsigned long watermark;
@ -1981,7 +1981,7 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
* If watermarks for high-order allocation are already met, there * If watermarks for high-order allocation are already met, there
* should be no need for compaction at all. * should be no need for compaction at all.
*/ */
if (zone_watermark_ok(zone, order, watermark, classzone_idx, if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
alloc_flags)) alloc_flags))
return COMPACT_SUCCESS; return COMPACT_SUCCESS;
@ -1991,9 +1991,9 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
* watermark and alloc_flags have to match, or be more pessimistic than * watermark and alloc_flags have to match, or be more pessimistic than
* the check in __isolate_free_page(). We don't use the direct * the check in __isolate_free_page(). We don't use the direct
* compactor's alloc_flags, as they are not relevant for freepage * compactor's alloc_flags, as they are not relevant for freepage
* isolation. We however do use the direct compactor's classzone_idx to * isolation. We however do use the direct compactor's highest_zoneidx
* skip over zones where lowmem reserves would prevent allocation even * to skip over zones where lowmem reserves would prevent allocation
* if compaction succeeds. * even if compaction succeeds.
* For costly orders, we require low watermark instead of min for * For costly orders, we require low watermark instead of min for
* compaction to proceed to increase its chances. * compaction to proceed to increase its chances.
* ALLOC_CMA is used, as pages in CMA pageblocks are considered * ALLOC_CMA is used, as pages in CMA pageblocks are considered
@ -2002,7 +2002,7 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
low_wmark_pages(zone) : min_wmark_pages(zone); low_wmark_pages(zone) : min_wmark_pages(zone);
watermark += compact_gap(order); watermark += compact_gap(order);
if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
ALLOC_CMA, wmark_target)) ALLOC_CMA, wmark_target))
return COMPACT_SKIPPED; return COMPACT_SKIPPED;
@ -2011,12 +2011,12 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
enum compact_result compaction_suitable(struct zone *zone, int order, enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, unsigned int alloc_flags,
int classzone_idx) int highest_zoneidx)
{ {
enum compact_result ret; enum compact_result ret;
int fragindex; int fragindex;
ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx, ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
zone_page_state(zone, NR_FREE_PAGES)); zone_page_state(zone, NR_FREE_PAGES));
/* /*
* fragmentation index determines if allocation failures are due to * fragmentation index determines if allocation failures are due to
@ -2057,8 +2057,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
* Make sure at least one zone would pass __compaction_suitable if we continue * Make sure at least one zone would pass __compaction_suitable if we continue
* retrying the reclaim. * retrying the reclaim.
*/ */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->nodemask) { ac->highest_zoneidx, ac->nodemask) {
unsigned long available; unsigned long available;
enum compact_result compact_result; enum compact_result compact_result;
@ -2071,7 +2071,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
available = zone_reclaimable_pages(zone) / order; available = zone_reclaimable_pages(zone) / order;
available += zone_page_state_snapshot(zone, NR_FREE_PAGES); available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
compact_result = __compaction_suitable(zone, order, alloc_flags, compact_result = __compaction_suitable(zone, order, alloc_flags,
ac_classzone_idx(ac), available); ac->highest_zoneidx, available);
if (compact_result != COMPACT_SKIPPED) if (compact_result != COMPACT_SKIPPED)
return true; return true;
} }
@ -2102,7 +2102,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask); cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags, ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
cc->classzone_idx); cc->highest_zoneidx);
/* Compaction is likely to fail */ /* Compaction is likely to fail */
if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED) if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
return ret; return ret;
@ -2293,7 +2293,7 @@ out:
static enum compact_result compact_zone_order(struct zone *zone, int order, static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum compact_priority prio, gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int classzone_idx, unsigned int alloc_flags, int highest_zoneidx,
struct page **capture) struct page **capture)
{ {
enum compact_result ret; enum compact_result ret;
@ -2305,7 +2305,7 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.mode = (prio == COMPACT_PRIO_ASYNC) ? .mode = (prio == COMPACT_PRIO_ASYNC) ?
MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT, MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
.alloc_flags = alloc_flags, .alloc_flags = alloc_flags,
.classzone_idx = classzone_idx, .highest_zoneidx = highest_zoneidx,
.direct_compaction = true, .direct_compaction = true,
.whole_zone = (prio == MIN_COMPACT_PRIORITY), .whole_zone = (prio == MIN_COMPACT_PRIORITY),
.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY), .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
@ -2361,8 +2361,8 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
/* Compact each zone in the list */ /* Compact each zone in the list */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->nodemask) { ac->highest_zoneidx, ac->nodemask) {
enum compact_result status; enum compact_result status;
if (prio > MIN_COMPACT_PRIORITY if (prio > MIN_COMPACT_PRIORITY
@ -2372,7 +2372,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
} }
status = compact_zone_order(zone, order, gfp_mask, prio, status = compact_zone_order(zone, order, gfp_mask, prio,
alloc_flags, ac_classzone_idx(ac), capture); alloc_flags, ac->highest_zoneidx, capture);
rc = max(status, rc); rc = max(status, rc);
/* The allocation should succeed, stop compacting */ /* The allocation should succeed, stop compacting */
@ -2507,16 +2507,16 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
{ {
int zoneid; int zoneid;
struct zone *zone; struct zone *zone;
enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx; enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
for (zoneid = 0; zoneid <= classzone_idx; zoneid++) { for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
zone = &pgdat->node_zones[zoneid]; zone = &pgdat->node_zones[zoneid];
if (!populated_zone(zone)) if (!populated_zone(zone))
continue; continue;
if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0, if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
classzone_idx) == COMPACT_CONTINUE) highest_zoneidx) == COMPACT_CONTINUE)
return true; return true;
} }
@ -2534,16 +2534,16 @@ static void kcompactd_do_work(pg_data_t *pgdat)
struct compact_control cc = { struct compact_control cc = {
.order = pgdat->kcompactd_max_order, .order = pgdat->kcompactd_max_order,
.search_order = pgdat->kcompactd_max_order, .search_order = pgdat->kcompactd_max_order,
.classzone_idx = pgdat->kcompactd_classzone_idx, .highest_zoneidx = pgdat->kcompactd_highest_zoneidx,
.mode = MIGRATE_SYNC_LIGHT, .mode = MIGRATE_SYNC_LIGHT,
.ignore_skip_hint = false, .ignore_skip_hint = false,
.gfp_mask = GFP_KERNEL, .gfp_mask = GFP_KERNEL,
}; };
trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
cc.classzone_idx); cc.highest_zoneidx);
count_compact_event(KCOMPACTD_WAKE); count_compact_event(KCOMPACTD_WAKE);
for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) { for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) {
int status; int status;
zone = &pgdat->node_zones[zoneid]; zone = &pgdat->node_zones[zoneid];
@ -2592,16 +2592,16 @@ static void kcompactd_do_work(pg_data_t *pgdat)
/* /*
* Regardless of success, we are done until woken up next. But remember * Regardless of success, we are done until woken up next. But remember
* the requested order/classzone_idx in case it was higher/tighter than * the requested order/highest_zoneidx in case it was higher/tighter
* our current ones * than our current ones
*/ */
if (pgdat->kcompactd_max_order <= cc.order) if (pgdat->kcompactd_max_order <= cc.order)
pgdat->kcompactd_max_order = 0; pgdat->kcompactd_max_order = 0;
if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx) if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx)
pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1; pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
} }
void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
{ {
if (!order) if (!order)
return; return;
@ -2609,8 +2609,8 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
if (pgdat->kcompactd_max_order < order) if (pgdat->kcompactd_max_order < order)
pgdat->kcompactd_max_order = order; pgdat->kcompactd_max_order = order;
if (pgdat->kcompactd_classzone_idx > classzone_idx) if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx)
pgdat->kcompactd_classzone_idx = classzone_idx; pgdat->kcompactd_highest_zoneidx = highest_zoneidx;
/* /*
* Pairs with implicit barrier in wait_event_freezable() * Pairs with implicit barrier in wait_event_freezable()
@ -2623,7 +2623,7 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
return; return;
trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order, trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
classzone_idx); highest_zoneidx);
wake_up_interruptible(&pgdat->kcompactd_wait); wake_up_interruptible(&pgdat->kcompactd_wait);
} }
@ -2644,7 +2644,7 @@ static int kcompactd(void *p)
set_freezable(); set_freezable();
pgdat->kcompactd_max_order = 0; pgdat->kcompactd_max_order = 0;
pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1; pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
unsigned long pflags; unsigned long pflags;

View File

@ -127,10 +127,10 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
* between functions involved in allocations, including the alloc_pages* * between functions involved in allocations, including the alloc_pages*
* family of functions. * family of functions.
* *
* nodemask, migratetype and high_zoneidx are initialized only once in * nodemask, migratetype and highest_zoneidx are initialized only once in
* __alloc_pages_nodemask() and then never change. * __alloc_pages_nodemask() and then never change.
* *
* zonelist, preferred_zone and classzone_idx are set first in * zonelist, preferred_zone and highest_zoneidx are set first in
* __alloc_pages_nodemask() for the fast path, and might be later changed * __alloc_pages_nodemask() for the fast path, and might be later changed
* in __alloc_pages_slowpath(). All other functions pass the whole structure * in __alloc_pages_slowpath(). All other functions pass the whole structure
* by a const pointer. * by a const pointer.
@ -140,12 +140,21 @@ struct alloc_context {
nodemask_t *nodemask; nodemask_t *nodemask;
struct zoneref *preferred_zoneref; struct zoneref *preferred_zoneref;
int migratetype; int migratetype;
enum zone_type high_zoneidx;
/*
* highest_zoneidx represents the highest usable zone index of
* the allocation request. Due to the nature of zones,
* memory in zones lower than highest_zoneidx will be
* protected by lowmem_reserve[highest_zoneidx].
*
* highest_zoneidx is also used by reclaim/compaction to limit
* the target zones, since zones higher than this index cannot
* be used for this allocation request.
*/
enum zone_type highest_zoneidx;
bool spread_dirty_pages; bool spread_dirty_pages;
}; };
#define ac_classzone_idx(ac) (ac->high_zoneidx)
/* /*
* Locate the struct page for both the matching buddy in our * Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page). * pair (buddy1) and the combined O(n+1) page they form (page).
@ -224,7 +233,7 @@ struct compact_control {
int order; /* order a direct compactor needs */ int order; /* order a direct compactor needs */
int migratetype; /* migratetype of direct compactor */ int migratetype; /* migratetype of direct compactor */
const unsigned int alloc_flags; /* alloc flags of a direct compactor */ const unsigned int alloc_flags; /* alloc flags of a direct compactor */
const int classzone_idx; /* zone index of a direct compactor */ const int highest_zoneidx; /* zone index of a direct compactor */
enum migrate_mode mode; /* Async or sync migration mode */ enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool ignore_skip_hint; /* Scan blocks even if marked skip */
bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool no_set_skip_hint; /* Don't mark blocks for skipping */

View File

@ -879,13 +879,13 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
} else { } else {
int cpu; int cpu;
/* /*
* Reset the nr_zones, order and classzone_idx before reuse. * Reset the nr_zones, order and highest_zoneidx before reuse.
* Note that kswapd will init kswapd_classzone_idx properly * Note that kswapd will init kswapd_highest_zoneidx properly
* when it starts in the near future. * when it starts in the near future.
*/ */
pgdat->nr_zones = 0; pgdat->nr_zones = 0;
pgdat->kswapd_order = 0; pgdat->kswapd_order = 0;
pgdat->kswapd_classzone_idx = 0; pgdat->kswapd_highest_zoneidx = 0;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
struct per_cpu_nodestat *p; struct per_cpu_nodestat *p;

View File

@ -254,7 +254,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
{ {
struct zone *zone; struct zone *zone;
struct zoneref *z; struct zoneref *z;
enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); enum zone_type highest_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false; bool cpuset_limited = false;
int nid; int nid;
@ -294,7 +294,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
/* Check this allocation failure is caused by cpuset's wall function */ /* Check this allocation failure is caused by cpuset's wall function */
for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
high_zoneidx, oc->nodemask) highest_zoneidx, oc->nodemask)
if (!cpuset_zone_allowed(zone, oc->gfp_mask)) if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true; cpuset_limited = true;

View File

@ -2593,7 +2593,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
int order; int order;
bool ret; bool ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) { ac->nodemask) {
/* /*
* Preserve at least one pageblock unless memory pressure * Preserve at least one pageblock unless memory pressure
@ -3462,7 +3462,7 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
* to check in the allocation paths if no pages are free. * to check in the allocation paths if no pages are free.
*/ */
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, unsigned int alloc_flags, int highest_zoneidx, unsigned int alloc_flags,
long free_pages) long free_pages)
{ {
long min = mark; long min = mark;
@ -3507,7 +3507,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
* are not met, then a high-order request also cannot go ahead * are not met, then a high-order request also cannot go ahead
* even if a suitable page happened to be free. * even if a suitable page happened to be free.
*/ */
if (free_pages <= min + z->lowmem_reserve[classzone_idx]) if (free_pages <= min + z->lowmem_reserve[highest_zoneidx])
return false; return false;
/* If this is an order-0 request then the watermark is fine */ /* If this is an order-0 request then the watermark is fine */
@ -3540,14 +3540,15 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
} }
bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, unsigned int alloc_flags) int highest_zoneidx, unsigned int alloc_flags)
{ {
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
zone_page_state(z, NR_FREE_PAGES)); zone_page_state(z, NR_FREE_PAGES));
} }
static inline bool zone_watermark_fast(struct zone *z, unsigned int order, static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, unsigned int alloc_flags) unsigned long mark, int highest_zoneidx,
unsigned int alloc_flags)
{ {
long free_pages = zone_page_state(z, NR_FREE_PAGES); long free_pages = zone_page_state(z, NR_FREE_PAGES);
long cma_pages = 0; long cma_pages = 0;
@ -3565,22 +3566,23 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
* the caller is !atomic then it'll uselessly search the free * the caller is !atomic then it'll uselessly search the free
* list. That corner case is then slower but it is harmless. * list. That corner case is then slower but it is harmless.
*/ */
if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx]) if (!order && (free_pages - cma_pages) >
mark + z->lowmem_reserve[highest_zoneidx])
return true; return true;
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
free_pages); free_pages);
} }
bool zone_watermark_ok_safe(struct zone *z, unsigned int order, bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx) unsigned long mark, int highest_zoneidx)
{ {
long free_pages = zone_page_state(z, NR_FREE_PAGES); long free_pages = zone_page_state(z, NR_FREE_PAGES);
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
return __zone_watermark_ok(z, order, mark, classzone_idx, 0, return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
free_pages); free_pages);
} }
@ -3657,8 +3659,8 @@ retry:
*/ */
no_fallback = alloc_flags & ALLOC_NOFRAGMENT; no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
z = ac->preferred_zoneref; z = ac->preferred_zoneref;
for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, for_next_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->nodemask) { ac->highest_zoneidx, ac->nodemask) {
struct page *page; struct page *page;
unsigned long mark; unsigned long mark;
@ -3713,7 +3715,7 @@ retry:
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK); mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
if (!zone_watermark_fast(zone, order, mark, if (!zone_watermark_fast(zone, order, mark,
ac_classzone_idx(ac), alloc_flags)) { ac->highest_zoneidx, alloc_flags)) {
int ret; int ret;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@ -3746,7 +3748,7 @@ retry:
default: default:
/* did we reclaim enough */ /* did we reclaim enough */
if (zone_watermark_ok(zone, order, mark, if (zone_watermark_ok(zone, order, mark,
ac_classzone_idx(ac), alloc_flags)) ac->highest_zoneidx, alloc_flags))
goto try_this_zone; goto try_this_zone;
continue; continue;
@ -3905,7 +3907,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (gfp_mask & __GFP_RETRY_MAYFAIL) if (gfp_mask & __GFP_RETRY_MAYFAIL)
goto out; goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */ /* The OOM killer does not needlessly kill tasks for lowmem */
if (ac->high_zoneidx < ZONE_NORMAL) if (ac->highest_zoneidx < ZONE_NORMAL)
goto out; goto out;
if (pm_suspended_storage()) if (pm_suspended_storage())
goto out; goto out;
@ -4108,10 +4110,10 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
* Let's give them a good hope and keep retrying while the order-0 * Let's give them a good hope and keep retrying while the order-0
* watermarks are OK. * watermarks are OK.
*/ */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->nodemask) { ac->highest_zoneidx, ac->nodemask) {
if (zone_watermark_ok(zone, 0, min_wmark_pages(zone), if (zone_watermark_ok(zone, 0, min_wmark_pages(zone),
ac_classzone_idx(ac), alloc_flags)) ac->highest_zoneidx, alloc_flags))
return true; return true;
} }
return false; return false;
@ -4235,12 +4237,12 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
struct zoneref *z; struct zoneref *z;
struct zone *zone; struct zone *zone;
pg_data_t *last_pgdat = NULL; pg_data_t *last_pgdat = NULL;
enum zone_type high_zoneidx = ac->high_zoneidx; enum zone_type highest_zoneidx = ac->highest_zoneidx;
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
ac->nodemask) { ac->nodemask) {
if (last_pgdat != zone->zone_pgdat) if (last_pgdat != zone->zone_pgdat)
wakeup_kswapd(zone, gfp_mask, order, high_zoneidx); wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
last_pgdat = zone->zone_pgdat; last_pgdat = zone->zone_pgdat;
} }
} }
@ -4375,8 +4377,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* request even if all reclaimable pages are considered then we are * request even if all reclaimable pages are considered then we are
* screwed and have to go OOM. * screwed and have to go OOM.
*/ */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->nodemask) { ac->highest_zoneidx, ac->nodemask) {
unsigned long available; unsigned long available;
unsigned long reclaimable; unsigned long reclaimable;
unsigned long min_wmark = min_wmark_pages(zone); unsigned long min_wmark = min_wmark_pages(zone);
@ -4390,7 +4392,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* reclaimable pages? * reclaimable pages?
*/ */
wmark = __zone_watermark_ok(zone, order, min_wmark, wmark = __zone_watermark_ok(zone, order, min_wmark,
ac_classzone_idx(ac), alloc_flags, available); ac->highest_zoneidx, alloc_flags, available);
trace_reclaim_retry_zone(z, order, reclaimable, trace_reclaim_retry_zone(z, order, reclaimable,
available, min_wmark, *no_progress_loops, wmark); available, min_wmark, *no_progress_loops, wmark);
if (wmark) { if (wmark) {
@ -4509,7 +4511,7 @@ retry_cpuset:
* could end up iterating over non-eligible zones endlessly. * could end up iterating over non-eligible zones endlessly.
*/ */
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->high_zoneidx, ac->nodemask); ac->highest_zoneidx, ac->nodemask);
if (!ac->preferred_zoneref->zone) if (!ac->preferred_zoneref->zone)
goto nopage; goto nopage;
@ -4596,7 +4598,7 @@ retry:
if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) { if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
ac->nodemask = NULL; ac->nodemask = NULL;
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->high_zoneidx, ac->nodemask); ac->highest_zoneidx, ac->nodemask);
} }
/* Attempt with potentially adjusted zonelist and alloc_flags */ /* Attempt with potentially adjusted zonelist and alloc_flags */
@ -4730,7 +4732,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac, gfp_t *alloc_mask, struct alloc_context *ac, gfp_t *alloc_mask,
unsigned int *alloc_flags) unsigned int *alloc_flags)
{ {
ac->high_zoneidx = gfp_zone(gfp_mask); ac->highest_zoneidx = gfp_zone(gfp_mask);
ac->zonelist = node_zonelist(preferred_nid, gfp_mask); ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
ac->nodemask = nodemask; ac->nodemask = nodemask;
ac->migratetype = gfpflags_to_migratetype(gfp_mask); ac->migratetype = gfpflags_to_migratetype(gfp_mask);
@ -4769,7 +4771,7 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
* may get reset for allocations that ignore memory policies. * may get reset for allocations that ignore memory policies.
*/ */
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->high_zoneidx, ac->nodemask); ac->highest_zoneidx, ac->nodemask);
} }
/* /*
@ -6867,7 +6869,7 @@ static void __init free_area_init_node(int nid)
unsigned long end_pfn = 0; unsigned long end_pfn = 0;
/* pg_data_t should be reset to zero when it's allocated */ /* pg_data_t should be reset to zero when it's allocated */
WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx); WARN_ON(pgdat->nr_zones || pgdat->kswapd_highest_zoneidx);
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);

View File

@ -3106,7 +3106,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zonelist *zonelist; struct zonelist *zonelist;
struct zoneref *z; struct zoneref *z;
struct zone *zone; struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags); enum zone_type highest_zoneidx = gfp_zone(flags);
void *obj = NULL; void *obj = NULL;
struct page *page; struct page *page;
int nid; int nid;
@ -3124,7 +3124,7 @@ retry:
* Look through allowed nodes for objects available * Look through allowed nodes for objects available
* from existing per node queues. * from existing per node queues.
*/ */
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
nid = zone_to_nid(zone); nid = zone_to_nid(zone);
if (cpuset_zone_allowed(zone, flags) && if (cpuset_zone_allowed(zone, flags) &&

View File

@ -1938,7 +1938,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
struct zonelist *zonelist; struct zonelist *zonelist;
struct zoneref *z; struct zoneref *z;
struct zone *zone; struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags); enum zone_type highest_zoneidx = gfp_zone(flags);
void *object; void *object;
unsigned int cpuset_mems_cookie; unsigned int cpuset_mems_cookie;
@ -1967,7 +1967,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
do { do {
cpuset_mems_cookie = read_mems_allowed_begin(); cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags); zonelist = node_zonelist(mempolicy_slab_node(), flags);
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
struct kmem_cache_node *n; struct kmem_cache_node *n;
n = get_node(s, zone_to_nid(zone)); n = get_node(s, zone_to_nid(zone));

View File

@ -3131,8 +3131,8 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
/* kswapd must be awake if processes are being throttled */ /* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
if (READ_ONCE(pgdat->kswapd_classzone_idx) > ZONE_NORMAL) if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
WRITE_ONCE(pgdat->kswapd_classzone_idx, ZONE_NORMAL); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL);
wake_up_interruptible(&pgdat->kswapd_wait); wake_up_interruptible(&pgdat->kswapd_wait);
} }
@ -3385,7 +3385,7 @@ static void age_active_anon(struct pglist_data *pgdat,
} while (memcg); } while (memcg);
} }
static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx) static bool pgdat_watermark_boosted(pg_data_t *pgdat, int highest_zoneidx)
{ {
int i; int i;
struct zone *zone; struct zone *zone;
@ -3397,7 +3397,7 @@ static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx)
* start prematurely when there is no boosting and a lower * start prematurely when there is no boosting and a lower
* zone is balanced. * zone is balanced.
*/ */
for (i = classzone_idx; i >= 0; i--) { for (i = highest_zoneidx; i >= 0; i--) {
zone = pgdat->node_zones + i; zone = pgdat->node_zones + i;
if (!managed_zone(zone)) if (!managed_zone(zone))
continue; continue;
@ -3411,9 +3411,9 @@ static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx)
/* /*
* Returns true if there is an eligible zone balanced for the request order * Returns true if there is an eligible zone balanced for the request order
* and classzone_idx * and highest_zoneidx
*/ */
static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
{ {
int i; int i;
unsigned long mark = -1; unsigned long mark = -1;
@ -3423,19 +3423,19 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
* Check watermarks bottom-up as lower zones are more likely to * Check watermarks bottom-up as lower zones are more likely to
* meet watermarks. * meet watermarks.
*/ */
for (i = 0; i <= classzone_idx; i++) { for (i = 0; i <= highest_zoneidx; i++) {
zone = pgdat->node_zones + i; zone = pgdat->node_zones + i;
if (!managed_zone(zone)) if (!managed_zone(zone))
continue; continue;
mark = high_wmark_pages(zone); mark = high_wmark_pages(zone);
if (zone_watermark_ok_safe(zone, order, mark, classzone_idx)) if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
return true; return true;
} }
/* /*
* If a node has no populated zone within classzone_idx, it does not * If a node has no populated zone within highest_zoneidx, it does not
* need balancing by definition. This can happen if a zone-restricted * need balancing by definition. This can happen if a zone-restricted
* allocation tries to wake a remote kswapd. * allocation tries to wake a remote kswapd.
*/ */
@ -3461,7 +3461,8 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
* *
* Returns true if kswapd is ready to sleep * Returns true if kswapd is ready to sleep
*/ */
static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order,
int highest_zoneidx)
{ {
/* /*
* The throttled processes are normally woken up in balance_pgdat() as * The throttled processes are normally woken up in balance_pgdat() as
@ -3483,7 +3484,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
return true; return true;
if (pgdat_balanced(pgdat, order, classzone_idx)) { if (pgdat_balanced(pgdat, order, highest_zoneidx)) {
clear_pgdat_congested(pgdat); clear_pgdat_congested(pgdat);
return true; return true;
} }
@ -3547,7 +3548,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
* or lower is eligible for reclaim until at least one usable zone is * or lower is eligible for reclaim until at least one usable zone is
* balanced. * balanced.
*/ */
static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
{ {
int i; int i;
unsigned long nr_soft_reclaimed; unsigned long nr_soft_reclaimed;
@ -3575,7 +3576,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
* stall or direct reclaim until kswapd is finished. * stall or direct reclaim until kswapd is finished.
*/ */
nr_boost_reclaim = 0; nr_boost_reclaim = 0;
for (i = 0; i <= classzone_idx; i++) { for (i = 0; i <= highest_zoneidx; i++) {
zone = pgdat->node_zones + i; zone = pgdat->node_zones + i;
if (!managed_zone(zone)) if (!managed_zone(zone))
continue; continue;
@ -3593,7 +3594,7 @@ restart:
bool balanced; bool balanced;
bool ret; bool ret;
sc.reclaim_idx = classzone_idx; sc.reclaim_idx = highest_zoneidx;
/* /*
* If the number of buffer_heads exceeds the maximum allowed * If the number of buffer_heads exceeds the maximum allowed
@ -3623,7 +3624,7 @@ restart:
* on the grounds that the normal reclaim should be enough to * on the grounds that the normal reclaim should be enough to
* re-evaluate if boosting is required when kswapd next wakes. * re-evaluate if boosting is required when kswapd next wakes.
*/ */
balanced = pgdat_balanced(pgdat, sc.order, classzone_idx); balanced = pgdat_balanced(pgdat, sc.order, highest_zoneidx);
if (!balanced && nr_boost_reclaim) { if (!balanced && nr_boost_reclaim) {
nr_boost_reclaim = 0; nr_boost_reclaim = 0;
goto restart; goto restart;
@ -3723,7 +3724,7 @@ out:
if (boosted) { if (boosted) {
unsigned long flags; unsigned long flags;
for (i = 0; i <= classzone_idx; i++) { for (i = 0; i <= highest_zoneidx; i++) {
if (!zone_boosts[i]) if (!zone_boosts[i])
continue; continue;
@ -3738,7 +3739,7 @@ out:
* As there is now likely space, wakeup kcompact to defragment * As there is now likely space, wakeup kcompact to defragment
* pageblocks. * pageblocks.
*/ */
wakeup_kcompactd(pgdat, pageblock_order, classzone_idx); wakeup_kcompactd(pgdat, pageblock_order, highest_zoneidx);
} }
snapshot_refaults(NULL, pgdat); snapshot_refaults(NULL, pgdat);
@ -3756,22 +3757,22 @@ out:
} }
/* /*
* The pgdat->kswapd_classzone_idx is used to pass the highest zone index to be * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
* reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is not * be reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is
* a valid index then either kswapd runs for first time or kswapd couldn't sleep * not a valid index then either kswapd runs for first time or kswapd couldn't
* after previous reclaim attempt (node is still unbalanced). In that case * sleep after previous reclaim attempt (node is still unbalanced). In that
* return the zone index of the previous kswapd reclaim cycle. * case return the zone index of the previous kswapd reclaim cycle.
*/ */
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat, static enum zone_type kswapd_highest_zoneidx(pg_data_t *pgdat,
enum zone_type prev_classzone_idx) enum zone_type prev_highest_zoneidx)
{ {
enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx); enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
return curr_idx == MAX_NR_ZONES ? prev_classzone_idx : curr_idx; return curr_idx == MAX_NR_ZONES ? prev_highest_zoneidx : curr_idx;
} }
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order, static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
unsigned int classzone_idx) unsigned int highest_zoneidx)
{ {
long remaining = 0; long remaining = 0;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
@ -3788,7 +3789,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
* eligible zone balanced that it's also unlikely that compaction will * eligible zone balanced that it's also unlikely that compaction will
* succeed. * succeed.
*/ */
if (prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) { if (prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) {
/* /*
* Compaction records what page blocks it recently failed to * Compaction records what page blocks it recently failed to
* isolate pages from and skips them in the future scanning. * isolate pages from and skips them in the future scanning.
@ -3801,18 +3802,19 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
* We have freed the memory, now we should compact it to make * We have freed the memory, now we should compact it to make
* allocation of the requested order possible. * allocation of the requested order possible.
*/ */
wakeup_kcompactd(pgdat, alloc_order, classzone_idx); wakeup_kcompactd(pgdat, alloc_order, highest_zoneidx);
remaining = schedule_timeout(HZ/10); remaining = schedule_timeout(HZ/10);
/* /*
* If woken prematurely then reset kswapd_classzone_idx and * If woken prematurely then reset kswapd_highest_zoneidx and
* order. The values will either be from a wakeup request or * order. The values will either be from a wakeup request or
* the previous request that slept prematurely. * the previous request that slept prematurely.
*/ */
if (remaining) { if (remaining) {
WRITE_ONCE(pgdat->kswapd_classzone_idx, WRITE_ONCE(pgdat->kswapd_highest_zoneidx,
kswapd_classzone_idx(pgdat, classzone_idx)); kswapd_highest_zoneidx(pgdat,
highest_zoneidx));
if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
WRITE_ONCE(pgdat->kswapd_order, reclaim_order); WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
@ -3827,7 +3829,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
* go fully to sleep until explicitly woken up. * go fully to sleep until explicitly woken up.
*/ */
if (!remaining && if (!remaining &&
prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) { prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id); trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
/* /*
@ -3869,7 +3871,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
static int kswapd(void *p) static int kswapd(void *p)
{ {
unsigned int alloc_order, reclaim_order; unsigned int alloc_order, reclaim_order;
unsigned int classzone_idx = MAX_NR_ZONES - 1; unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
pg_data_t *pgdat = (pg_data_t*)p; pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current; struct task_struct *tsk = current;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
@ -3893,22 +3895,24 @@ static int kswapd(void *p)
set_freezable(); set_freezable();
WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
for ( ; ; ) { for ( ; ; ) {
bool ret; bool ret;
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx); highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);
kswapd_try_sleep: kswapd_try_sleep:
kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order, kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
classzone_idx); highest_zoneidx);
/* Read the new order and classzone_idx */ /* Read the new order and highest_zoneidx */
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx); highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);
WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
ret = try_to_freeze(); ret = try_to_freeze();
if (kthread_should_stop()) if (kthread_should_stop())
@ -3929,9 +3933,10 @@ kswapd_try_sleep:
* but kcompactd is woken to compact for the original * but kcompactd is woken to compact for the original
* request (alloc_order). * request (alloc_order).
*/ */
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx,
alloc_order); alloc_order);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); reclaim_order = balance_pgdat(pgdat, alloc_order,
highest_zoneidx);
if (reclaim_order < alloc_order) if (reclaim_order < alloc_order)
goto kswapd_try_sleep; goto kswapd_try_sleep;
} }
@ -3949,7 +3954,7 @@ kswapd_try_sleep:
* needed. * needed.
*/ */
void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
enum zone_type classzone_idx) enum zone_type highest_zoneidx)
{ {
pg_data_t *pgdat; pg_data_t *pgdat;
enum zone_type curr_idx; enum zone_type curr_idx;
@ -3961,10 +3966,10 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
return; return;
pgdat = zone->zone_pgdat; pgdat = zone->zone_pgdat;
curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx); curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
if (curr_idx == MAX_NR_ZONES || curr_idx < classzone_idx) if (curr_idx == MAX_NR_ZONES || curr_idx < highest_zoneidx)
WRITE_ONCE(pgdat->kswapd_classzone_idx, classzone_idx); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx);
if (READ_ONCE(pgdat->kswapd_order) < order) if (READ_ONCE(pgdat->kswapd_order) < order)
WRITE_ONCE(pgdat->kswapd_order, order); WRITE_ONCE(pgdat->kswapd_order, order);
@ -3974,8 +3979,8 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
/* Hopeless node, leave it to direct reclaim if possible */ /* Hopeless node, leave it to direct reclaim if possible */
if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
(pgdat_balanced(pgdat, order, classzone_idx) && (pgdat_balanced(pgdat, order, highest_zoneidx) &&
!pgdat_watermark_boosted(pgdat, classzone_idx))) { !pgdat_watermark_boosted(pgdat, highest_zoneidx))) {
/* /*
* There may be plenty of free memory available, but it's too * There may be plenty of free memory available, but it's too
* fragmented for high-order allocations. Wake up kcompactd * fragmented for high-order allocations. Wake up kcompactd
@ -3984,11 +3989,11 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
* ratelimit its work. * ratelimit its work.
*/ */
if (!(gfp_flags & __GFP_DIRECT_RECLAIM)) if (!(gfp_flags & __GFP_DIRECT_RECLAIM))
wakeup_kcompactd(pgdat, order, classzone_idx); wakeup_kcompactd(pgdat, order, highest_zoneidx);
return; return;
} }
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order, trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order,
gfp_flags); gfp_flags);
wake_up_interruptible(&pgdat->kswapd_wait); wake_up_interruptible(&pgdat->kswapd_wait);
} }