revert "memcg, vmscan: integrate soft reclaim tighter with zone shrinking code"
Revert commit 3b38722efd ("memcg, vmscan: integrate soft reclaim tighter
with zone shrinking code").
I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 0608f43da6 (parent bb4cc1a8b5)
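For context, the diff below restores the pre-existing flow in which global
reclaim first walks the per-zone tree of groups that are over their soft
limit (mem_cgroup_soft_limit_reclaim) and then shrinks the zone as usual,
rather than filtering soft-limit-eligible groups inside shrink_zone(). The
stand-alone sketch that follows only models that restored ordering; the
names soft_limit_reclaim and shrink_zone_all are simplified stand-ins for
illustration, not the kernel functions themselves.

/*
 * Illustrative stand-alone model of the restored ordering; NOT kernel code.
 * soft_limit_reclaim() and shrink_zone_all() are simplified stand-ins for
 * mem_cgroup_soft_limit_reclaim() and shrink_zone() in the hunks below.
 */
#include <stdio.h>

/* Stand-in: reclaim from the worst soft-limit offender in this zone. */
static unsigned long soft_limit_reclaim(int zone, unsigned long *scanned)
{
        printf("zone %d: soft limit reclaim\n", zone);
        *scanned += 32;         /* pretend 32 pages were scanned */
        return 16;              /* pretend 16 of them were reclaimed */
}

/* Stand-in: ordinary reclaim across the whole memcg hierarchy in a zone. */
static unsigned long shrink_zone_all(int zone)
{
        printf("zone %d: shrink all groups\n", zone);
        return 8;
}

int main(void)
{
        unsigned long nr_reclaimed = 0, nr_scanned = 0;

        for (int zone = 0; zone < 3; zone++) {
                /* restored order: soft limit reclaim runs first ... */
                nr_reclaimed += soft_limit_reclaim(zone, &nr_scanned);
                /* ... then the regular per-zone shrink, unconditionally */
                nr_reclaimed += shrink_zone_all(zone);
        }
        printf("reclaimed %lu, scanned %lu\n", nr_reclaimed, nr_scanned);
        return 0;
}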
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -234,7 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+						gfp_t gfp_mask,
+						unsigned long *total_scanned);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -434,9 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
-	return false;
+	return 0;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- *	a) it is over its soft limit
- *	b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-	struct mem_cgroup *parent = memcg;
-
-	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
-
-	/*
-	 * If any parent up the hierarchy is over its soft limit then we
-	 * have to obey and reclaim from this group as well.
-	 */
-	while ((parent = parent_mem_cgroup(parent))) {
-		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
-	}
-
-	return false;
-}
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
+{
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
+	int loop = 0;
+	unsigned long excess;
+	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
+
+	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+	while (1) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
+			loop++;
+			if (loop >= 2) {
+				/*
+				 * If we have not been able to reclaim
+				 * anything, it might because there are
+				 * no reclaimable pages under this hierarchy
+				 */
+				if (!total)
+					break;
+				/*
+				 * We want to do more targeted reclaim.
+				 * excess >> 2 is not to excessive so as to
+				 * reclaim too much, nor too less that we keep
+				 * coming back to reclaim from this cgroup
+				 */
+				if (total >= (excess >> 2) ||
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+					break;
+			}
+			continue;
+		}
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
+	}
+	mem_cgroup_iter_break(root_memcg, victim);
+	return total;
+}
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
+{
+	unsigned long nr_reclaimed = 0;
+	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	unsigned long reclaimed;
+	int loop = 0;
+	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
+	unsigned long nr_scanned;
+
+	if (order > 0)
+		return 0;
+
+	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	/*
+	 * This loop can run a while, specially if mem_cgroup's continuously
+	 * keep exceeding their soft limit and putting the system under
+	 * pressure
+	 */
+	do {
+		if (next_mz)
+			mz = next_mz;
+		else
+			mz = mem_cgroup_largest_soft_limit_node(mctz);
+		if (!mz)
+			break;
+
+		nr_scanned = 0;
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+						    gfp_mask, &nr_scanned);
+		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
+		spin_lock(&mctz->lock);
+
+		/*
+		 * If we failed to reclaim anything from this memory cgroup
+		 * it is time to move on to the next cgroup
+		 */
+		next_mz = NULL;
+		if (!reclaimed) {
+			do {
+				/*
+				 * Loop until we find yet another one.
+				 *
+				 * By the time we get the soft_limit lock
+				 * again, someone might have aded the
+				 * group back on the RB tree. Iterate to
+				 * make sure we get a different mem.
+				 * mem_cgroup_largest_soft_limit_node returns
+				 * NULL if no other cgroup is present on
+				 * the tree
+				 */
+				next_mz =
+				__mem_cgroup_largest_soft_limit_node(mctz);
+				if (next_mz == mz)
+					css_put(&next_mz->memcg->css);
+				else /* next_mz == NULL or other memcg */
+					break;
+			} while (1);
+		}
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
+		/*
+		 * One school of thought says that we should not add
+		 * back the node to the tree if reclaim returns 0.
+		 * But our reclaim could return 0, simply because due
+		 * to priority we are exposing a smaller subset of
+		 * memory to reclaim from. Consider this as a longer
+		 * term TODO.
+		 */
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+		spin_unlock(&mctz->lock);
+		css_put(&mz->memcg->css);
+		loop++;
+		/*
+		 * Could not reclaim anything and there are no more
+		 * mem cgroups to try or we seem to be looping without
+		 * reclaiming anything.
+		 */
+		if (!nr_reclaimed &&
+			(next_mz == NULL ||
+			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+			break;
+	} while (!nr_reclaimed);
+	if (next_mz)
+		css_put(&next_mz->memcg->css);
+	return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return !mem_cgroup_disabled() && global_reclaim(sc);
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static void
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long nr_reclaimed, nr_scanned;
 
@@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 		do {
 			struct lruvec *lruvec;
 
-			if (soft_reclaim &&
-			    !mem_cgroup_soft_reclaim_eligible(memcg)) {
-				memcg = mem_cgroup_iter(root, memcg, &reclaim);
-				continue;
-			}
-
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 			shrink_lruvec(lruvec, sc);
@@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			 sc->nr_scanned - nr_scanned, sc));
 }
 
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-	unsigned long nr_scanned = sc->nr_scanned;
-
-	__shrink_zone(zone, sc, do_soft_reclaim);
-
-	/*
-	 * No group is over the soft limit or those that are do not have
-	 * pages in the zone we are reclaiming so we have to reclaim everybody
-	 */
-	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-		__shrink_zone(zone, sc, false);
-		return;
-	}
-}
-
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
@@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	bool aborted_reclaim = false;
 
 	/*
@@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 					continue;
 				}
 			}
+			/*
+			 * This steals pages from memory cgroups over softlimit
+			 * and returns the number of reclaimed pages and
+			 * scanned pages. This works for global memory pressure
+			 * and balancing, not for a memcg's limit.
+			 */
+			nr_soft_scanned = 0;
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+						sc->order, sc->gfp_mask,
+						&nr_soft_scanned);
+			sc->nr_reclaimed += nr_soft_reclaimed;
+			sc->nr_scanned += nr_soft_scanned;
 			/* need some check for avoid more shrink_zone() */
 		}
 
@@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
 			sc.nr_scanned = 0;
 
+			nr_soft_scanned = 0;
+			/*
+			 * Call soft limit reclaim before calling shrink_zone.
+			 */
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+
 			/*
 			 * There should be no need to raise the scanning
 			 * priority if enough pages are already being scanned