mm: remove zone_lru_lock() function, access ->lru_lock directly
We have a common pattern to access lru_lock from a page pointer: zone_lru_lock(page_zone(page)), which is silly, because it unfolds to this: &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]->zone_pgdat->lru_lock while we can simply do &NODE_DATA(page_to_nid(page))->lru_lock. Remove the zone_lru_lock() function, since it only complicates things. Use the 'page_pgdat(page)->lru_lock' pattern instead. [aryabinin@virtuozzo.com: a slightly better version of __split_huge_page()] Link: http://lkml.kernel.org/r/20190301121651.7741-1-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/20190228083329.31892-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Mel Gorman <mgorman@techsingularity.net> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Rik van Riel <riel@surriel.com> Cc: William Kucharski <william.kucharski@oracle.com> Cc: John Hubbard <jhubbard@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
a7ca12f9d9
commit
f4b7e272b5
|
@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
|
|||
|
||||
8. LRU
|
||||
Each memcg has its own private LRU. Now, its handling is under global
|
||||
VM's control (means that it's handled under global zone_lru_lock).
|
||||
VM's control (means that it's handled under global pgdat->lru_lock).
|
||||
Almost all routines around memcg's LRU is called by global LRU's
|
||||
list management functions under zone_lru_lock().
|
||||
list management functions under pgdat->lru_lock.
|
||||
|
||||
A special function is mem_cgroup_isolate_pages(). This scans
|
||||
memcg's private LRU and call __isolate_lru_page() to extract a page
|
||||
|
|
|
@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
|
|||
Other lock order is following:
|
||||
PG_locked.
|
||||
mm->page_table_lock
|
||||
zone_lru_lock
|
||||
pgdat->lru_lock
|
||||
lock_page_cgroup.
|
||||
In many cases, just lock_page_cgroup() is called.
|
||||
per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
|
||||
zone_lru_lock, it has no lock of its own.
|
||||
pgdat->lru_lock, it has no lock of its own.
|
||||
|
||||
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ struct page {
|
|||
struct { /* Page cache and anonymous pages */
|
||||
/**
|
||||
* @lru: Pageout list, eg. active_list protected by
|
||||
* zone_lru_lock. Sometimes used as a generic list
|
||||
* pgdat->lru_lock. Sometimes used as a generic list
|
||||
* by the page owner.
|
||||
*/
|
||||
struct list_head lru;
|
||||
|
|
|
@ -730,10 +730,6 @@ typedef struct pglist_data {
|
|||
|
||||
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
|
||||
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
|
||||
static inline spinlock_t *zone_lru_lock(struct zone *zone)
|
||||
{
|
||||
return &zone->zone_pgdat->lru_lock;
|
||||
}
|
||||
|
||||
static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
|
||||
{
|
||||
|
|
|
@ -775,6 +775,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
unsigned long end_pfn, isolate_mode_t isolate_mode)
|
||||
{
|
||||
struct zone *zone = cc->zone;
|
||||
pg_data_t *pgdat = zone->zone_pgdat;
|
||||
unsigned long nr_scanned = 0, nr_isolated = 0;
|
||||
struct lruvec *lruvec;
|
||||
unsigned long flags = 0;
|
||||
|
@ -839,8 +840,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
* if contended.
|
||||
*/
|
||||
if (!(low_pfn % SWAP_CLUSTER_MAX)
|
||||
&& compact_unlock_should_abort(zone_lru_lock(zone), flags,
|
||||
&locked, cc))
|
||||
&& compact_unlock_should_abort(&pgdat->lru_lock,
|
||||
flags, &locked, cc))
|
||||
break;
|
||||
|
||||
if (!pfn_valid_within(low_pfn))
|
||||
|
@ -910,7 +911,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
if (unlikely(__PageMovable(page)) &&
|
||||
!PageIsolated(page)) {
|
||||
if (locked) {
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone),
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock,
|
||||
flags);
|
||||
locked = false;
|
||||
}
|
||||
|
@ -940,7 +941,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
|
||||
/* If we already hold the lock, we can skip some rechecking */
|
||||
if (!locked) {
|
||||
locked = compact_lock_irqsave(zone_lru_lock(zone),
|
||||
locked = compact_lock_irqsave(&pgdat->lru_lock,
|
||||
&flags, cc);
|
||||
|
||||
/* Try get exclusive access under lock */
|
||||
|
@ -965,7 +966,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
}
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
|
||||
/* Try isolate the page */
|
||||
if (__isolate_lru_page(page, isolate_mode) != 0)
|
||||
|
@ -1007,7 +1008,7 @@ isolate_fail:
|
|||
*/
|
||||
if (nr_isolated) {
|
||||
if (locked) {
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
locked = false;
|
||||
}
|
||||
putback_movable_pages(&cc->migratepages);
|
||||
|
@ -1034,7 +1035,7 @@ isolate_fail:
|
|||
|
||||
isolate_abort:
|
||||
if (locked)
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
|
||||
/*
|
||||
* Updated the cached scanner pfn once the pageblock has been scanned
|
||||
|
|
|
@ -98,8 +98,8 @@
|
|||
* ->swap_lock (try_to_unmap_one)
|
||||
* ->private_lock (try_to_unmap_one)
|
||||
* ->i_pages lock (try_to_unmap_one)
|
||||
* ->zone_lru_lock(zone) (follow_page->mark_page_accessed)
|
||||
* ->zone_lru_lock(zone) (check_pte_range->isolate_lru_page)
|
||||
* ->pgdat->lru_lock (follow_page->mark_page_accessed)
|
||||
* ->pgdat->lru_lock (check_pte_range->isolate_lru_page)
|
||||
* ->private_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->i_pages lock (page_remove_rmap->set_page_dirty)
|
||||
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
|
||||
|
|
|
@ -2440,11 +2440,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|||
pgoff_t end, unsigned long flags)
|
||||
{
|
||||
struct page *head = compound_head(page);
|
||||
struct zone *zone = page_zone(head);
|
||||
pg_data_t *pgdat = page_pgdat(head);
|
||||
struct lruvec *lruvec;
|
||||
int i;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
|
||||
lruvec = mem_cgroup_page_lruvec(head, pgdat);
|
||||
|
||||
/* complete memcg works before add pages to LRU */
|
||||
mem_cgroup_split_huge_fixup(head);
|
||||
|
@ -2475,7 +2475,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|||
xa_unlock(&head->mapping->i_pages);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
|
||||
remap_page(head);
|
||||
|
||||
|
@ -2686,7 +2686,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|||
lru_add_drain();
|
||||
|
||||
/* prevent PageLRU to go away from under us, and freeze lru stats */
|
||||
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
|
||||
spin_lock_irqsave(&pgdata->lru_lock, flags);
|
||||
|
||||
if (mapping) {
|
||||
XA_STATE(xas, &mapping->i_pages, page_index(head));
|
||||
|
@ -2731,7 +2731,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|||
spin_unlock(&pgdata->split_queue_lock);
|
||||
fail: if (mapping)
|
||||
xa_unlock(&mapping->i_pages);
|
||||
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
||||
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
|
||||
remap_page(head);
|
||||
ret = -EBUSY;
|
||||
}
|
||||
|
|
|
@ -2362,13 +2362,13 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|||
|
||||
static void lock_page_lru(struct page *page, int *isolated)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
if (PageLRU(page)) {
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_lru(page));
|
||||
*isolated = 1;
|
||||
|
@ -2378,17 +2378,17 @@ static void lock_page_lru(struct page *page, int *isolated)
|
|||
|
||||
static void unlock_page_lru(struct page *page, int isolated)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
|
||||
if (isolated) {
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
SetPageLRU(page);
|
||||
add_page_to_lru_list(page, lruvec, page_lru(page));
|
||||
}
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
}
|
||||
|
||||
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
|
@ -2674,7 +2674,7 @@ void __memcg_kmem_uncharge(struct page *page, int order)
|
|||
|
||||
/*
|
||||
* Because tail pages are not marked as "used", set it. We're under
|
||||
* zone_lru_lock and migration entries setup in all page mappings.
|
||||
* pgdat->lru_lock and migration entries setup in all page mappings.
|
||||
*/
|
||||
void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
{
|
||||
|
|
14
mm/mlock.c
14
mm/mlock.c
|
@ -182,7 +182,7 @@ static void __munlock_isolation_failed(struct page *page)
|
|||
unsigned int munlock_vma_page(struct page *page)
|
||||
{
|
||||
int nr_pages;
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
|
||||
/* For try_to_munlock() and to serialize with page migration */
|
||||
BUG_ON(!PageLocked(page));
|
||||
|
@ -194,7 +194,7 @@ unsigned int munlock_vma_page(struct page *page)
|
|||
* might otherwise copy PageMlocked to part of the tail pages before
|
||||
* we clear it in the head page. It also stabilizes hpage_nr_pages().
|
||||
*/
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
|
||||
if (!TestClearPageMlocked(page)) {
|
||||
/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
|
||||
|
@ -203,17 +203,17 @@ unsigned int munlock_vma_page(struct page *page)
|
|||
}
|
||||
|
||||
nr_pages = hpage_nr_pages(page);
|
||||
__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
|
||||
__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
|
||||
|
||||
if (__munlock_isolate_lru_page(page, true)) {
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
__munlock_isolated_page(page);
|
||||
goto out;
|
||||
}
|
||||
__munlock_isolation_failed(page);
|
||||
|
||||
unlock_out:
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
|
||||
out:
|
||||
return nr_pages - 1;
|
||||
|
@ -298,7 +298,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
|
|||
pagevec_init(&pvec_putback);
|
||||
|
||||
/* Phase 1: page isolation */
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
spin_lock_irq(&zone->zone_pgdat->lru_lock);
|
||||
for (i = 0; i < nr; i++) {
|
||||
struct page *page = pvec->pages[i];
|
||||
|
||||
|
@ -325,7 +325,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
|
|||
pvec->pages[i] = NULL;
|
||||
}
|
||||
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&zone->zone_pgdat->lru_lock);
|
||||
|
||||
/* Now we can release pins of pages that we are not munlocking */
|
||||
pagevec_release(&pvec_putback);
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
static struct page *page_idle_get_page(unsigned long pfn)
|
||||
{
|
||||
struct page *page;
|
||||
struct zone *zone;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
if (!pfn_valid(pfn))
|
||||
return NULL;
|
||||
|
@ -41,13 +41,13 @@ static struct page *page_idle_get_page(unsigned long pfn)
|
|||
!get_page_unless_zero(page))
|
||||
return NULL;
|
||||
|
||||
zone = page_zone(page);
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
pgdat = page_pgdat(page);
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
if (unlikely(!PageLRU(page))) {
|
||||
put_page(page);
|
||||
page = NULL;
|
||||
}
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
return page;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
* mapping->i_mmap_rwsem
|
||||
* anon_vma->rwsem
|
||||
* mm->page_table_lock or pte_lock
|
||||
* zone_lru_lock (in mark_page_accessed, isolate_lru_page)
|
||||
* pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
|
||||
* swap_lock (in swap_duplicate, swap_info_get)
|
||||
* mmlist_lock (in mmput, drain_mmlist and others)
|
||||
* mapping->private_lock (in __set_page_dirty_buffers)
|
||||
|
|
16
mm/swap.c
16
mm/swap.c
|
@ -58,16 +58,16 @@ static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
|
|||
static void __page_cache_release(struct page *page)
|
||||
{
|
||||
if (PageLRU(page)) {
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
struct lruvec *lruvec;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(zone_lru_lock(zone), flags);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||
__ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
}
|
||||
__ClearPageWaiters(page);
|
||||
mem_cgroup_uncharge(page);
|
||||
|
@ -322,12 +322,12 @@ static inline void activate_page_drain(int cpu)
|
|||
|
||||
void activate_page(struct page *page)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
|
||||
page = compound_head(page);
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
__activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
__activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
16
mm/vmscan.c
16
mm/vmscan.c
|
@ -1614,8 +1614,8 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* zone_lru_lock is heavily contended. Some of the functions that
|
||||
/**
|
||||
* pgdat->lru_lock is heavily contended. Some of the functions that
|
||||
* shrink the lists perform better by taking out a batch of pages
|
||||
* and working on them outside the LRU lock.
|
||||
*
|
||||
|
@ -1750,11 +1750,11 @@ int isolate_lru_page(struct page *page)
|
|||
WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
|
||||
|
||||
if (PageLRU(page)) {
|
||||
struct zone *zone = page_zone(page);
|
||||
pg_data_t *pgdat = page_pgdat(page);
|
||||
struct lruvec *lruvec;
|
||||
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
if (PageLRU(page)) {
|
||||
int lru = page_lru(page);
|
||||
get_page(page);
|
||||
|
@ -1762,7 +1762,7 @@ int isolate_lru_page(struct page *page)
|
|||
del_page_from_lru_list(page, lruvec, lru);
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -1990,9 +1990,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
|
|||
* processes, from rmap.
|
||||
*
|
||||
* If the pages are mostly unmapped, the processing is fast and it is
|
||||
* appropriate to hold zone_lru_lock across the whole operation. But if
|
||||
* appropriate to hold pgdat->lru_lock across the whole operation. But if
|
||||
* the pages are mapped, the processing is slow (page_referenced()) so we
|
||||
* should drop zone_lru_lock around each page. It's impossible to balance
|
||||
* should drop pgdat->lru_lock around each page. It's impossible to balance
|
||||
* this, so instead we remove the pages from the LRU while processing them.
|
||||
* It is safe to rely on PG_active against the non-LRU pages in here because
|
||||
* nobody will play with that bit on a non-LRU page.
|
||||
|
|
Loading…
Reference in New Issue