thp: make split_queue per-node
Andrea Arcangeli suggested making the split queue per-node to improve scalability. Let's do it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
34229b2774
commit
a3d0a91850
|
@ -682,6 +682,12 @@ typedef struct pglist_data {
|
||||||
*/
|
*/
|
||||||
unsigned long first_deferred_pfn;
|
unsigned long first_deferred_pfn;
|
||||||
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
|
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
|
||||||
|
|
||||||
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
|
spinlock_t split_queue_lock;
|
||||||
|
struct list_head split_queue;
|
||||||
|
unsigned long split_queue_len;
|
||||||
|
#endif
|
||||||
} pg_data_t;
|
} pg_data_t;
|
||||||
|
|
||||||
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
|
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
|
||||||
|
|
|
@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
|
||||||
.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
|
.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
|
||||||
};
|
};
|
||||||
|
|
||||||
static DEFINE_SPINLOCK(split_queue_lock);
|
|
||||||
static LIST_HEAD(split_queue);
|
|
||||||
static unsigned long split_queue_len;
|
|
||||||
static struct shrinker deferred_split_shrinker;
|
static struct shrinker deferred_split_shrinker;
|
||||||
|
|
||||||
static void set_recommended_min_free_kbytes(void)
|
static void set_recommended_min_free_kbytes(void)
|
||||||
|
@ -3358,6 +3355,7 @@ int total_mapcount(struct page *page)
|
||||||
int split_huge_page_to_list(struct page *page, struct list_head *list)
|
int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||||
{
|
{
|
||||||
struct page *head = compound_head(page);
|
struct page *head = compound_head(page);
|
||||||
|
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
|
||||||
struct anon_vma *anon_vma;
|
struct anon_vma *anon_vma;
|
||||||
int count, mapcount, ret;
|
int count, mapcount, ret;
|
||||||
bool mlocked;
|
bool mlocked;
|
||||||
|
@ -3401,19 +3399,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
|
|
||||||
/* Prevent deferred_split_scan() touching ->_count */
|
/* Prevent deferred_split_scan() touching ->_count */
|
||||||
spin_lock_irqsave(&split_queue_lock, flags);
|
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
|
||||||
count = page_count(head);
|
count = page_count(head);
|
||||||
mapcount = total_mapcount(head);
|
mapcount = total_mapcount(head);
|
||||||
if (!mapcount && count == 1) {
|
if (!mapcount && count == 1) {
|
||||||
if (!list_empty(page_deferred_list(head))) {
|
if (!list_empty(page_deferred_list(head))) {
|
||||||
split_queue_len--;
|
pgdata->split_queue_len--;
|
||||||
list_del(page_deferred_list(head));
|
list_del(page_deferred_list(head));
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
__split_huge_page(page, list);
|
__split_huge_page(page, list);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
|
} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
pr_alert("total_mapcount: %u, page_count(): %u\n",
|
pr_alert("total_mapcount: %u, page_count(): %u\n",
|
||||||
mapcount, count);
|
mapcount, count);
|
||||||
if (PageTail(page))
|
if (PageTail(page))
|
||||||
|
@ -3421,7 +3419,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||||
dump_page(page, "total_mapcount(head) > 0");
|
dump_page(page, "total_mapcount(head) > 0");
|
||||||
BUG();
|
BUG();
|
||||||
} else {
|
} else {
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
unfreeze_page(anon_vma, head);
|
unfreeze_page(anon_vma, head);
|
||||||
ret = -EBUSY;
|
ret = -EBUSY;
|
||||||
}
|
}
|
||||||
|
@ -3436,52 +3434,56 @@ out:
|
||||||
|
|
||||||
void free_transhuge_page(struct page *page)
|
void free_transhuge_page(struct page *page)
|
||||||
{
|
{
|
||||||
|
struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&split_queue_lock, flags);
|
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
|
||||||
if (!list_empty(page_deferred_list(page))) {
|
if (!list_empty(page_deferred_list(page))) {
|
||||||
split_queue_len--;
|
pgdata->split_queue_len--;
|
||||||
list_del(page_deferred_list(page));
|
list_del(page_deferred_list(page));
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
free_compound_page(page);
|
free_compound_page(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
void deferred_split_huge_page(struct page *page)
|
void deferred_split_huge_page(struct page *page)
|
||||||
{
|
{
|
||||||
|
struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
|
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
|
||||||
|
|
||||||
spin_lock_irqsave(&split_queue_lock, flags);
|
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
|
||||||
if (list_empty(page_deferred_list(page))) {
|
if (list_empty(page_deferred_list(page))) {
|
||||||
list_add_tail(page_deferred_list(page), &split_queue);
|
list_add_tail(page_deferred_list(page), &pgdata->split_queue);
|
||||||
split_queue_len++;
|
pgdata->split_queue_len++;
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long deferred_split_count(struct shrinker *shrink,
|
static unsigned long deferred_split_count(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
|
struct pglist_data *pgdata = NODE_DATA(sc->nid);
|
||||||
/*
|
/*
|
||||||
* Split a page from split_queue will free up at least one page,
|
* Split a page from split_queue will free up at least one page,
|
||||||
* at most HPAGE_PMD_NR - 1. We don't track exact number.
|
* at most HPAGE_PMD_NR - 1. We don't track exact number.
|
||||||
* Let's use HPAGE_PMD_NR / 2 as ballpark.
|
* Let's use HPAGE_PMD_NR / 2 as ballpark.
|
||||||
*/
|
*/
|
||||||
return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
|
return ACCESS_ONCE(pgdata->split_queue_len) * HPAGE_PMD_NR / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long deferred_split_scan(struct shrinker *shrink,
|
static unsigned long deferred_split_scan(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
|
struct pglist_data *pgdata = NODE_DATA(sc->nid);
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
LIST_HEAD(list), *pos, *next;
|
LIST_HEAD(list), *pos, *next;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
int split = 0;
|
int split = 0;
|
||||||
|
|
||||||
spin_lock_irqsave(&split_queue_lock, flags);
|
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
|
||||||
list_splice_init(&split_queue, &list);
|
list_splice_init(&pgdata->split_queue, &list);
|
||||||
|
|
||||||
/* Take pin on all head pages to avoid freeing them under us */
|
/* Take pin on all head pages to avoid freeing them under us */
|
||||||
list_for_each_safe(pos, next, &list) {
|
list_for_each_safe(pos, next, &list) {
|
||||||
|
@ -3490,10 +3492,10 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
|
||||||
/* race with put_compound_page() */
|
/* race with put_compound_page() */
|
||||||
if (!get_page_unless_zero(page)) {
|
if (!get_page_unless_zero(page)) {
|
||||||
list_del_init(page_deferred_list(page));
|
list_del_init(page_deferred_list(page));
|
||||||
split_queue_len--;
|
pgdata->split_queue_len--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
|
|
||||||
list_for_each_safe(pos, next, &list) {
|
list_for_each_safe(pos, next, &list) {
|
||||||
page = list_entry((void *)pos, struct page, mapping);
|
page = list_entry((void *)pos, struct page, mapping);
|
||||||
|
@ -3505,9 +3507,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
|
||||||
put_page(page);
|
put_page(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock_irqsave(&split_queue_lock, flags);
|
spin_lock_irqsave(&pgdata->split_queue_lock, flags);
|
||||||
list_splice_tail(&list, &split_queue);
|
list_splice_tail(&list, &pgdata->split_queue);
|
||||||
spin_unlock_irqrestore(&split_queue_lock, flags);
|
spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
|
||||||
|
|
||||||
return split * HPAGE_PMD_NR / 2;
|
return split * HPAGE_PMD_NR / 2;
|
||||||
}
|
}
|
||||||
|
@ -3516,6 +3518,7 @@ static struct shrinker deferred_split_shrinker = {
|
||||||
.count_objects = deferred_split_count,
|
.count_objects = deferred_split_count,
|
||||||
.scan_objects = deferred_split_scan,
|
.scan_objects = deferred_split_scan,
|
||||||
.seeks = DEFAULT_SEEKS,
|
.seeks = DEFAULT_SEEKS,
|
||||||
|
.flags = SHRINKER_NUMA_AWARE,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
|
|
@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
|
||||||
spin_lock_init(&pgdat->numabalancing_migrate_lock);
|
spin_lock_init(&pgdat->numabalancing_migrate_lock);
|
||||||
pgdat->numabalancing_migrate_nr_pages = 0;
|
pgdat->numabalancing_migrate_nr_pages = 0;
|
||||||
pgdat->numabalancing_migrate_next_window = jiffies;
|
pgdat->numabalancing_migrate_next_window = jiffies;
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
|
spin_lock_init(&pgdat->split_queue_lock);
|
||||||
|
INIT_LIST_HEAD(&pgdat->split_queue);
|
||||||
|
pgdat->split_queue_len = 0;
|
||||||
#endif
|
#endif
|
||||||
init_waitqueue_head(&pgdat->kswapd_wait);
|
init_waitqueue_head(&pgdat->kswapd_wait);
|
||||||
init_waitqueue_head(&pgdat->pfmemalloc_wait);
|
init_waitqueue_head(&pgdat->pfmemalloc_wait);
|
||||||
|
|
Loading…
Reference in New Issue