rue/mm: introduce a per-cgroup memory allocation latency histogram tool

A new memory.latency_histogram control file is added
under each memory cgroup directory. Reading this file
prints a histogram of memory allocation latency at the
memory cgroup level.
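
A usage sketch, not part of the patch: collection is gated by the
vm.memory_qos and vm.memcg_latency_histogram sysctls used below, and
the control file is registered in the cgroup v1 memory controller. In
this minimal C reader the cgroup name "test" and the
/sys/fs/cgroup/memory mount point are assumptions about the local
setup, and the program must run as root.

/* Enable the gating sysctls and dump one cgroup's latency histogram. */
#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f;

	write_str("/proc/sys/vm/memory_qos", "1");
	write_str("/proc/sys/vm/memcg_latency_histogram", "1");

	/* Path assumes a v1 memory cgroup named "test". */
	f = fopen("/sys/fs/cgroup/memory/test/memory.latency_histogram", "r");
	if (!f) {
		perror("memory.latency_histogram");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one "[lower, upper]ns : count" line per slot */
	fclose(f);
	return 0;
}

Reading the file drains the per-CPU counters, so each read reports only
the samples collected since the previous read.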

Signed-off-by: Jingxiang Zeng <linuszeng@tencent.com>
Signed-off-by: Honglin Li <honglinli@tencent.com>
Honglin Li 2023-09-05 15:08:34 +08:00 committed by Haisu Wang
parent 1824581599
commit 8de07be077
4 changed files with 101 additions and 0 deletions

include/linux/memcontrol.h

@@ -22,6 +22,8 @@
#include <linux/writeback.h>
#include <linux/page-flags.h>
#define MEM_LATENCY_MAX_SLOTS 64
struct mem_cgroup;
struct obj_cgroup;
struct page;
@@ -341,6 +343,8 @@ struct mem_cgroup {
CACHELINE_PADDING(_pad2_);
u64 __percpu *latency_histogram[MEM_LATENCY_MAX_SLOTS];
int reclaim_failed;
struct list_head prio_list;
struct list_head prio_list_async;

kernel/sysctl.c

@@ -2615,6 +2615,7 @@ static struct ctl_table kern_table[] = {
{ }
};
unsigned int vm_memcg_latency_histogram;
unsigned long vm_pagecache_system_usage;
static struct ctl_table vm_table[] = {
@@ -2925,6 +2926,15 @@ static struct ctl_table vm_table[] = {
.mode = 0444,
.proc_handler = proc_pagecache_system_usage,
},
{
.procname = "memcg_latency_histogram",
.data = &vm_memcg_latency_histogram,
.maxlen = sizeof(vm_memcg_latency_histogram),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "memory_qos",
.data = &sysctl_vm_memory_qos,

mm/memcontrol.c

@@ -6322,11 +6322,52 @@ static ssize_t memory_async_distance_factor_write(struct kernfs_open_file *of,
return nbytes;
}
extern unsigned int vm_memcg_latency_histogram;
static int mem_cgroup_lat_seq_show(struct seq_file *m, void *v)
{
u64 sum_lat;
int i, cpu;
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
if (!sysctl_vm_memory_qos) {
seq_puts(m, "vm.memory_qos is not enabled.\n");
return 0;
}
if (!vm_memcg_latency_histogram) {
seq_puts(m, "vm.memcg_latency_histogram is not enabled.\n");
return 0;
}
/*
 * Drain the per-CPU counters as they are read, so each sample is
 * reported only once. Slot i holds samples whose latency satisfied
 * ilog2(latency) == i, i.e. [2^i, 2^(i+1)) ns; slot 0 also absorbs
 * zero-ns samples.
 */
for (i = 0; i < MEM_LATENCY_MAX_SLOTS; i++) {
sum_lat = 0;
for_each_possible_cpu(cpu) {
sum_lat += *per_cpu_ptr(memcg->latency_histogram[i], cpu);
*per_cpu_ptr(memcg->latency_histogram[i], cpu) = 0;
}
if (i == 0)
seq_printf(m, "[%-20llu, %-20llu]ns : %llu\n",
(u64)0, (u64)1, sum_lat);
else
seq_printf(m, "[%-20llu, %-20llu]ns : %llu\n",
(u64)1 << i,
((u64)2 << i) - 1, sum_lat);
}
return 0;
}
static int memory_oom_group_show(struct seq_file *m, void *v);
static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "latency_histogram",
.seq_show = mem_cgroup_lat_seq_show,
},
{
.name = "usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
@@ -6756,6 +6797,10 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
int i;
for (i = 0; i < MEM_LATENCY_MAX_SLOTS; i++)
free_percpu(memcg->latency_histogram[i]);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
@@ -6853,6 +6898,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
struct mem_cgroup *memcg, *old_memcg;
long error = -ENOMEM;
int index;
old_memcg = set_active_memcg(parent);
memcg = mem_cgroup_alloc(parent);
@@ -6862,6 +6909,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
for (index = 0; index < MEM_LATENCY_MAX_SLOTS; index++) {
memcg->latency_histogram[index] = alloc_percpu(u64);
if (!memcg->latency_histogram[index])
goto fail;
}
memcg->pagecache_reclaim_ratio = DEFAULT_PAGE_RECLAIM_RATIO;
memcg->pagecache_max_ratio = PAGECACHE_MAX_RATIO_MAX;
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
@@ -6924,6 +6976,10 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
INIT_LIST_HEAD(&memcg->prio_list_async);
return &memcg->css;
fail:
mem_cgroup_id_remove(memcg);
mem_cgroup_free(memcg);
return ERR_PTR(error);
}
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)

mm/page_alloc.c

@@ -56,6 +56,8 @@
#ifdef CONFIG_CGROUP_SLI
#include <linux/sli.h>
#endif
#include <linux/log2.h>
#include <linux/sched/clock.h>
#include "internal.h"
#include "shuffle.h"
@@ -4429,6 +4431,8 @@ failed:
}
EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
extern unsigned int vm_memcg_latency_histogram;
/*
* This is the 'heart' of the zoned buddy allocator.
*/
@@ -4439,6 +4443,12 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg;
u64 start_ns = 0;	/* set only when the latency histogram is enabled */
u64 delta;
int delta_log;
#endif
/*
* There are several places where we assume that the order value is sane
@@ -4461,6 +4471,16 @@
&alloc_gfp, &alloc_flags))
return NULL;
#ifdef CONFIG_MEMCG
/*
 * Pin the allocating task's memcg and take a start timestamp so the
 * allocation latency can be accounted once the allocation completes.
 */
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
if (sysctl_vm_memory_qos && vm_memcg_latency_histogram && memcg)
start_ns = local_clock();
if (memcg)
css_get(&memcg->css);
rcu_read_unlock();
#endif
/*
* Forbid the first pass from falling back to types that fragment
* memory until all local zones are considered.
@@ -4490,6 +4510,17 @@ out:
page = NULL;
}
#ifdef CONFIG_MEMCG
/*
 * Account the sample: the slot index is floor(log2(latency in ns)),
 * with zero deltas falling into slot 0. The sample is skipped if the
 * histogram was only enabled mid-allocation (start_ns still 0).
 */
if (sysctl_vm_memory_qos && vm_memcg_latency_histogram && memcg && start_ns) {
delta = local_clock() - start_ns;
delta_log = __ilog2_u64(delta);
if (delta_log < 0)
delta_log = 0;
this_cpu_add(*memcg->latency_histogram[delta_log], 1);
}
mem_cgroup_put(memcg);
#endif
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
kmsan_alloc_page(page, order, alloc_gfp);
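
For reference, the slot selection above is floor(log2(latency in ns)),
clamped to slot 0 for zero deltas, so slot i accumulates allocations
that took between 2^i and 2^(i+1)-1 ns. A standalone userspace sketch
of that mapping, with __builtin_clzll standing in for the kernel's
__ilog2_u64():

/* Illustrates the histogram bucketing used in __alloc_pages() above. */
#include <stdint.h>
#include <stdio.h>

#define MEM_LATENCY_MAX_SLOTS 64

/* Userspace stand-in for the kernel's __ilog2_u64(): floor(log2(n)). */
static int ilog2_u64(uint64_t n)
{
	return n ? 63 - __builtin_clzll(n) : -1;
}

static int latency_to_slot(uint64_t delta_ns)
{
	int slot = ilog2_u64(delta_ns);

	return slot < 0 ? 0 : slot;	/* a zero delta lands in slot 0 */
}

int main(void)
{
	uint64_t samples[] = { 0, 1, 800, 1500, 250000, 3000000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int slot = latency_to_slot(samples[i]);

		printf("%8llu ns -> slot %2d  [%llu, %llu] ns\n",
		       (unsigned long long)samples[i], slot,
		       (unsigned long long)(slot ? 1ULL << slot : 0),
		       (unsigned long long)((2ULL << slot) - 1));
	}
	return 0;
}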