rue/io: add io_cgv1_buff_wb to enable buffer IO counting in cgroup v1

Add a sysctl switch to control buffer IO counting in the
memcg of cgroup v1. If this switch is turned on, removing a
memory cgroup may leave zombie slabs until writeback (wb) has finished.

Both io_qos and io_cgv1_buff_wb need to be turned on to use this in cgroup v1.

Signed-off-by: Haisu Wang <haisuwang@tencent.com>
Reviewed-by: Bin Lai <robinlai@tencent.com>
This commit is contained in:
Haisu Wang 2023-01-10 14:42:42 +08:00
parent 826a0366a1
commit fed4a7c8be
6 changed files with 61 additions and 4 deletions

View File

@ -159,6 +159,9 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct cgroup_subsys_state *css);
extern unsigned int sysctl_io_cgv1_buff_wb_enabled __read_mostly;
bool buff_wb_enabled(void);
/**
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
* @inode: inode of interest
@ -174,7 +177,10 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
return (bdi->capabilities & BDI_CAP_WRITEBACK) &&
return (buff_wb_enabled() ||
(cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
cgroup_subsys_on_dfl(io_cgrp_subsys))) &&
(bdi->capabilities & BDI_CAP_WRITEBACK) &&
(inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

View File

@ -43,7 +43,7 @@ struct blk_crypto_profile;
extern const struct device_type disk_type;
extern const struct device_type part_type;
extern struct class block_class;
extern unsigned int sysctl_io_qos_enabled;
extern unsigned int sysctl_io_qos_enabled __read_mostly;
/*
* Maximum number of blkcg policies allowed to be registered concurrently.

View File

@ -62,6 +62,7 @@
#include <net/sock.h>
#include <linux/blk-cgroup.h>
#include <linux/rue.h>
#include <linux/backing-dev.h>
#ifdef CONFIG_CGROUP_SLI
#include <linux/sli.h>
@ -7376,6 +7377,13 @@ static int __init cgroup_disable(char *str)
static_branch_disable(cgroup_subsys_enabled_key[i]);
pr_info("Disabling %s control group subsystem\n",
ss->name);
#ifdef CONFIG_CGROUP_WRITEBACK
if ((i == memory_cgrp_id) || (i == io_cgrp_id)) {
pr_info("Disable cgv1 buffer IO writeback\n");
sysctl_io_cgv1_buff_wb_enabled = 0;
}
#endif
}
for (i = 0; i < OPT_FEATURE_COUNT; i++) {

View File

@ -64,6 +64,7 @@
#include <linux/userfaultfd_k.h>
#include <linux/pid.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include "../lib/kstrtox.h"
@ -2683,6 +2684,17 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
{
.procname = "io_cgv1_buff_wb",
.data = &sysctl_io_cgv1_buff_wb_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
{ }
};

View File

@ -511,6 +511,30 @@ static LIST_HEAD(offline_cgwbs);
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
/*
 * Switch for buffer IO writeback support in cgroup v1.
 * Enabling it counts buffer IO in memcg.
 *
 * Defaults to 1 (enabled). The switch alone is not sufficient:
 * buff_wb_enabled() also requires rue_io_enabled() and that neither the
 * memory nor the io controller is on the default hierarchy.
 */
unsigned int sysctl_io_cgv1_buff_wb_enabled = 1;
/**
 * buff_wb_enabled - report whether cgroup v1 buffer writeback is active
 *
 * Cgroup v1 does not count buffer IO by default. Binding the memory and
 * blkio cgroups together makes it possible to count buffer IO in cgroup v1.
 *
 * Both io_qos and io_cgv1_buff_wb have to be enabled for this to be used.
 *
 * Return: true only if rue_io_enabled() holds, the
 * sysctl_io_cgv1_buff_wb_enabled switch is non-zero, and neither the memory
 * nor the io controller is on the default hierarchy; false otherwise.
 */
bool buff_wb_enabled(void)
{
	/* Checks run in the same order as a short-circuit && chain would. */
	if (!rue_io_enabled())
		return false;

	if (!sysctl_io_cgv1_buff_wb_enabled)
		return false;

	/* Either controller on the default hierarchy rules this path out. */
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
		return false;

	return !cgroup_subsys_on_dfl(io_cgrp_subsys);
}
EXPORT_SYMBOL(buff_wb_enabled);
static void cgwb_free_rcu(struct rcu_head *rcu_head)
{
struct bdi_writeback *wb = container_of(rcu_head,

View File

@ -6222,6 +6222,7 @@ static int mem_cgroup_vmstat_read(struct seq_file *m, void *vv)
return mem_cgroup_vmstat_read_comm(m, vv, memcg);
}
#ifdef CONFIG_CGROUP_WRITEBACK
static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
@ -6231,7 +6232,7 @@ static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of,
char *pbuf;
int ret;
if (!rue_io_enabled())
if (!buff_wb_enabled())
return -EPERM;
buf = strstrip(buf);
@ -6293,6 +6294,7 @@ static int mem_cgroup_bind_blkio_show(struct seq_file *m, void *v)
return 0;
}
#endif
static ssize_t mem_cgroup_sync_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
@ -9801,12 +9803,14 @@ static struct cftype memsw_files[] = {
.write = mem_cgroup_reset,
.read_u64 = mem_cgroup_read_u64,
},
#ifdef CONFIG_CGROUP_WRITEBACK
{
.name = "bind_blkio",
.flags = CFTYPE_NOT_ON_ROOT,
.write = mem_cgroup_bind_blkio_write,
.seq_show = mem_cgroup_bind_blkio_show,
},
#endif
{
.name = "sync",
.flags = CFTYPE_NOT_ON_ROOT,
@ -10310,7 +10314,10 @@ static void clean_each_dying_memcg(struct mem_cgroup *memcg)
if (ret)
goto next;
reap_slab(memcg);
#ifdef CONFIG_CGROUP_WRITEBACK
if (buff_wb_enabled())
#endif
reap_slab(memcg);
if (!drained) {
drain_all_stock(memcg);