From fed4a7c8be64318ca38155bf5c1ffb983b3c2720 Mon Sep 17 00:00:00 2001 From: Haisu Wang Date: Tue, 10 Jan 2023 14:42:42 +0800 Subject: [PATCH] rue/io: add io_cgv1_buff_wb to enable buffer IO counting in cgroup v1 Add a sysctl switch to control buffer IO counting in memcg of cgroup v1. If this switch is turned on, removing a memory cgroup may leave zombie slabs until writeback has finished. Both io_qos and io_cgv1_buff_wb need to be turned on to use this in cgroup v1. Signed-off-by: Haisu Wang Reviewed-by: Bin Lai --- include/linux/backing-dev.h | 8 +++++++- include/linux/blkdev.h | 2 +- kernel/cgroup/cgroup.c | 8 ++++++++ kernel/sysctl.c | 12 ++++++++++++ mm/backing-dev.c | 24 ++++++++++++++++++++++++ mm/memcontrol.c | 11 +++++++++-- 6 files changed, 61 insertions(+), 4 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b1f8b04f1b8f..096bfd0bbde3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -159,6 +159,9 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct cgroup_subsys_state *css); +extern unsigned int sysctl_io_cgv1_buff_wb_enabled __read_mostly; +bool buff_wb_enabled(void); + /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest @@ -174,7 +177,10 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return (bdi->capabilities & BDI_CAP_WRITEBACK) && + return (buff_wb_enabled() || + (cgroup_subsys_on_dfl(memory_cgrp_subsys) && + cgroup_subsys_on_dfl(io_cgrp_subsys))) && + (bdi->capabilities & BDI_CAP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a6863441018b..0d7d5af65265 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct 
device_type part_type; extern struct class block_class; -extern unsigned int sysctl_io_qos_enabled; +extern unsigned int sysctl_io_qos_enabled __read_mostly; /* * Maximum number of blkcg policies allowed to be registered concurrently. diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c2b527344d06..eb69b9ae87e5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUP_SLI #include @@ -7376,6 +7377,13 @@ static int __init cgroup_disable(char *str) static_branch_disable(cgroup_subsys_enabled_key[i]); pr_info("Disabling %s control group subsystem\n", ss->name); + +#ifdef CONFIG_CGROUP_WRITEBACK + if ((i == memory_cgrp_id) || (i == io_cgrp_id)) { + pr_info("Disable cgv1 buffer IO writeback\n"); + sysctl_io_cgv1_buff_wb_enabled = 0; + } +#endif } for (i = 0; i < OPT_FEATURE_COUNT; i++) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ac6cfc749ecc..bc43322cfa79 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -64,6 +64,7 @@ #include #include #include +#include #include "../lib/kstrtox.h" @@ -2683,6 +2684,17 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif +#ifdef CONFIG_CGROUP_WRITEBACK + { + .procname = "io_cgv1_buff_wb", + .data = &sysctl_io_cgv1_buff_wb_enabled, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { } }; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 603cc2a032a9..4f9a9d123874 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -511,6 +511,30 @@ static LIST_HEAD(offline_cgwbs); static void cleanup_offline_cgwbs_workfn(struct work_struct *work); static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn); +/* + * Support buffer IO writeback in cgroup v1. + * Enabling this counts buffer IO in memcg. + */ +unsigned int sysctl_io_cgv1_buff_wb_enabled = 1; + +/** + * 
buff_wb_enabled - test whether buffer writeback is enabled + * + * Cgroup v1 does not support counting buffer IO by default. + * Binding the memory and blkio cgroups allows buffer IO to be + * counted in cgroup v1. + * + * Both io_qos and io_cgv1_buff_wb must be enabled to use this. + * + */ +bool buff_wb_enabled(void) +{ + return rue_io_enabled() && sysctl_io_cgv1_buff_wb_enabled && + !cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !cgroup_subsys_on_dfl(io_cgrp_subsys); +} +EXPORT_SYMBOL(buff_wb_enabled); + static void cgwb_free_rcu(struct rcu_head *rcu_head) { struct bdi_writeback *wb = container_of(rcu_head, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e32a56bc6564..3c761a836469 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6222,6 +6222,7 @@ static int mem_cgroup_vmstat_read(struct seq_file *m, void *vv) return mem_cgroup_vmstat_read_comm(m, vv, memcg); } +#ifdef CONFIG_CGROUP_WRITEBACK static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -6231,7 +6232,7 @@ static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of, char *pbuf; int ret; - if (!rue_io_enabled()) + if (!buff_wb_enabled()) return -EPERM; buf = strstrip(buf); @@ -6293,6 +6294,7 @@ static int mem_cgroup_bind_blkio_show(struct seq_file *m, void *v) return 0; } +#endif static ssize_t mem_cgroup_sync_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) @@ -9801,12 +9803,14 @@ static struct cftype memsw_files[] = { .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, +#ifdef CONFIG_CGROUP_WRITEBACK { .name = "bind_blkio", .flags = CFTYPE_NOT_ON_ROOT, .write = mem_cgroup_bind_blkio_write, .seq_show = mem_cgroup_bind_blkio_show, }, +#endif { .name = "sync", .flags = CFTYPE_NOT_ON_ROOT, @@ -10310,7 +10314,10 @@ static void clean_each_dying_memcg(struct mem_cgroup *memcg) if (ret) goto next; - reap_slab(memcg); +#ifdef CONFIG_CGROUP_WRITEBACK + if (buff_wb_enabled()) +#endif + reap_slab(memcg); if (!drained) { 
drain_all_stock(memcg);