rue/io: add io_cgv1_buff_wb to enable buffer IO counting in cgroup v1
Add a sysctl switch to control buffer IO counting in memcg of cgroup v1. If this switch is turned on, removing a memory cgroup may leave zombie slabs until writeback has finished. To use this in cgroup v1, both io_qos and io_cgv1_buff_wb need to be turned on. Signed-off-by: Haisu Wang <haisuwang@tencent.com> Reviewed-by: Bin Lai <robinlai@tencent.com>
This commit is contained in:
parent
826a0366a1
commit
fed4a7c8be
|
@ -159,6 +159,9 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
|
||||||
void wb_memcg_offline(struct mem_cgroup *memcg);
|
void wb_memcg_offline(struct mem_cgroup *memcg);
|
||||||
void wb_blkcg_offline(struct cgroup_subsys_state *css);
|
void wb_blkcg_offline(struct cgroup_subsys_state *css);
|
||||||
|
|
||||||
|
extern unsigned int sysctl_io_cgv1_buff_wb_enabled __read_mostly;
|
||||||
|
bool buff_wb_enabled(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
|
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
|
||||||
* @inode: inode of interest
|
* @inode: inode of interest
|
||||||
|
@ -174,7 +177,10 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct backing_dev_info *bdi = inode_to_bdi(inode);
|
struct backing_dev_info *bdi = inode_to_bdi(inode);
|
||||||
|
|
||||||
return (bdi->capabilities & BDI_CAP_WRITEBACK) &&
|
return (buff_wb_enabled() ||
|
||||||
|
(cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
|
||||||
|
cgroup_subsys_on_dfl(io_cgrp_subsys))) &&
|
||||||
|
(bdi->capabilities & BDI_CAP_WRITEBACK) &&
|
||||||
(inode->i_sb->s_iflags & SB_I_CGROUPWB);
|
(inode->i_sb->s_iflags & SB_I_CGROUPWB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ struct blk_crypto_profile;
|
||||||
extern const struct device_type disk_type;
|
extern const struct device_type disk_type;
|
||||||
extern const struct device_type part_type;
|
extern const struct device_type part_type;
|
||||||
extern struct class block_class;
|
extern struct class block_class;
|
||||||
extern unsigned int sysctl_io_qos_enabled;
|
extern unsigned int sysctl_io_qos_enabled __read_mostly;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum number of blkcg policies allowed to be registered concurrently.
|
* Maximum number of blkcg policies allowed to be registered concurrently.
|
||||||
|
|
|
@ -62,6 +62,7 @@
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <linux/blk-cgroup.h>
|
#include <linux/blk-cgroup.h>
|
||||||
#include <linux/rue.h>
|
#include <linux/rue.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUP_SLI
|
#ifdef CONFIG_CGROUP_SLI
|
||||||
#include <linux/sli.h>
|
#include <linux/sli.h>
|
||||||
|
@ -7376,6 +7377,13 @@ static int __init cgroup_disable(char *str)
|
||||||
static_branch_disable(cgroup_subsys_enabled_key[i]);
|
static_branch_disable(cgroup_subsys_enabled_key[i]);
|
||||||
pr_info("Disabling %s control group subsystem\n",
|
pr_info("Disabling %s control group subsystem\n",
|
||||||
ss->name);
|
ss->name);
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
|
if ((i == memory_cgrp_id) || (i == io_cgrp_id)) {
|
||||||
|
pr_info("Disable cgv1 buffer IO writeback\n");
|
||||||
|
sysctl_io_cgv1_buff_wb_enabled = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < OPT_FEATURE_COUNT; i++) {
|
for (i = 0; i < OPT_FEATURE_COUNT; i++) {
|
||||||
|
|
|
@ -64,6 +64,7 @@
|
||||||
#include <linux/userfaultfd_k.h>
|
#include <linux/userfaultfd_k.h>
|
||||||
#include <linux/pid.h>
|
#include <linux/pid.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
|
|
||||||
#include "../lib/kstrtox.h"
|
#include "../lib/kstrtox.h"
|
||||||
|
|
||||||
|
@ -2683,6 +2684,17 @@ static struct ctl_table kern_table[] = {
|
||||||
.extra1 = SYSCTL_ZERO,
|
.extra1 = SYSCTL_ZERO,
|
||||||
.extra2 = SYSCTL_ONE,
|
.extra2 = SYSCTL_ONE,
|
||||||
},
|
},
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
|
{
|
||||||
|
.procname = "io_cgv1_buff_wb",
|
||||||
|
.data = &sysctl_io_cgv1_buff_wb_enabled,
|
||||||
|
.maxlen = sizeof(unsigned int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = SYSCTL_ZERO,
|
||||||
|
.extra2 = SYSCTL_ONE,
|
||||||
|
},
|
||||||
#endif
|
#endif
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
|
@ -511,6 +511,30 @@ static LIST_HEAD(offline_cgwbs);
|
||||||
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
|
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
|
||||||
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
|
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Support buffer IO writeback in cgroup v1
|
||||||
|
* Enabling this will count buffer IO in memcg
|
||||||
|
*/
|
||||||
|
unsigned int sysctl_io_cgv1_buff_wb_enabled = 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* buff_wb_enabled - test whether cgroup v1 buffer IO writeback is enabled
|
||||||
|
*
|
||||||
|
* Cgroup v1 does not support counting buffer IO by default.
|
||||||
|
* The memory and blkio cgroups can be bound to count buffer IO
|
||||||
|
* in cgroup v1.
|
||||||
|
*
|
||||||
|
* Both io_qos and io_cgv1_buff_wb must be enabled to use this.
|
||||||
|
* Return: true only if rue_io_enabled(), the sysctl is non-zero, and neither memory nor io is on the default hierarchy.
|
||||||
|
*/
|
||||||
|
bool buff_wb_enabled(void)
|
||||||
|
{
|
||||||
|
return rue_io_enabled() && sysctl_io_cgv1_buff_wb_enabled &&
|
||||||
|
!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
|
||||||
|
!cgroup_subsys_on_dfl(io_cgrp_subsys);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(buff_wb_enabled);
|
||||||
|
|
||||||
static void cgwb_free_rcu(struct rcu_head *rcu_head)
|
static void cgwb_free_rcu(struct rcu_head *rcu_head)
|
||||||
{
|
{
|
||||||
struct bdi_writeback *wb = container_of(rcu_head,
|
struct bdi_writeback *wb = container_of(rcu_head,
|
||||||
|
|
|
@ -6222,6 +6222,7 @@ static int mem_cgroup_vmstat_read(struct seq_file *m, void *vv)
|
||||||
return mem_cgroup_vmstat_read_comm(m, vv, memcg);
|
return mem_cgroup_vmstat_read_comm(m, vv, memcg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of,
|
static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of,
|
||||||
char *buf, size_t nbytes, loff_t off)
|
char *buf, size_t nbytes, loff_t off)
|
||||||
{
|
{
|
||||||
|
@ -6231,7 +6232,7 @@ static ssize_t mem_cgroup_bind_blkio_write(struct kernfs_open_file *of,
|
||||||
char *pbuf;
|
char *pbuf;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!rue_io_enabled())
|
if (!buff_wb_enabled())
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
|
||||||
buf = strstrip(buf);
|
buf = strstrip(buf);
|
||||||
|
@ -6293,6 +6294,7 @@ static int mem_cgroup_bind_blkio_show(struct seq_file *m, void *v)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static ssize_t mem_cgroup_sync_write(struct kernfs_open_file *of, char *buf,
|
static ssize_t mem_cgroup_sync_write(struct kernfs_open_file *of, char *buf,
|
||||||
size_t nbytes, loff_t off)
|
size_t nbytes, loff_t off)
|
||||||
|
@ -9801,12 +9803,14 @@ static struct cftype memsw_files[] = {
|
||||||
.write = mem_cgroup_reset,
|
.write = mem_cgroup_reset,
|
||||||
.read_u64 = mem_cgroup_read_u64,
|
.read_u64 = mem_cgroup_read_u64,
|
||||||
},
|
},
|
||||||
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
{
|
{
|
||||||
.name = "bind_blkio",
|
.name = "bind_blkio",
|
||||||
.flags = CFTYPE_NOT_ON_ROOT,
|
.flags = CFTYPE_NOT_ON_ROOT,
|
||||||
.write = mem_cgroup_bind_blkio_write,
|
.write = mem_cgroup_bind_blkio_write,
|
||||||
.seq_show = mem_cgroup_bind_blkio_show,
|
.seq_show = mem_cgroup_bind_blkio_show,
|
||||||
},
|
},
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
.name = "sync",
|
.name = "sync",
|
||||||
.flags = CFTYPE_NOT_ON_ROOT,
|
.flags = CFTYPE_NOT_ON_ROOT,
|
||||||
|
@ -10310,6 +10314,9 @@ static void clean_each_dying_memcg(struct mem_cgroup *memcg)
|
||||||
if (ret)
|
if (ret)
|
||||||
goto next;
|
goto next;
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
|
if (buff_wb_enabled())
|
||||||
|
#endif
|
||||||
reap_slab(memcg);
|
reap_slab(memcg);
|
||||||
|
|
||||||
if (!drained) {
|
if (!drained) {
|
||||||
|
|
Loading…
Reference in New Issue