rue/io: buffered_write_bps hierarchy support

Support hierarchical setting of buffered_write_bps: when enabled via the
io_buffered_write_bps_hierarchy sysctl, the buffered_write_bps limits of all
ancestor cgroups are honored as well.

Signed-off-by: Haisu Wang <haisuwang@tencent.com>
Author: Haisu Wang <haisuwang@tencent.com>
Date:   2024-04-19 10:58:18 +08:00
parent 701147d7b1
commit f630af7168
7 changed files with 134 additions and 20 deletions

@@ -288,17 +288,6 @@ struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
}
EXPORT_SYMBOL_GPL(bio_blkcg_css);
/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg. Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
        return css_to_blkcg(blkcg->css.parent);
}
/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with

@@ -29,10 +29,6 @@ struct blkg_policy_data;
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
#ifdef CONFIG_BLK_CGROUP
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct blkcg, css) : NULL;
}
#ifdef CONFIG_BLK_CGROUP_DISKSTATS
/*

@@ -44,6 +44,8 @@
static struct workqueue_struct *kthrotld_workqueue;
static bool throtl_hierarchy __read_mostly = true;
unsigned int sysctl_skip_throttle_prio_req;
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
static void throtl_pending_timer_fn(struct timer_list *t);
@@ -1504,13 +1506,25 @@ static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
}
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
/*
 * Initial write bandwidth: 1000 MB/s (wb_init is 100 MB/s).
 * The bandwidth will be updated via blkcg_update_bandwidth().
 */
#define INIT_DIRTY_BW (1000 << (20 - PAGE_SHIFT))
static int tg_set_buffered_write_bps(struct cgroup_subsys_state *css,
                                     struct cftype *cft,
                                     u64 val)
{
        struct blkcg *blkcg = css_to_blkcg(css);

        if (!rue_io_enabled())
                return -EPERM;

        if (blkcg) {
                blkcg->buffered_write_bps = val;
                blkcg->dirty_ratelimit = INIT_DIRTY_BW;
        }
        return 0;
}
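For reference, INIT_DIRTY_BW is expressed in pages per second, the unit the writeback ratelimit code works in. A standalone sketch of the shift arithmetic, assuming 4 KiB pages (PAGE_SHIFT of 12 is an assumption for illustration, not taken from this patch):

#include <stdio.h>

#define PAGE_SHIFT      12                              /* assumed: 4 KiB pages */
#define INIT_DIRTY_BW   (1000 << (20 - PAGE_SHIFT))     /* as defined above */

int main(void)
{
        /* 1000 << 8 == 256000 pages/s; at 4 KiB per page that is 1000 MiB/s */
        unsigned long pages_per_sec = INIT_DIRTY_BW;

        printf("%lu pages/s == %lu MiB/s\n",
               pages_per_sec, pages_per_sec >> (20 - PAGE_SHIFT));
        return 0;
}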
static u64 tg_read_buffered_write_bps(struct cgroup_subsys_state *css,

@@ -418,7 +418,25 @@ struct rue_io_module_ops {
};
extern struct rue_io_module_ops rue_io_ops;
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct blkcg, css) : NULL;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg. Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
        return css_to_blkcg(blkcg->css.parent);
}
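Moving css_to_blkcg() and blkcg_parent() into this header lets RUE code walk the blkcg hierarchy without reaching into block/blk-cgroup.c. Both helpers rely on the container_of() pattern: the cgroup core hands out a pointer to the embedded cgroup_subsys_state, and container_of() recovers the enclosing blkcg. A minimal userspace model of that pattern (toy struct names, illustration only):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct css { struct css *parent; };
struct blkcg { int id; struct css css; };       /* css embedded, as in the kernel */

static struct blkcg *css_to_blkcg(struct css *css)
{
        return css ? container_of(css, struct blkcg, css) : NULL;
}

int main(void)
{
        struct blkcg root  = { .id = 0, .css = { .parent = NULL } };
        struct blkcg child = { .id = 1, .css = { .parent = &root.css } };

        /* walk from the child's embedded css back to its parent blkcg */
        printf("parent id = %d\n", css_to_blkcg(child.css.parent)->id);
        return 0;
}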
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
extern unsigned int sysctl_buffered_write_bps_hierarchy __read_mostly;
static inline uint64_t blkcg_buffered_write_bps(struct blkcg *blkcg)
{
        return blkcg->buffered_write_bps;
@@ -429,6 +447,24 @@ static inline unsigned long blkcg_dirty_ratelimit(struct blkcg *blkcg)
        return blkcg->dirty_ratelimit;
}
static inline int blkcg_buffered_write_bps_enabled(struct blkcg *blkcg)
{
        if (!rue_io_enabled())
                return 0;

        if (!sysctl_buffered_write_bps_hierarchy)
                return blkcg_buffered_write_bps(blkcg);

        while (blkcg) {
                if (blkcg->buffered_write_bps)
                        return blkcg_buffered_write_bps(blkcg);
                blkcg = blkcg_parent(blkcg);
        }
        return 0;
}
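With sysctl_buffered_write_bps_hierarchy set, the check above no longer looks only at the task's own blkcg: it walks toward the root and returns the nearest ancestor's non-zero limit, so an unlimited child of a limited parent still counts as throttled. A userspace sketch of that nearest-ancestor lookup (toy types, illustration only):

#include <stdio.h>

struct blkcg {
        unsigned long long buffered_write_bps;  /* 0 == no limit set here */
        struct blkcg *parent;
};

/* mirrors blkcg_buffered_write_bps_enabled(): nearest configured ancestor wins */
static unsigned long long effective_bps(struct blkcg *blkcg, int hierarchy)
{
        if (!hierarchy)
                return blkcg ? blkcg->buffered_write_bps : 0;

        while (blkcg) {
                if (blkcg->buffered_write_bps)
                        return blkcg->buffered_write_bps;
                blkcg = blkcg->parent;
        }
        return 0;       /* no limit anywhere on the path to the root */
}

int main(void)
{
        struct blkcg root  = { .buffered_write_bps = 100ULL << 20 };    /* 100 MB/s */
        struct blkcg child = { .buffered_write_bps = 0, .parent = &root };

        printf("flat: %llu, hierarchical: %llu\n",
               effective_bps(&child, 0), effective_bps(&child, 1));
        return 0;
}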
static inline struct blkcg *get_task_blkcg(struct task_struct *tsk)
{
        struct cgroup_subsys_state *css;

@@ -572,7 +572,7 @@ TRACE_EVENT(global_dirty_state,
#define KBps(x) ((x) << (PAGE_SHIFT - 10))
#ifdef CONFIG_BLK_DEV_THROTTLING
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
TRACE_EVENT(blkcg_dirty_ratelimit,

        TP_PROTO(unsigned long bps,
@@ -606,6 +606,40 @@ TRACE_EVENT(blkcg_dirty_ratelimit,
                  __entry->balanced_dirty_ratelimit
        )
);

TRACE_EVENT(blkcg_calc_task_ratelimit,

        TP_PROTO(const char *blkcg_name,
                 unsigned long blkcg_buffered_write_bps,
                 unsigned long blkcg_dirty_ratelimit,
                 unsigned long task_ratelimit),

        TP_ARGS(blkcg_name, blkcg_buffered_write_bps,
                blkcg_dirty_ratelimit, task_ratelimit),

        TP_STRUCT__entry(
                __array(char, name, 256)
                __field(unsigned long, blkcg_bps)
                __field(unsigned long, blkcg_dirty_ratelimit)
                __field(unsigned long, task_ratelimit)
        ),

        TP_fast_assign(
                strscpy_pad(__entry->name, blkcg_name, 256);
                __entry->blkcg_bps = blkcg_buffered_write_bps;
                __entry->blkcg_dirty_ratelimit = KBps(blkcg_dirty_ratelimit);
                __entry->task_ratelimit = KBps(task_ratelimit);
        ),

        TP_printk("cgroup=%s blkcg_bps=%lu "
                  "blkcg_dirty_ratelimit=%lu "
                  "task_ratelimit_kbps=%lu",
                  __entry->name,
                  __entry->blkcg_bps,
                  __entry->blkcg_dirty_ratelimit,
                  __entry->task_ratelimit
        )
);
#endif
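Once this event is compiled in, it should show up in tracefs next to the other writeback events; the paths below (the tracefs mount point and the writeback trace system) are assumptions based on where this header lives, not something this patch spells out. A minimal reader sketch:

#include <stdio.h>

/* assumed tracefs paths; adjust to your mount -- illustration only */
#define ENABLE  "/sys/kernel/tracing/events/writeback/blkcg_calc_task_ratelimit/enable"
#define PIPE    "/sys/kernel/tracing/trace_pipe"

int main(void)
{
        char line[512];
        FILE *f = fopen(ENABLE, "w");

        if (!f) {
                perror("enable");
                return 1;
        }
        fputs("1", f);
        fclose(f);

        /* each record carries the cgroup=... blkcg_bps=... fields printed above */
        f = fopen(PIPE, "r");
        if (!f) {
                perror("trace_pipe");
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}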
TRACE_EVENT(bdi_dirty_ratelimit,

@@ -66,6 +66,10 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
#include <linux/blk-cgroup.h>
#endif
#include "../lib/kstrtox.h"
#include <linux/uaccess.h>
@@ -2647,6 +2651,17 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
#endif
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
        {
                .procname       = "io_buffered_write_bps_hierarchy",
                .data           = &sysctl_buffered_write_bps_hierarchy,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
#endif
#ifdef CONFIG_RQM
        {
                .procname       = "qos_mbuf_enable",

@@ -111,6 +111,15 @@ EXPORT_SYMBOL_GPL(dirty_writeback_interval);
*/
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
/*
 * Support hierarchical buffered write bps limits.
 * When enabled, the buffered_write_bps limits of all ancestor
 * cgroups are checked in addition to the task's own blkcg.
 */
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
unsigned int sysctl_buffered_write_bps_hierarchy;
#endif
/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
@@ -1721,6 +1730,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
        int ret = 0;
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
        struct blkcg *blkcg = get_task_blkcg(current);
        struct blkcg *parent_blkcg;
#endif

        for (;;) {
@@ -1810,7 +1820,7 @@ free_running:
                        m_intv = ULONG_MAX;
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
                        if (blkcg && blkcg_buffered_write_bps_enabled(blkcg))
                                goto blkcg_bps;
#endif
@@ -1894,12 +1904,32 @@ free_running:
RATELIMIT_CALC_SHIFT;
#ifdef CONFIG_BLK_DEV_THROTTLING_CGROUP_V1
                if (blkcg && blkcg_buffered_write_bps_enabled(blkcg) &&
                    task_ratelimit > blkcg_dirty_ratelimit(blkcg)) {
blkcg_bps:
                        if (likely(sysctl_buffered_write_bps_hierarchy)) {
                                dirty_ratelimit = blkcg_dirty_ratelimit(blkcg);
                                parent_blkcg = blkcg;
                                while (parent_blkcg) {
                                        if (blkcg_buffered_write_bps(parent_blkcg)) {
                                                RUE_CALL_VOID(IO, blkcg_update_bandwidth,
                                                              parent_blkcg);
                                                if (dirty_ratelimit > blkcg_dirty_ratelimit(parent_blkcg))
                                                        dirty_ratelimit = blkcg_dirty_ratelimit(parent_blkcg);
                                        }
                                        parent_blkcg = blkcg_parent(parent_blkcg);
                                }
                        } else {
                                RUE_CALL_VOID(IO, blkcg_update_bandwidth, blkcg);
                                dirty_ratelimit = blkcg_dirty_ratelimit(blkcg);
                        }
                        task_ratelimit = dirty_ratelimit;
                        trace_blkcg_calc_task_ratelimit(blkcg->css.cgroup->kn->name,
                                        blkcg_buffered_write_bps(blkcg),
                                        blkcg_dirty_ratelimit(blkcg),
                                        task_ratelimit);
                }
#endif
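In the hierarchical branch, every ancestor with a configured buffered_write_bps gets its bandwidth refreshed via blkcg_update_bandwidth(), and the task is then clamped to the smallest dirty_ratelimit found on the path, so the strictest ancestor wins. A compact userspace model of that min-over-ancestors pass (toy types, illustration only):

#include <stdio.h>

struct blkcg {
        unsigned long buffered_write_bps;       /* 0 == unlimited at this level */
        unsigned long dirty_ratelimit;          /* pages/s, maintained per cgroup */
        struct blkcg *parent;
};

/* mirrors the while loop above: minimum ratelimit among limited ancestors */
static unsigned long hierarchical_ratelimit(struct blkcg *blkcg)
{
        unsigned long limit = blkcg->dirty_ratelimit;

        for (; blkcg; blkcg = blkcg->parent)
                if (blkcg->buffered_write_bps &&
                    blkcg->dirty_ratelimit < limit)
                        limit = blkcg->dirty_ratelimit;

        return limit;
}

int main(void)
{
        struct blkcg root  = { .buffered_write_bps = 1, .dirty_ratelimit = 25600 };
        struct blkcg child = { .buffered_write_bps = 1, .dirty_ratelimit = 51200,
                               .parent = &root };

        /* the root's stricter 25600 pages/s wins over the child's 51200 */
        printf("task ratelimit = %lu pages/s\n", hierarchical_ratelimit(&child));
        return 0;
}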