rue/io: introduce wbt class for cgroup priority

One wbt class covers one or more contiguous cgroup priorities.
Here we introduce three wbt classes and a wbt_throtl_info structure
to hold the necessary per-class information.
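
As an illustration only (the class names and the priority-to-class split
below are hypothetical and not taken from this patch; only WBT_CLASS_NR == 3
is implied by the message), covering contiguous cgroup priorities with three
wbt classes could look like:

/*
 * Hypothetical sketch: three wbt classes, each covering a contiguous
 * range of cgroup priorities (assumed range 0..7, 0 = highest).
 */
enum wbt_class {
	WBT_CLASS_HIGH = 0,	/* e.g. priorities 0-1 */
	WBT_CLASS_MID,		/* e.g. priorities 2-4 */
	WBT_CLASS_LOW,		/* e.g. priorities 5-7 */
	WBT_CLASS_NR,		/* == 3 */
};

static inline enum wbt_class wbt_class_of_prio(int prio)
{
	if (prio <= 1)
		return WBT_CLASS_HIGH;
	if (prio <= 4)
		return WBT_CLASS_MID;
	return WBT_CLASS_LOW;
}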

Integrate the wbt classes into the rq_wb struct and introduce wbt_grp
to make wbt cgroup-aware.
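
A rough sketch of how the pieces could fit together; the field names and
the blkcg-policy plumbing here are assumptions for illustration, not the
exact layout of this patch:

/* Illustrative only: per-class throttle state embedded in rq_wb. */
struct rq_wb {
	/* ... existing rq_wb fields (scale step, timer, stats, ...) ... */
	struct wbt_throtl_info class_info[WBT_CLASS_NR]; /* one per wbt class */
};

/*
 * Hypothetical per-cgroup state used to pick the wbt class for its bios;
 * registering it as a blkcg policy would explain the BLKCG_MAX_POLS bump
 * from 6 to 7 seen below.
 */
struct wbt_grp {
	struct blkg_policy_data pd;
	unsigned int wbt_class;	/* class this cgroup's IO is accounted to */
};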

In wbt_class_timer_fn(), multiple wbt classes share one request
queue. When excessive read latency is observed, those wbt classes
need to be coordinated.

We use the following policy (see the sketch after this list):
1. Find the highest class in which any read request expired during
the last time window, then throttle step by step from the lowest
class up to this class (excluded).
2. Recover queue depth from the highest class down to the lower
classes.
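
A simplified sketch of that coordination, reusing the
throtl_info_scale_up()/throtl_info_scale_down() hooks declared in the
rue_io_module_ops table below; the helper name, the ops-table instance,
the class_info[] layout, the assumption that class 0 is the highest
priority, and the assumption that recovery runs when no reads expired
are all illustrative:

/* Illustrative sketch, not the literal wbt_class_timer_fn() implementation. */
extern struct rue_io_module_ops rue_io_ops;	/* assumed global ops instance */

static void wbt_class_coordinate(struct rq_wb *rwb)
{
	struct wbt_throtl_info *ti;
	int highest = WBT_CLASS_NR;
	int i;

	/* 1. Highest class that saw an expired read in the last time window. */
	for (i = 0; i < WBT_CLASS_NR; i++) {
		if (atomic64_read(&rwb->class_info[i].read_expired_cnt)) {
			highest = i;
			break;
		}
	}

	if (highest < WBT_CLASS_NR) {
		/* Throttle from the lowest class up to, but excluding, 'highest'. */
		for (i = WBT_CLASS_NR - 1; i > highest; i--) {
			ti = &rwb->class_info[i];
			if (rue_io_ops.throtl_info_scale_down(ti, false))
				break;	/* one step per timer expiry */
		}
	} else {
		/* 2. No expired reads: recover depth from the highest class down. */
		for (i = 0; i < WBT_CLASS_NR; i++) {
			ti = &rwb->class_info[i];
			if (rue_io_ops.throtl_info_scale_up(ti, false))
				break;
		}
	}
}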

Fix the compile/run errors for tkernel5:
Upstream commit da521626ac converted the memset() in bio initialization
into field-by-field assignment, so an uninitialized bi_wbt_acct may end
up with wbt_class as 3 (WBT_CLASS_NR). The wbt_throtl_info lookup then
returns a NULL pointer, which leads to a crash in wbt_done_bio().
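
For context, a class-to-throtl-info lookup of the shape described above
(the helper name and the class_info[] field are illustrative) behaves like
this, which is why bi_wbt_acct must start out in range:

/* Illustrative: why an out-of-range wbt class ends in a crash at wbt_done_bio(). */
static struct wbt_throtl_info *wbt_get_throtl_info(struct rq_wb *rwb,
						   unsigned int wbt_class)
{
	/* wbt_class may be 3 (WBT_CLASS_NR) when bi_wbt_acct is uninitialized */
	if (wbt_class >= WBT_CLASS_NR)
		return NULL;	/* dereferencing this NULL later crashes */
	return &rwb->class_info[wbt_class];
}

Initializing bio->bi_wbt_acct to 0 in bio_init(), as in the first hunk
below, keeps the derived class index in range.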

Signed-off-by: Haisu Wang <haisuwang@tencent.com>
Signed-off-by: Lenny Chen <lennychen@tencent.com>
Haisu Wang 2023-09-16 03:04:40 +08:00
parent 701785a3f8
commit 3796036b9f
9 changed files with 1029 additions and 21 deletions

@@ -266,6 +266,9 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
#ifdef CONFIG_BLK_CGROUP_IOCOST
bio->bi_iocost_cost = 0;
#endif
#ifdef CONFIG_BLK_WBT
bio->bi_wbt_acct = 0;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
bio->bi_crypt_context = NULL;

@@ -36,6 +36,7 @@
#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-throttle.h"
#include "blk-wbt.h"
static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
@@ -2055,6 +2056,10 @@ int blkcg_init_disk(struct gendisk *disk)
if (ret)
goto err_ioprio_exit;
ret = blk_wbt_init(disk);
if (ret)
goto err_ioprio_exit;
return 0;
err_ioprio_exit:

@@ -19,11 +19,6 @@ enum rq_qos_id {
RQ_QOS_COST,
};
struct rq_wait {
wait_queue_head_t wait;
atomic_t inflight;
};
struct rq_qos {
const struct rq_qos_ops *ops;
struct gendisk *disk;

@@ -10,6 +10,7 @@
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/debugfs.h>
#include <linux/rue.h>
#include "blk.h"
#include "blk-mq.h"
@@ -546,6 +547,11 @@ QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
#ifdef CONFIG_BLK_CGROUP
QUEUE_RW_ENTRY(queue_wbt_class_lat, "wbt_class_lat_usec");
QUEUE_RW_ENTRY(queue_wbt_class_conf, "wbt_class_conf");
#endif
static struct queue_sysfs_entry queue_nbd_ignore_blksize_set_entry = {
.attr = {.name = "nbd_ignore_blksize_set", .mode = S_IRUGO | S_IWUSR },
.show = queue_nbd_ignore_blksize_set_show,
@@ -691,6 +697,10 @@ static struct attribute *blk_mq_queue_attrs[] = {
&queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_WBT
&queue_wb_lat_entry.attr,
#endif
#ifdef CONFIG_BLK_CGROUP
&queue_wbt_class_lat_entry.attr,
&queue_wbt_class_conf_entry.attr,
#endif
NULL,
};

File diff suppressed because it is too large.

@@ -16,6 +16,14 @@ void wbt_set_write_cache(struct request_queue *, bool);
u64 wbt_default_latency_nsec(struct request_queue *);
#ifdef CONFIG_BLK_CGROUP
ssize_t queue_wbt_class_lat_show(struct request_queue *q, char *page);
ssize_t queue_wbt_class_lat_store(struct request_queue *q, const char *page, size_t count);
ssize_t queue_wbt_class_conf_show(struct request_queue *q, char *page);
ssize_t queue_wbt_class_conf_store(struct request_queue *q, const char *page, size_t count);
int blk_wbt_init(struct gendisk *disk);
#endif
#else
static inline void wbt_disable_default(struct gendisk *disk)
@@ -28,6 +36,34 @@ static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
{
}
static inline ssize_t queue_wbt_class_lat_show(struct request_queue *q,
char *page)
{
return 0;
}
static inline ssize_t queue_wbt_class_lat_store(struct request_queue *q,
const char *page, size_t count)
{
return 0;
}
static inline ssize_t queue_wbt_class_conf_show(struct request_queue *q,
char *page)
{
return 0;
}
static inline ssize_t queue_wbt_class_conf_store(struct request_queue *q,
const char *page, size_t count)
{
return 0;
}
static inline int blk_wbt_init(struct gendisk *disk)
{
return 0;
}
#endif /* CONFIG_BLK_WBT */
#endif

@@ -410,6 +410,9 @@ struct rue_io_module_ops {
unsigned int (*calc_readwrite_iops_limit)(struct throtl_data *td, struct throtl_grp *tg,
struct blkcg_gq *blkg, int rw, unsigned int ret);
int (*new_dynamic_ratio)(struct throtl_grp *tg);
bool (*throtl_info_scale_up)(struct wbt_throtl_info *ti, bool force_max);
bool (*throtl_info_scale_down)(struct wbt_throtl_info *ti, bool hard_throttle);
void (*throtl_info_calc_limit)(struct wbt_throtl_info *ti);
KABI_RESERVE(1);
KABI_RESERVE(2);

@@ -296,6 +296,10 @@ struct bio {
#ifdef CONFIG_BLK_CGROUP_IOCOST
u64 bi_iocost_cost;
#endif
#ifdef CONFIG_BLK_WBT
unsigned int bi_wbt_acct;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION

@@ -49,7 +49,7 @@ extern unsigned int sysctl_io_qos_enabled __read_mostly;
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
*/
#define BLKCG_MAX_POLS 6
#define BLKCG_MAX_POLS 7
#define DISK_MAX_PARTS 256
#define DISK_NAME_LEN 32
@@ -557,6 +557,53 @@ struct request_queue {
bool mq_sysfs_init_done;
};
enum {
WBT_RWQ_BG = 0,
WBT_RWQ_KSWAPD,
WBT_RWQ_DISCARD,
WBT_NUM_RWQ,
};
struct rq_wait {
wait_queue_head_t wait;
atomic_t inflight;
};
struct wbt_throtl_info {
unsigned int max_depth;
unsigned int min_depth;
unsigned int scale_up_percent;
unsigned int scale_down_percent;
unsigned int current_depth;
/* calculated by wg_calc_limit from current_depth */
unsigned int wb_normal, wb_background;
unsigned long last_issue; /* last non-throttled issue */
unsigned long last_comp; /* last non-throttled comp */
struct rq_wait rq_wait[WBT_NUM_RWQ]; /* online, the focus is on writeback IO */
u64 min_lat_nsec; /* user-set min latency */
/*
 * used to record how many expired bios happened
 * for each cgroup priority on this request queue
 */
atomic64_t read_expired_cnt;
/* debug info */
atomic64_t tracked_cnt[WBT_NUM_RWQ];
atomic64_t finished_cnt[WBT_NUM_RWQ];
atomic64_t read_cnt;
atomic64_t direct_write_cnt;
atomic64_t escaped_merge_cnt;
atomic64_t wr_sync_cnt;
unsigned int recent_limit;
u64 recent_rd_latency_us;
struct blk_rq_stat __percpu *read_lat_stats;
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
#define QUEUE_FLAG_DYING 1 /* queue being torn down */