rue/io: introduce per mem_cgroup sync interface

Introduce a per-cgroup sync interface (cgroup.sync), so that the
dirty pages of a cgroup can actually be written back to disk
without also flushing dirty pages generated elsewhere.
This avoids the long cgroup exit delays caused by a system-level
sync, and it also reduces IO jitter.

Note:
struct wb_writeback_work is moved from fs/fs-writeback.c to
include/linux/writeback.h, and wb_queue_work() is made non-static
and declared in that header.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Signed-off-by: Haisu Wang <haisuwang@tencent.com>
This commit is contained in:
Haisu Wang 2023-09-16 02:56:22 +08:00
parent a12bb1a43d
commit 826a0366a1
4 changed files with 52 additions and 21 deletions

View File

@ -29,6 +29,7 @@
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include <linux/rue.h>
#include "internal.h"
/*
@ -36,25 +37,6 @@
*/
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
/*
 * Passed into wb_writeback(), essentially a subset of writeback_control.
 *
 * Describes one unit of writeback work: an instance can be linked onto a
 * pending list through @list, and wb_queue_work() takes a pointer to one.
 * The field order and bit-field widths are part of the structure layout,
 * so do not reorder members casually.
 */
struct wb_writeback_work {
/* NOTE(review): presumably the number of pages this request should write - confirm against wb_writeback() */
long nr_pages;
/* NOTE(review): presumably restricts the work to one superblock when set - confirm */
struct super_block *sb;
/* One of enum writeback_sync_modes (e.g. WB_SYNC_ALL) */
enum writeback_sync_modes sync_mode;
/* Single-bit flags follow; the ones without a note are not explained in this view */
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned int auto_free:1; /* free on completion */
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
struct wb_completion *done; /* set if the caller waits */
};
/*
* If an inode is constantly having its pages dirtied, but then the
* updates stop dirtytime_expire_interval seconds in the past, it's
@ -157,8 +139,7 @@ static void finish_writeback_work(struct bdi_writeback *wb,
}
}
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);

View File

@ -86,6 +86,7 @@ struct blkcg {
struct rue_io_module_ops {
void (*blkcg_update_bandwidth)(struct blkcg *blkcg);
void (*cgroup_sync)(struct mem_cgroup *memcg);
KABI_RESERVE(1);
KABI_RESERVE(2);

View File

@ -34,6 +34,25 @@ enum writeback_sync_modes {
WB_SYNC_ALL, /* Wait on every mapping */
};
/*
 * Passed into wb_writeback(), essentially a subset of writeback_control.
 *
 * Describes one unit of writeback work: an instance can be linked onto a
 * pending list through @list, and wb_queue_work() takes a pointer to one.
 * The definition lives in this header (rather than privately in
 * fs/fs-writeback.c) so that code outside that file can fill one in.
 * The field order and bit-field widths are part of the structure layout,
 * so do not reorder members casually.
 */
struct wb_writeback_work {
/* NOTE(review): presumably the number of pages this request should write - confirm against wb_writeback() */
long nr_pages;
/* NOTE(review): presumably restricts the work to one superblock when set - confirm */
struct super_block *sb;
/* One of enum writeback_sync_modes (e.g. WB_SYNC_ALL, "Wait on every mapping") */
enum writeback_sync_modes sync_mode;
/* Single-bit flags follow; the ones without a note are not explained in this view */
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned int auto_free:1; /* free on completion */
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
struct wb_completion *done; /* set if the caller waits */
};
/*
* A control structure which tells the writeback code what to do. These are
* always on the stack, and hence need no locking. They are always initialised
@ -215,6 +234,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
size_t bytes);
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done);
/*
 * Per-cgroup sync entry point: takes the memory cgroup whose dirty pages
 * should be written back. The implementation is not visible from this
 * header. NOTE(review): confirm whether it waits for the writeback to
 * complete before returning.
 */
void cgroup_sync(struct mem_cgroup *memcg);
void cgroup_writeback_umount(void);
bool cleanup_offline_cgwb(struct bdi_writeback *wb);
@ -385,4 +405,6 @@ bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
/*
 * Hand a writeback work item to a bdi_writeback. Previously static to
 * fs/fs-writeback.c; declared here so other code can queue work items.
 * NOTE(review): ownership/lifetime rules for @work (see its auto_free and
 * done fields) are defined by the implementation - confirm before use.
 */
void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work);
#endif /* WRITEBACK_H */

View File

@ -6294,6 +6294,23 @@ static int mem_cgroup_bind_blkio_show(struct seq_file *m, void *v)
return 0;
}
/*
 * Write handler for the memory cgroup "sync" control file.
 *
 * Any write triggers the request; the written data (@buf) and the file
 * offset (@off) are not inspected.
 *
 * Returns:
 *   -EINVAL  if the file belongs to the root memory cgroup,
 *   -EPERM   if rue_io_enabled() reports false,
 *   @nbytes  otherwise, i.e. the whole write is reported as consumed.
 *
 * The cgroup_sync hook of the RUE IO module is only invoked when the
 * kernel is built with CONFIG_BLK_CGROUP; without it the write still
 * succeeds but nothing is called.
 */
static ssize_t mem_cgroup_sync_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, loff_t off)
{
	struct mem_cgroup *target = mem_cgroup_from_css(of_css(of));

	/* The root cgroup is rejected before anything else is checked. */
	if (mem_cgroup_is_root(target))
		return -EINVAL;

	if (rue_io_enabled()) {
#ifdef CONFIG_BLK_CGROUP
		/* Dispatch to the IO module's per-cgroup sync hook. */
		RUE_CALL_VOID(IO, cgroup_sync, target);
#endif
		return nbytes;
	}

	/* RUE IO support is switched off. */
	return -EPERM;
}
static u64 memory_current_read(struct cgroup_subsys_state *css,
struct cftype *cft);
static int memory_low_show(struct seq_file *m, void *v);
@ -8793,6 +8810,11 @@ static struct cftype memory_files[] = {
.seq_show = memory_async_distance_delta_show,
.write = memory_async_distance_delta_write,
},
{
.name = "sync",
.flags = CFTYPE_NOT_ON_ROOT,
.write = mem_cgroup_sync_write,
},
{ } /* terminate */
};
@ -9785,6 +9807,11 @@ static struct cftype memsw_files[] = {
.write = mem_cgroup_bind_blkio_write,
.seq_show = mem_cgroup_bind_blkio_show,
},
{
.name = "sync",
.flags = CFTYPE_NOT_ON_ROOT,
.write = mem_cgroup_sync_write,
},
{ }, /* terminate */
};