zram: writeback throttle
If there are lots of write IO with flash device, it could have a wearout problem of storage. To overcome the problem, admin needs to design write limitation to guarantee flash health for entire product life. This patch creates a new knob "writeback_limit" for zram. writeback_limit's default value is 0 so that it doesn't limit any writeback. If admin want to measure writeback count in a certain period, he could know it via /sys/block/zram0/bd_stat's 3rd column. If admin want to limit writeback as per-day 400M, he could do it like below. MB_SHIFT=20 4K_SHIFT=12 echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit. If admin want to allow further write again, he could do it like below echo 0 > /sys/block/zram0/writeback_limit If admin want to see remaining writeback budget, cat /sys/block/zram0/writeback_limit The writeback_limit count will reset whenever you reset zram (e.g., system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of writeback happened until you reset the zram to allocate extra writeback budget in next setting is user's job. [minchan@kernel.org: v4] Link: http://lkml.kernel.org/r/20181203024045.153534-8-minchan@kernel.org Link: http://lkml.kernel.org/r/20181127055429.251614-8-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Joey Pabalinas <joeypabalinas@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
23eddf39b2
commit
bb416d18b8
|
@ -121,3 +121,12 @@ Description:
|
|||
The bd_stat file is read-only and represents backing device's
|
||||
statistics (bd_count, bd_reads, bd_writes) in a format
|
||||
similar to block layer statistics file format.
|
||||
|
||||
What: /sys/block/zram<id>/writeback_limit
|
||||
Date: November 2018
|
||||
Contact: Minchan Kim <minchan@kernel.org>
|
||||
Description:
|
||||
The writeback_limit file is read-write and specifies the maximum
|
||||
amount of writeback ZRAM can do, in units of 4KB. The limit can be
|
||||
changed at run time; "0" disables the limit.
|
||||
No limit is the initial state.
|
||||
|
|
|
@ -164,6 +164,8 @@ reset WO trigger device reset
|
|||
mem_used_max WO reset the `mem_used_max' counter (see later)
|
||||
mem_limit WO specifies the maximum amount of memory ZRAM can use
|
||||
to store the compressed data
|
||||
writeback_limit RW specifies the maximum amount of write IO zram can
|
||||
write out to backing device as 4KB unit
|
||||
max_comp_streams RW the number of possible concurrent compress operations
|
||||
comp_algorithm RW show and change the compression algorithm
|
||||
compact WO trigger memory compaction
|
||||
|
@ -275,6 +277,35 @@ Admin can request writeback of those idle pages at right timing via
|
|||
|
||||
With the command, zram writeback idle pages from memory to the storage.
|
||||
|
||||
If there are lots of write IO with flash device, potentially, it has
|
||||
flash wearout problem so that admin needs to design write limitation
|
||||
to guarantee storage health for entire product life.
|
||||
To overcome the concern, zram supports "writeback_limit".
|
||||
The default value of "writeback_limit" is 0, which means writeback is
|
||||
not limited. If the admin wants to measure the writeback count over a
|
||||
certain period, it is available as the 3rd column of /sys/block/zram0/bd_stat.
|
||||
|
||||
If admin want to limit writeback as per-day 400M, he could do it
|
||||
like below.
|
||||
|
||||
MB_SHIFT=20
|
||||
FOUR_K_SHIFT=12
|
||||
echo $((400<<MB_SHIFT>>FOUR_K_SHIFT)) > \
|
||||
/sys/block/zram0/writeback_limit
|
||||
|
||||
If admin want to allow further write again, he could do it like below
|
||||
|
||||
echo 0 > /sys/block/zram0/writeback_limit
|
||||
|
||||
If the admin wants to see the writeback budget remaining since it was last set:
|
||||
|
||||
cat /sys/block/zram0/writeback_limit
|
||||
|
||||
The writeback_limit count is reset whenever zram is reset (e.g.,
|
||||
system reboot, echo 1 > /sys/block/zramX/reset), so it is the user's job
|
||||
to record how much writeback happened before the reset and to account
|
||||
for it when allocating the writeback budget in the next setting.
|
||||
|
||||
= memory tracking
|
||||
|
||||
With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
|
||||
|
|
|
@ -330,6 +330,39 @@ next:
|
|||
}
|
||||
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
/*
 * sysfs store handler for the writeback_limit attribute.
 *
 * Parses @buf as an unsigned base-10 number and stores it in
 * zram->stats.bd_wb_limit (the zram documentation describes the value
 * as a count of 4KB units). Writing 0 disables the limit.
 *
 * Returns @len on success, or -EINVAL if @buf does not parse.
 */
static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;

	if (kstrtoull(buf, 10, &val))
		return -EINVAL;

	down_read(&zram->init_lock);
	atomic64_set(&zram->stats.bd_wb_limit, val);
	/*
	 * writeback_store() latches stop_writeback once the budget has
	 * been counted down to zero. Clear it on every successful write,
	 * not only for val == 0: a fresh non-zero budget must be usable
	 * right away, otherwise writeback keeps failing with -EIO even
	 * though a positive limit has just been stored.
	 */
	zram->stop_writeback = false;
	up_read(&zram->init_lock);

	return len;
}
|
||||
|
||||
static ssize_t writeback_limit_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
u64 val;
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
|
||||
down_read(&zram->init_lock);
|
||||
val = atomic64_read(&zram->stats.bd_wb_limit);
|
||||
up_read(&zram->init_lock);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
}
|
||||
|
||||
static void reset_bdev(struct zram *zram)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
|
@ -612,6 +645,11 @@ static ssize_t writeback_store(struct device *dev,
|
|||
bvec.bv_len = PAGE_SIZE;
|
||||
bvec.bv_offset = 0;
|
||||
|
||||
if (zram->stop_writeback) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!blk_idx) {
|
||||
blk_idx = alloc_block_bdev(zram);
|
||||
if (!blk_idx) {
|
||||
|
@ -694,6 +732,11 @@ static ssize_t writeback_store(struct device *dev,
|
|||
zram_set_element(zram, index, blk_idx);
|
||||
blk_idx = 0;
|
||||
atomic64_inc(&zram->stats.pages_stored);
|
||||
if (atomic64_add_unless(&zram->stats.bd_wb_limit,
|
||||
-1 << (PAGE_SHIFT - 12), 0)) {
|
||||
if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
|
||||
zram->stop_writeback = true;
|
||||
}
|
||||
next:
|
||||
zram_slot_unlock(zram, index);
|
||||
}
|
||||
|
@ -1018,6 +1061,7 @@ static ssize_t mm_stat_show(struct device *dev,
|
|||
}
|
||||
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
/* Scale a page count by 2^(PAGE_SHIFT - 12), i.e. express it in 4KB units. */
#define FOUR_K(nr_pages) ((nr_pages) * (1 << (PAGE_SHIFT - 12)))
|
||||
static ssize_t bd_stat_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
|
@ -1027,9 +1071,9 @@ static ssize_t bd_stat_show(struct device *dev,
|
|||
down_read(&zram->init_lock);
|
||||
ret = scnprintf(buf, PAGE_SIZE,
|
||||
"%8llu %8llu %8llu\n",
|
||||
(u64)atomic64_read(&zram->stats.bd_count) * (PAGE_SHIFT - 12),
|
||||
(u64)atomic64_read(&zram->stats.bd_reads) * (PAGE_SHIFT - 12),
|
||||
(u64)atomic64_read(&zram->stats.bd_writes) * (PAGE_SHIFT - 12));
|
||||
FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
|
||||
FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
|
||||
FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
|
||||
up_read(&zram->init_lock);
|
||||
|
||||
return ret;
|
||||
|
@ -1767,6 +1811,7 @@ static DEVICE_ATTR_RW(comp_algorithm);
|
|||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
static DEVICE_ATTR_RW(backing_dev);
|
||||
static DEVICE_ATTR_WO(writeback);
|
||||
static DEVICE_ATTR_RW(writeback_limit);
|
||||
#endif
|
||||
|
||||
static struct attribute *zram_disk_attrs[] = {
|
||||
|
@ -1782,6 +1827,7 @@ static struct attribute *zram_disk_attrs[] = {
|
|||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
&dev_attr_backing_dev.attr,
|
||||
&dev_attr_writeback.attr,
|
||||
&dev_attr_writeback_limit.attr,
|
||||
#endif
|
||||
&dev_attr_io_stat.attr,
|
||||
&dev_attr_mm_stat.attr,
|
||||
|
|
|
@ -86,6 +86,7 @@ struct zram_stats {
|
|||
atomic64_t bd_count; /* no. of pages in backing device */
|
||||
atomic64_t bd_reads; /* no. of reads from backing device */
|
||||
atomic64_t bd_writes; /* no. of writes from backing device */
|
||||
atomic64_t bd_wb_limit; /* writeback limit of backing device */
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -113,6 +114,7 @@ struct zram {
|
|||
*/
|
||||
bool claim; /* Protected by bdev->bd_mutex */
|
||||
struct file *backing_dev;
|
||||
bool stop_writeback;
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
struct block_device *bdev;
|
||||
unsigned int old_block_size;
|
||||
|
|
Loading…
Reference in New Issue