zram: introduce recompress sysfs knob
Allow zram to recompress pages (using secondary compression streams). Re-compression algorithms (we support up to 3 at this stage) are selected via recomp_algorithm: echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm Please read documentation for more details. We support several recompression modes: 1) IDLE pages recompression is activated by `idle` mode echo "type=idle" > /sys/block/zram0/recompress 2) Since there may be many idle pages user-space may pass a size threshold value (in bytes) and we will recompress pages only of equal or greater size: echo "threshold=888" > /sys/block/zram0/recompress 3) HUGE pages recompression is activated by `huge` mode echo "type=huge" > /sys/block/zram0/recompress 4) HUGE_IDLE pages recompression is activated by `huge_idle` mode echo "type=huge_idle" > /sys/block/zram0/recompress [senozhatsky@chromium.org: we should always zero out err variable in recompress loop] Link: https://lkml.kernel.org/r/20221110143423.3250790-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20221109115047.2921851-5-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> Acked-by: Minchan Kim <minchan@kernel.org> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Alexey Romanov <avromanov@sberdevices.ru> Cc: Nhat Pham <nphamcs@gmail.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Suleiman Souhlal <suleiman@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
5561347aa5
commit
84b33bf788
|
@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING
|
|||
/sys/kernel/debug/zram/zramX/block_state.
|
||||
|
||||
See Documentation/admin-guide/blockdev/zram.rst for more information.
|
||||
|
||||
config ZRAM_MULTI_COMP
|
||||
bool "Enable multiple compression streams"
|
||||
depends on ZRAM
|
||||
help
|
||||
This will enable multi-compression streams, so that ZRAM can
|
||||
re-compress pages using a potentially slower but more effective
|
||||
compression algorithm. Note, that IDLE page recompression
|
||||
requires ZRAM_MEMORY_TRACKING.
|
||||
|
|
|
@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
 * Record the compression-algorithm priority index for slot @index.
 *
 * The priority lives in a small bit-field of the slot's flags word that
 * starts at bit ZRAM_COMP_PRIORITY_BIT1 and is ZRAM_COMP_PRIORITY_MASK wide;
 * bits of @prio outside that mask are dropped.
 */
static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	/*
	 * Wipe whatever priority was stored before: the slot may already
	 * hold a recompressed object that is now being recompressed again.
	 */
	zram->table[index].flags &=
		~(ZRAM_COMP_PRIORITY_MASK << ZRAM_COMP_PRIORITY_BIT1);

	/* Store the new (masked) priority in the freshly cleared field. */
	zram->table[index].flags |=
		(prio & ZRAM_COMP_PRIORITY_MASK) << ZRAM_COMP_PRIORITY_BIT1;
}
|
||||
|
||||
/*
 * Return the compression-algorithm priority index stored in the flags word
 * of slot @index (the bit-field written by zram_set_priority()).
 */
static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	/* Shift the field down to bit 0, then drop all unrelated flag bits. */
	return (zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1) &
		ZRAM_COMP_PRIORITY_MASK;
}
|
||||
|
||||
/*
|
||||
* Check if request is within bounds and aligned on zram logical blocks.
|
||||
*/
|
||||
|
@ -1304,6 +1323,11 @@ static void zram_free_page(struct zram *zram, size_t index)
|
|||
atomic64_dec(&zram->stats.huge_pages);
|
||||
}
|
||||
|
||||
if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
|
||||
zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
|
||||
|
||||
zram_set_priority(zram, index, 0);
|
||||
|
||||
if (zram_test_flag(zram, index, ZRAM_WB)) {
|
||||
zram_clear_flag(zram, index, ZRAM_WB);
|
||||
free_block_bdev(zram, zram_get_element(zram, index));
|
||||
|
@ -1364,6 +1388,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
|
|||
unsigned long handle;
|
||||
unsigned int size;
|
||||
void *src, *dst;
|
||||
u32 prio;
|
||||
int ret;
|
||||
|
||||
handle = zram_get_handle(zram, index);
|
||||
|
@ -1380,8 +1405,10 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
|
|||
|
||||
size = zram_get_obj_size(zram, index);
|
||||
|
||||
if (size != PAGE_SIZE)
|
||||
zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
|
||||
if (size != PAGE_SIZE) {
|
||||
prio = zram_get_priority(zram, index);
|
||||
zstrm = zcomp_stream_get(zram->comps[prio]);
|
||||
}
|
||||
|
||||
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
|
||||
if (size == PAGE_SIZE) {
|
||||
|
@ -1393,7 +1420,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
|
|||
dst = kmap_atomic(page);
|
||||
ret = zcomp_decompress(zstrm, src, size, dst);
|
||||
kunmap_atomic(dst);
|
||||
zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
|
||||
zcomp_stream_put(zram->comps[prio]);
|
||||
}
|
||||
zs_unmap_object(zram->mem_pool, handle);
|
||||
return ret;
|
||||
|
@ -1624,6 +1651,235 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ZRAM_MULTI_COMP
|
||||
/*
|
||||
* This function will decompress (unless it's ZRAM_HUGE) the page and then
|
||||
* attempt to compress it using provided compression algorithm priority
|
||||
* (which is potentially more effective).
|
||||
*
|
||||
* Corresponding ZRAM slot should be locked.
|
||||
*/
|
||||
static int zram_recompress(struct zram *zram, u32 index, struct page *page,
|
||||
u32 threshold, u32 prio, u32 prio_max)
|
||||
{
|
||||
struct zcomp_strm *zstrm = NULL;
|
||||
unsigned long handle_old;
|
||||
unsigned long handle_new;
|
||||
unsigned int comp_len_old;
|
||||
unsigned int comp_len_new;
|
||||
void *src, *dst;
|
||||
int ret;
|
||||
|
||||
handle_old = zram_get_handle(zram, index);
|
||||
if (!handle_old)
|
||||
return -EINVAL;
|
||||
|
||||
comp_len_old = zram_get_obj_size(zram, index);
|
||||
/*
|
||||
* Do not recompress objects that are already "small enough".
|
||||
*/
|
||||
if (comp_len_old < threshold)
|
||||
return 0;
|
||||
|
||||
ret = zram_read_from_zspool(zram, page, index);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Iterate the secondary comp algorithms list (in order of priority)
|
||||
* and try to recompress the page.
|
||||
*/
|
||||
for (; prio < prio_max; prio++) {
|
||||
if (!zram->comps[prio])
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Skip if the object is already re-compressed with a higher
|
||||
* priority algorithm (or same algorithm).
|
||||
*/
|
||||
if (prio <= zram_get_priority(zram, index))
|
||||
continue;
|
||||
|
||||
zstrm = zcomp_stream_get(zram->comps[prio]);
|
||||
src = kmap_atomic(page);
|
||||
ret = zcomp_compress(zstrm, src, &comp_len_new);
|
||||
kunmap_atomic(src);
|
||||
|
||||
if (ret) {
|
||||
zcomp_stream_put(zram->comps[prio]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Continue until we make progress */
|
||||
if (comp_len_new >= huge_class_size ||
|
||||
comp_len_new >= comp_len_old ||
|
||||
(threshold && comp_len_new >= threshold)) {
|
||||
zcomp_stream_put(zram->comps[prio]);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Recompression was successful so break out */
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We did not try to recompress, e.g. when we have only one
|
||||
* secondary algorithm and the page is already recompressed
|
||||
* using that algorithm
|
||||
*/
|
||||
if (!zstrm)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* All secondary algorithms failed to re-compress the page in a way
|
||||
* that would save memory, mark the object as incompressible so that
|
||||
* we will not try to compress it again.
|
||||
*/
|
||||
if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) {
|
||||
zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Successful recompression but above threshold */
|
||||
if (threshold && comp_len_new >= threshold)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* No direct reclaim (slow path) for handle allocation and no
|
||||
* re-compression attempt (unlike in __zram_bvec_write()) since
|
||||
* we already have stored that object in zsmalloc. If we cannot
|
||||
* alloc memory for recompressed object then we bail out and
|
||||
* simply keep the old (existing) object in zsmalloc.
|
||||
*/
|
||||
handle_new = zs_malloc(zram->mem_pool, comp_len_new,
|
||||
__GFP_KSWAPD_RECLAIM |
|
||||
__GFP_NOWARN |
|
||||
__GFP_HIGHMEM |
|
||||
__GFP_MOVABLE);
|
||||
if (IS_ERR_VALUE(handle_new)) {
|
||||
zcomp_stream_put(zram->comps[prio]);
|
||||
return PTR_ERR((void *)handle_new);
|
||||
}
|
||||
|
||||
dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
|
||||
memcpy(dst, zstrm->buffer, comp_len_new);
|
||||
zcomp_stream_put(zram->comps[prio]);
|
||||
|
||||
zs_unmap_object(zram->mem_pool, handle_new);
|
||||
|
||||
zram_free_page(zram, index);
|
||||
zram_set_handle(zram, index, handle_new);
|
||||
zram_set_obj_size(zram, index, comp_len_new);
|
||||
zram_set_priority(zram, index, prio);
|
||||
|
||||
atomic64_add(comp_len_new, &zram->stats.compr_data_size);
|
||||
atomic64_inc(&zram->stats.pages_stored);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define RECOMPRESS_IDLE (1 << 0)
|
||||
#define RECOMPRESS_HUGE (1 << 1)
|
||||
|
||||
static ssize_t recompress_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP;
|
||||
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
|
||||
char *args, *param, *val;
|
||||
unsigned long index;
|
||||
struct page *page;
|
||||
ssize_t ret;
|
||||
|
||||
args = skip_spaces(buf);
|
||||
while (*args) {
|
||||
args = next_arg(args, ¶m, &val);
|
||||
|
||||
if (!*val)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(param, "type")) {
|
||||
if (!strcmp(val, "idle"))
|
||||
mode = RECOMPRESS_IDLE;
|
||||
if (!strcmp(val, "huge"))
|
||||
mode = RECOMPRESS_HUGE;
|
||||
if (!strcmp(val, "huge_idle"))
|
||||
mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(param, "threshold")) {
|
||||
/*
|
||||
* We will re-compress only idle objects equal or
|
||||
* greater in size than watermark.
|
||||
*/
|
||||
ret = kstrtouint(val, 10, &threshold);
|
||||
if (ret)
|
||||
return ret;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (threshold >= PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
down_read(&zram->init_lock);
|
||||
if (!init_done(zram)) {
|
||||
ret = -EINVAL;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
ret = len;
|
||||
for (index = 0; index < nr_pages; index++) {
|
||||
int err = 0;
|
||||
|
||||
zram_slot_lock(zram, index);
|
||||
|
||||
if (!zram_allocated(zram, index))
|
||||
goto next;
|
||||
|
||||
if (mode & RECOMPRESS_IDLE &&
|
||||
!zram_test_flag(zram, index, ZRAM_IDLE))
|
||||
goto next;
|
||||
|
||||
if (mode & RECOMPRESS_HUGE &&
|
||||
!zram_test_flag(zram, index, ZRAM_HUGE))
|
||||
goto next;
|
||||
|
||||
if (zram_test_flag(zram, index, ZRAM_WB) ||
|
||||
zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
|
||||
zram_test_flag(zram, index, ZRAM_SAME) ||
|
||||
zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
|
||||
goto next;
|
||||
|
||||
err = zram_recompress(zram, index, page, threshold,
|
||||
prio, ZRAM_MAX_COMPS);
|
||||
next:
|
||||
zram_slot_unlock(zram, index);
|
||||
if (err) {
|
||||
ret = err;
|
||||
break;
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
__free_page(page);
|
||||
|
||||
release_init_lock:
|
||||
up_read(&zram->init_lock);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* zram_bio_discard - handler on discard request
|
||||
* @index: physical block index in PAGE_SIZE units
|
||||
|
@ -2003,6 +2259,7 @@ static DEVICE_ATTR_RW(writeback_limit_enable);
|
|||
#endif
|
||||
#ifdef CONFIG_ZRAM_MULTI_COMP
|
||||
static DEVICE_ATTR_RW(recomp_algorithm);
|
||||
static DEVICE_ATTR_WO(recompress);
|
||||
#endif
|
||||
|
||||
static struct attribute *zram_disk_attrs[] = {
|
||||
|
@ -2029,6 +2286,7 @@ static struct attribute *zram_disk_attrs[] = {
|
|||
&dev_attr_debug_stat.attr,
|
||||
#ifdef CONFIG_ZRAM_MULTI_COMP
|
||||
&dev_attr_recomp_algorithm.attr,
|
||||
&dev_attr_recompress.attr,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
|
|
@ -40,6 +40,9 @@
|
|||
*/
|
||||
#define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)
|
||||
|
||||
/* Only 2 bits are allowed for comp priority index */
|
||||
#define ZRAM_COMP_PRIORITY_MASK 0x3
|
||||
|
||||
/* Flags for zram pages (table[page_no].flags) */
|
||||
enum zram_pageflags {
|
||||
/* zram slot is locked */
|
||||
|
@ -49,6 +52,10 @@ enum zram_pageflags {
|
|||
ZRAM_UNDER_WB, /* page is under writeback */
|
||||
ZRAM_HUGE, /* Incompressible page */
|
||||
ZRAM_IDLE, /* not accessed page since last idle marking */
|
||||
ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
|
||||
|
||||
ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */
|
||||
ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */
|
||||
|
||||
__NR_ZRAM_PAGEFLAGS,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue