erofs: introduce multipage per-CPU buffers
To deal with cases where inplace decompression is infeasible for some inplace I/O, per-CPU buffers were introduced to avoid page allocation latency and thrashing for low-latency decompression algorithms such as lz4.

For the big pcluster feature, introduce multipage per-CPU buffers to keep such inplace I/O pclusters temporarily as well, but note that per-CPU pages are only consecutive in virtual memory. When a new big pcluster filesystem is mounted, its maximum pclustersize is read and the per-CPU buffers can be grown if needed. Shrinking adjustable per-CPU buffers is more complex (since we don't know whether such a size is still in use), so currently just release them all when unloading.

Link: https://lore.kernel.org/r/20210409190630.19569-1-xiang@kernel.org
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
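For reference, the calling convention the new interface targets is roughly the following. This is an illustrative sketch only (the helper name and error code are made up here); erofs_get_pcpubuf()/erofs_put_pcpubuf() are the interfaces added by this patch:

/* illustrative sketch of a hypothetical caller; not part of this patch */
static int example_use_pcpubuf(unsigned int nrpages, const void *src, size_t len)
{
	void *buf;

	buf = erofs_get_pcpubuf(nrpages);	/* pins the CPU and takes pcb->lock */
	if (!buf)
		return -EOPNOTSUPP;	/* per-CPU buffer has fewer than nrpages pages */

	/* len must not exceed nrpages * PAGE_SIZE */
	memcpy(buf, src, len);		/* use it as a temporary bounce buffer */
	/* ... decompress from/into buf while preemption stays disabled ... */

	erofs_put_pcpubuf(buf);		/* drops pcb->lock and re-enables preemption */
	return 0;
}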
parent 54e0b6c873
commit 524887347f
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 obj-$(CONFIG_EROFS_FS) += erofs.o
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
@@ -47,7 +47,9 @@ int z_erofs_load_lz4_config(struct super_block *sb,
 	EROFS_SB(sb)->lz4.max_distance_pages = distance ?
 					DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
 					LZ4_MAX_DISTANCE_PAGES;
-	return 0;
+
+	/* TODO: use max pclusterblks after bigpcluster is enabled */
+	return erofs_pcpubuf_growsize(1);
 }
 
 static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
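The TODO above refers to the follow-up big pcluster support: once the maximum pcluster size is known at mount time, the grow request would pass that value instead of a single page, along the lines of the hypothetical sketch below (the member name is illustrative and not defined by this patch):

	/* hypothetical follow-up once big pcluster advertises its size */
	return erofs_pcpubuf_growsize(EROFS_SB(sb)->max_pclusterblks);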
@@ -114,7 +116,7 @@ static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
 	 * pages should be copied in order to avoid being overlapped.
 	 */
 	struct page **in = rq->in;
-	u8 *const tmp = erofs_get_pcpubuf(0);
+	u8 *const tmp = erofs_get_pcpubuf(1);
 	u8 *tmpp = tmp;
 	unsigned int inlen = rq->inputsize - pageofs_in;
 	unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
@@ -271,7 +273,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
 	 * compressed data is preferred.
 	 */
 	if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
-		dst = erofs_get_pcpubuf(0);
+		dst = erofs_get_pcpubuf(1);
 		if (IS_ERR(dst))
 			return PTR_ERR(dst);
 
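In both call sites above the argument changes meaning: the old helper took a page index into a fixed-size per-CPU array, while the new one takes the number of virtually-consecutive pages the caller requires, so a single-page request now reads:

	/* old: page index into the fixed EROFS_PCPUBUF_NR_PAGES-page array */
	dst = erofs_get_pcpubuf(0);
	/* new: number of virtually-consecutive pages the caller needs */
	dst = erofs_get_pcpubuf(1);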
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
@@ -197,9 +197,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
 
 /* hard limit of pages per compressed cluster */
 #define Z_EROFS_CLUSTER_MAX_PAGES	(CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
-#define EROFS_PCPUBUF_NR_PAGES		Z_EROFS_CLUSTER_MAX_PAGES
-#else
-#define EROFS_PCPUBUF_NR_PAGES		0
 #endif	/* !CONFIG_EROFS_FS_ZIP */
 
 /* we strictly follow PAGE_SIZE and no buffer head yet */
@@ -405,24 +402,16 @@ int erofs_namei(struct inode *dir, struct qstr *name,
 /* dir.c */
 extern const struct file_operations erofs_dir_fops;
 
+/* pcpubuf.c */
+void *erofs_get_pcpubuf(unsigned int requiredpages);
+void erofs_put_pcpubuf(void *ptr);
+int erofs_pcpubuf_growsize(unsigned int nrpages);
+void erofs_pcpubuf_init(void);
+void erofs_pcpubuf_exit(void);
+
 /* utils.c / zdata.c */
 struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
 
-#if (EROFS_PCPUBUF_NR_PAGES > 0)
-void *erofs_get_pcpubuf(unsigned int pagenr);
-#define erofs_put_pcpubuf(buf) do { \
-	(void)&(buf);	\
-	preempt_enable();	\
-} while (0)
-#else
-static inline void *erofs_get_pcpubuf(unsigned int pagenr)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-#define erofs_put_pcpubuf(buf) do {} while (0)
-#endif
-
 #ifdef CONFIG_EROFS_FS_ZIP
 int erofs_workgroup_put(struct erofs_workgroup *grp);
 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Gao Xiang <xiang@kernel.org>
+ *
+ * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
+ * per-CPU virtual memory (in pages) in advance to store such inplace I/O
+ * data if inplace decompression is failed (due to unmet inplace margin for
+ * example).
+ */
+#include "internal.h"
+
+struct erofs_pcpubuf {
+	raw_spinlock_t lock;
+	void *ptr;
+	struct page **pages;
+	unsigned int nrpages;
+};
+
+static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
+
+void *erofs_get_pcpubuf(unsigned int requiredpages)
+	__acquires(pcb->lock)
+{
+	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
+
+	raw_spin_lock(&pcb->lock);
+	/* check if the per-CPU buffer is too small */
+	if (requiredpages > pcb->nrpages) {
+		raw_spin_unlock(&pcb->lock);
+		put_cpu_var(erofs_pcb);
+		/* (for sparse checker) pretend pcb->lock is still taken */
+		__acquire(pcb->lock);
+		return NULL;
+	}
+	return pcb->ptr;
+}
+
+void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
+{
+	struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
+
+	DBG_BUGON(pcb->ptr != ptr);
+	raw_spin_unlock(&pcb->lock);
+	put_cpu_var(erofs_pcb);
+}
+
+/* the next step: support per-CPU page buffers hotplug */
+int erofs_pcpubuf_growsize(unsigned int nrpages)
+{
+	static DEFINE_MUTEX(pcb_resize_mutex);
+	static unsigned int pcb_nrpages;
+	LIST_HEAD(pagepool);
+	int delta, cpu, ret, i;
+
+	mutex_lock(&pcb_resize_mutex);
+	delta = nrpages - pcb_nrpages;
+	ret = 0;
+	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
+	if (delta <= 0)
+		goto out;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+		struct page **pages, **oldpages;
+		void *ptr, *old_ptr;
+
+		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		for (i = 0; i < nrpages; ++i) {
+			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
+			if (!pages[i]) {
+				ret = -ENOMEM;
+				oldpages = pages;
+				goto free_pagearray;
+			}
+		}
+		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
+		if (!ptr) {
+			ret = -ENOMEM;
+			oldpages = pages;
+			goto free_pagearray;
+		}
+		raw_spin_lock(&pcb->lock);
+		old_ptr = pcb->ptr;
+		pcb->ptr = ptr;
+		oldpages = pcb->pages;
+		pcb->pages = pages;
+		i = pcb->nrpages;
+		pcb->nrpages = nrpages;
+		raw_spin_unlock(&pcb->lock);
+
+		if (!oldpages) {
+			DBG_BUGON(old_ptr);
+			continue;
+		}
+
+		if (old_ptr)
+			vunmap(old_ptr);
+free_pagearray:
+		while (i)
+			list_add(&oldpages[--i]->lru, &pagepool);
+		kfree(oldpages);
+		if (ret)
+			break;
+	}
+	pcb_nrpages = nrpages;
+	put_pages_list(&pagepool);
+out:
+	mutex_unlock(&pcb_resize_mutex);
+	return ret;
+}
+
+void erofs_pcpubuf_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+
+		raw_spin_lock_init(&pcb->lock);
+	}
+}
+
+void erofs_pcpubuf_exit(void)
+{
+	int cpu, i;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+
+		if (pcb->ptr) {
+			vunmap(pcb->ptr);
+			pcb->ptr = NULL;
+		}
+		if (!pcb->pages)
+			continue;
+
+		for (i = 0; i < pcb->nrpages; ++i)
+			if (pcb->pages[i])
+				put_page(pcb->pages[i]);
+		kfree(pcb->pages);
+		pcb->pages = NULL;
+	}
+}
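Note how erofs_pcpubuf_growsize() is meant to be driven: it only ever grows the buffers, concurrent callers are serialized by pcb_resize_mutex, and the per-CPU pointer swap happens under each CPU's pcb->lock, so it cannot race with an active user of that buffer; the old mapping is only vunmapped afterwards. A minimal sketch of a hypothetical mount-time caller (assuming the mount path already knows its largest pcluster size in pages):

/* illustrative sketch only; not part of this patch */
static int example_grow_for_mount(unsigned int max_pclusterpages)
{
	/*
	 * Safe to call on every mount: requests not larger than the current
	 * per-CPU buffer size are no-ops (delta <= 0), larger ones remap.
	 */
	return erofs_pcpubuf_growsize(max_pclusterpages);
}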
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
@@ -655,6 +655,7 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto shrinker_err;
 
+	erofs_pcpubuf_init();
 	err = z_erofs_init_zip_subsystem();
 	if (err)
 		goto zip_err;
@@ -684,6 +685,7 @@ static void __exit erofs_module_exit(void)
 	/* Ensure all RCU free inodes are safe before cache is destroyed. */
 	rcu_barrier();
 	kmem_cache_destroy(erofs_inode_cachep);
+	erofs_pcpubuf_exit();
 }
 
 /* get filesystem statistics */
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
@@ -21,18 +21,6 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
 	return page;
 }
 
-#if (EROFS_PCPUBUF_NR_PAGES > 0)
-static struct {
-	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
-} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
-
-void *erofs_get_pcpubuf(unsigned int pagenr)
-{
-	preempt_disable();
-	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
-}
-#endif
-
 #ifdef CONFIG_EROFS_FS_ZIP
 /* global shrink count (for all mounted EROFS instances) */
 static atomic_long_t erofs_global_shrink_cnt;