Squashfs: add multi-threaded decompression using percpu variable
Add a multi-threaded decompression implementation which uses percpu variables. Using percpu variables has advantages and disadvantages over implementations which do not use percpu variables. Advantages: * the nature of percpu variables ensures decompression is load-balanced across the multiple cores. * simplicity. Disadvantages: it limits decompression to one thread per core. Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
This commit is contained in:
parent
cd59c2ec5f
commit
d208383d64
|
@ -25,6 +25,50 @@ config SQUASHFS
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
choice
|
||||
prompt "Decompressor parallelisation options"
|
||||
depends on SQUASHFS
|
||||
help
|
||||
Squashfs now supports three parallelisation options for
|
||||
decompression. Each one exhibits various trade-offs between
|
||||
decompression performance and CPU and memory usage.
|
||||
|
||||
If in doubt, select "Single threaded decompression"
|
||||
|
||||
config SQUASHFS_DECOMP_SINGLE
|
||||
bool "Single threaded decompression"
|
||||
help
|
||||
Traditionally Squashfs has used single-threaded decompression.
|
||||
Only one block (data or metadata) can be decompressed at any
|
||||
one time. This limits CPU and memory usage to a minimum.
|
||||
|
||||
config SQUASHFS_DECOMP_MULTI
|
||||
bool "Use multiple decompressors for parallel I/O"
|
||||
help
|
||||
By default Squashfs uses a single decompressor but it gives
|
||||
poor performance on parallel I/O workloads when using multiple CPU
|
||||
machines due to waiting on decompressor availability.
|
||||
|
||||
If you have a parallel I/O workload and your system has enough memory,
|
||||
using this option may improve overall I/O performance.
|
||||
|
||||
This decompressor implementation uses up to two parallel
|
||||
decompressors per core. It dynamically allocates decompressors
|
||||
on a demand basis.
|
||||
|
||||
config SQUASHFS_DECOMP_MULTI_PERCPU
|
||||
bool "Use percpu multiple decompressors for parallel I/O"
|
||||
help
|
||||
By default Squashfs uses a single decompressor but it gives
|
||||
poor performance on parallel I/O workloads when using multiple CPU
|
||||
machines due to waiting on decompressor availability.
|
||||
|
||||
This decompressor implementation uses a maximum of one
|
||||
decompressor per core. It uses percpu variables to ensure
|
||||
decompression is load-balanced across the cores.
|
||||
|
||||
endchoice
|
||||
|
||||
config SQUASHFS_XATTR
|
||||
bool "Squashfs XATTR support"
|
||||
depends on SQUASHFS
|
||||
|
@ -63,19 +107,6 @@ config SQUASHFS_LZO
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config SQUASHFS_MULTI_DECOMPRESSOR
|
||||
bool "Use multiple decompressors for handling parallel I/O"
|
||||
depends on SQUASHFS
|
||||
help
|
||||
By default Squashfs uses a single decompressor but it gives
|
||||
poor performance on parallel I/O workloads when using multiple CPU
|
||||
machines due to waiting on decompressor availability.
|
||||
|
||||
If you have a parallel I/O workload and your system has enough memory,
|
||||
using this option may improve overall I/O performance.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config SQUASHFS_XZ
|
||||
bool "Include support for XZ compressed file systems"
|
||||
depends on SQUASHFS
|
||||
|
|
|
@ -5,14 +5,10 @@
|
|||
obj-$(CONFIG_SQUASHFS) += squashfs.o
|
||||
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
|
||||
squashfs-y += namei.o super.o symlink.o decompressor.o
|
||||
|
||||
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
|
||||
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
|
||||
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
|
||||
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
|
||||
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
|
||||
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
|
||||
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
|
||||
|
||||
ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
|
||||
squashfs-y += decompressor_multi.o
|
||||
else
|
||||
squashfs-y += decompressor_single.o
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright (c) 2013
|
||||
* Phillip Lougher <phillip@squashfs.org.uk>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/buffer_head.h>
|
||||
|
||||
#include "squashfs_fs.h"
|
||||
#include "squashfs_fs_sb.h"
|
||||
#include "decompressor.h"
|
||||
#include "squashfs.h"
|
||||
|
||||
/*
|
||||
* This file implements multi-threaded decompression using percpu
|
||||
* variables, one thread per cpu core.
|
||||
*/
|
||||
|
||||
/*
 * Per-CPU slot holding one decompressor instance.  One of these exists for
 * every possible CPU; the pointer is whatever the selected decompressor's
 * init() hook returned and is opaque to this file.
 */
struct squashfs_stream {
	void	*stream;	/* decompressor-private state */
};
|
||||
|
||||
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
|
||||
void *comp_opts)
|
||||
{
|
||||
struct squashfs_stream *stream;
|
||||
struct squashfs_stream __percpu *percpu;
|
||||
int err, cpu;
|
||||
|
||||
percpu = alloc_percpu(struct squashfs_stream);
|
||||
if (percpu == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
stream = per_cpu_ptr(percpu, cpu);
|
||||
stream->stream = msblk->decompressor->init(msblk, comp_opts);
|
||||
if (IS_ERR(stream->stream)) {
|
||||
err = PTR_ERR(stream->stream);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
kfree(comp_opts);
|
||||
return (__force void *) percpu;
|
||||
|
||||
out:
|
||||
for_each_possible_cpu(cpu) {
|
||||
stream = per_cpu_ptr(percpu, cpu);
|
||||
if (!IS_ERR_OR_NULL(stream->stream))
|
||||
msblk->decompressor->free(stream->stream);
|
||||
}
|
||||
free_percpu(percpu);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
|
||||
{
|
||||
struct squashfs_stream __percpu *percpu =
|
||||
(struct squashfs_stream __percpu *) msblk->stream;
|
||||
struct squashfs_stream *stream;
|
||||
int cpu;
|
||||
|
||||
if (msblk->stream) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
stream = per_cpu_ptr(percpu, cpu);
|
||||
msblk->decompressor->free(stream->stream);
|
||||
}
|
||||
free_percpu(percpu);
|
||||
}
|
||||
}
|
||||
|
||||
int squashfs_decompress(struct squashfs_sb_info *msblk,
|
||||
void **buffer, struct buffer_head **bh, int b, int offset, int length,
|
||||
int srclength, int pages)
|
||||
{
|
||||
struct squashfs_stream __percpu *percpu =
|
||||
(struct squashfs_stream __percpu *) msblk->stream;
|
||||
struct squashfs_stream *stream = get_cpu_ptr(percpu);
|
||||
int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
|
||||
bh, b, offset, length, srclength, pages);
|
||||
put_cpu_ptr(stream);
|
||||
|
||||
if (res < 0)
|
||||
ERROR("%s decompression failed, data probably corrupt\n",
|
||||
msblk->decompressor->name);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Report the upper bound on simultaneously usable decompressors for this
 * implementation: one per possible CPU, as given by num_possible_cpus().
 */
int squashfs_max_decompressors(void)
{
	int max_streams = num_possible_cpus();

	return max_streams;
}
|
Loading…
Reference in New Issue