Squashfs: add multi-threaded decompression using percpu variable

Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
  * the nature of percpu variables ensures decompression is
    load-balanced across the multiple cores.
  * simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
This commit is contained in:
Phillip Lougher 2013-11-18 02:31:36 +00:00
parent cd59c2ec5f
commit d208383d64
3 changed files with 145 additions and 20 deletions

View File

@ -25,6 +25,50 @@ config SQUASHFS
If unsure, say N.
choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
Squashfs now supports three parallelisation options for
decompression. Each one exhibits various trade-offs between
decompression performance and CPU and memory usage.
If in doubt, select "Single threaded compression"
config SQUASHFS_DECOMP_SINGLE
bool "Single threaded compression"
help
Traditionally Squashfs has used single-threaded decompression.
Only one block (data or metadata) can be decompressed at any
one time. This limits CPU and memory usage to a minimum.
config SQUASHFS_DECOMP_MULTI
bool "Use multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.
This decompressor implementation uses up to two parallel
decompressors per core. It dynamically allocates decompressors
on a demand basis.
config SQUASHFS_DECOMP_MULTI_PERCPU
bool "Use percpu multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
This decompressor implementation uses a maximum of one
decompressor per core. It uses percpu variables to ensure
decompression is load-balanced across the cores.
endchoice
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
@ -63,19 +107,6 @@ config SQUASHFS_LZO
If unsure, say N.
config SQUASHFS_MULTI_DECOMPRESSOR
bool "Use multiple decompressors for handling parallel I/O"
depends on SQUASHFS
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.
If unsure, say N.
config SQUASHFS_XZ
bool "Include support for XZ compressed file systems"
depends on SQUASHFS

View File

@ -5,14 +5,10 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
squashfs-y += decompressor_multi.o
else
squashfs-y += decompressor_single.o
endif

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2013
* Phillip Lougher <phillip@squashfs.org.uk>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
/*
* This file implements multi-threaded decompression using percpu
* variables, one thread per cpu core.
*/
struct squashfs_stream {
void *stream;
};
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
struct squashfs_stream __percpu *percpu;
int err, cpu;
percpu = alloc_percpu(struct squashfs_stream);
if (percpu == NULL)
return ERR_PTR(-ENOMEM);
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
stream->stream = msblk->decompressor->init(msblk, comp_opts);
if (IS_ERR(stream->stream)) {
err = PTR_ERR(stream->stream);
goto out;
}
}
kfree(comp_opts);
return (__force void *) percpu;
out:
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
if (!IS_ERR_OR_NULL(stream->stream))
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
return ERR_PTR(err);
}
void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream;
int cpu;
if (msblk->stream) {
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
}
}
int squashfs_decompress(struct squashfs_sb_info *msblk,
void **buffer, struct buffer_head **bh, int b, int offset, int length,
int srclength, int pages)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream = get_cpu_ptr(percpu);
int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
bh, b, offset, length, srclength, pages);
put_cpu_ptr(stream);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
msblk->decompressor->name);
return res;
}
int squashfs_max_decompressors(void)
{
return num_possible_cpus();
}