Squashfs: add multi-threaded decompression using percpu variable

Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
  * the nature of percpu variables ensures decompression is
    load-balanced across the multiple cores.
  * simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
This commit is contained in:
Phillip Lougher 2013-11-18 02:31:36 +00:00
parent cd59c2ec5f
commit d208383d64
3 changed files with 145 additions and 20 deletions

View File

@ -25,6 +25,50 @@ config SQUASHFS
If unsure, say N. If unsure, say N.
choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
Squashfs now supports three parallelisation options for
decompression. Each one exhibits various trade-offs between
decompression performance and CPU and memory usage.
If in doubt, select "Single threaded compression"
config SQUASHFS_DECOMP_SINGLE
bool "Single threaded compression"
help
Traditionally Squashfs has used single-threaded decompression.
Only one block (data or metadata) can be decompressed at any
one time. This limits CPU and memory usage to a minimum.
config SQUASHFS_DECOMP_MULTI
bool "Use multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.
This decompressor implementation uses up to two parallel
decompressors per core. It dynamically allocates decompressors
on a demand basis.
config SQUASHFS_DECOMP_MULTI_PERCPU
bool "Use percpu multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
This decompressor implementation uses a maximum of one
decompressor per core. It uses percpu variables to ensure
decompression is load-balanced across the cores.
endchoice
config SQUASHFS_XATTR config SQUASHFS_XATTR
bool "Squashfs XATTR support" bool "Squashfs XATTR support"
depends on SQUASHFS depends on SQUASHFS
@ -63,19 +107,6 @@ config SQUASHFS_LZO
If unsure, say N. If unsure, say N.
config SQUASHFS_MULTI_DECOMPRESSOR
bool "Use multiple decompressors for handling parallel I/O"
depends on SQUASHFS
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.
If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.
If unsure, say N.
config SQUASHFS_XZ config SQUASHFS_XZ
bool "Include support for XZ compressed file systems" bool "Include support for XZ compressed file systems"
depends on SQUASHFS depends on SQUASHFS

View File

@ -5,14 +5,10 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o squashfs-y += namei.o super.o symlink.o decompressor.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
squashfs-y += decompressor_multi.o
else
squashfs-y += decompressor_single.o
endif

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2013
* Phillip Lougher <phillip@squashfs.org.uk>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
/*
* This file implements multi-threaded decompression using percpu
* variables, one thread per cpu core.
*/
struct squashfs_stream {
void *stream;
};
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
struct squashfs_stream __percpu *percpu;
int err, cpu;
percpu = alloc_percpu(struct squashfs_stream);
if (percpu == NULL)
return ERR_PTR(-ENOMEM);
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
stream->stream = msblk->decompressor->init(msblk, comp_opts);
if (IS_ERR(stream->stream)) {
err = PTR_ERR(stream->stream);
goto out;
}
}
kfree(comp_opts);
return (__force void *) percpu;
out:
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
if (!IS_ERR_OR_NULL(stream->stream))
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
return ERR_PTR(err);
}
void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream;
int cpu;
if (msblk->stream) {
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
}
}
int squashfs_decompress(struct squashfs_sb_info *msblk,
void **buffer, struct buffer_head **bh, int b, int offset, int length,
int srclength, int pages)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream = get_cpu_ptr(percpu);
int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
bh, b, offset, length, srclength, pages);
put_cpu_ptr(stream);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
msblk->decompressor->name);
return res;
}
int squashfs_max_decompressors(void)
{
return num_possible_cpus();
}