linux/fs/btrfs/compression.h

/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_

void btrfs_init_compress(void);
void btrfs_exit_compress(void);

int btrfs_compress_pages(int type, struct address_space *mapping,
			 u64 start, unsigned long len,
			 struct page **pages,
			 unsigned long nr_dest_pages,
			 unsigned long *out_pages,
			 unsigned long *total_in,
			 unsigned long *total_out,
			 unsigned long max_out);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
		     unsigned long start_byte, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
			      unsigned long total_out, u64 disk_start,
			      struct bio_vec *bvec, int vcnt,
			      unsigned long *pg_index,
			      unsigned long *pg_offset);

int btrfs_submit_compressed_write(struct inode *inode, u64 start,
				  unsigned long len, u64 disk_start,
				  unsigned long compressed_len,
				  struct page **compressed_pages,
				  unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
				 int mirror_num, unsigned long bio_flags);

struct btrfs_compress_op {
	struct list_head *(*alloc_workspace)(void);

	void (*free_workspace)(struct list_head *workspace);

	int (*compress_pages)(struct list_head *workspace,
			      struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out);

	int (*decompress_biovec)(struct list_head *workspace,
				 struct page **pages_in,
				 u64 disk_start,
				 struct bio_vec *bvec,
				 int vcnt,
				 size_t srclen);

	int (*decompress)(struct list_head *workspace,
			  unsigned char *data_in,
			  struct page *dest_page,
			  unsigned long start_byte,
			  size_t srclen, size_t destlen);
};

extern struct btrfs_compress_op btrfs_zlib_compress;
extern struct btrfs_compress_op btrfs_lzo_compress;

#endif
Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason <chris.mason@oracle.com> 2008-10-30 02:49:59 +08:00			`/*`
			`* Copyright (C) 2008 Oracle. All rights reserved.`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public`
			`* License v2 as published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public`
			`* License along with this program; if not, write to the`
			`* Free Software Foundation, Inc., 59 Temple Place - Suite 330,`
			`* Boston, MA 021110-1307, USA.`
			`*/`

			`#ifndef __BTRFS_COMPRESSION_`
			`#define __BTRFS_COMPRESSION_`

btrfs: return void in functions without error conditions Signed-off-by: Jeff Mahoney <jeffm@suse.com> 2012-03-01 21:56:26 +08:00			`void btrfs_init_compress(void);`
btrfs: Allow to add new compression algorithm Make the code aware of compression type, instead of always assuming zlib compression. Also make the zlib workspace function as common code for all compression types. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-12-17 14:21:50 +08:00			`void btrfs_exit_compress(void);`

			`int btrfs_compress_pages(int type, struct address_space *mapping,`
			`u64 start, unsigned long len,`
			`struct page **pages,`
			`unsigned long nr_dest_pages,`
			`unsigned long *out_pages,`
			`unsigned long *total_in,`
			`unsigned long *total_out,`
			`unsigned long max_out);`
			`int btrfs_decompress(int type, unsigned char data_in, struct page dest_page,`
			`unsigned long start_byte, size_t srclen, size_t destlen);`
btrfs: Extract duplicate decompress code Add a common function to copy decompressed data from working buffer to bio pages. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-11-08 15:22:19 +08:00			`int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,`
			`unsigned long total_out, u64 disk_start,`
			`struct bio_vec *bvec, int vcnt,`
btrfs: rename variables clashing with global function names reported by gcc -Wshadow: page_index, page_offset, new_inode, dev_name Signed-off-by: David Sterba <dsterba@suse.cz> 2011-04-19 20:29:38 +08:00			`unsigned long *pg_index,`
btrfs: Extract duplicate decompress code Add a common function to copy decompressed data from working buffer to bio pages. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-11-08 15:22:19 +08:00			`unsigned long *pg_offset);`
btrfs: Allow to add new compression algorithm Make the code aware of compression type, instead of always assuming zlib compression. Also make the zlib workspace function as common code for all compression types. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-12-17 14:21:50 +08:00
Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason <chris.mason@oracle.com> 2008-10-30 02:49:59 +08:00			`int btrfs_submit_compressed_write(struct inode *inode, u64 start,`
			`unsigned long len, u64 disk_start,`
			`unsigned long compressed_len,`
			`struct page **compressed_pages,`
			`unsigned long nr_pages);`
			`int btrfs_submit_compressed_read(struct inode inode, struct bio bio,`
			`int mirror_num, unsigned long bio_flags);`
btrfs: Allow to add new compression algorithm Make the code aware of compression type, instead of always assuming zlib compression. Also make the zlib workspace function as common code for all compression types. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-12-17 14:21:50 +08:00
			`struct btrfs_compress_op {`
			`struct list_head (alloc_workspace)(void);`

			`void (free_workspace)(struct list_head workspace);`

			`int (compress_pages)(struct list_head workspace,`
			`struct address_space *mapping,`
			`u64 start, unsigned long len,`
			`struct page **pages,`
			`unsigned long nr_dest_pages,`
			`unsigned long *out_pages,`
			`unsigned long *total_in,`
			`unsigned long *total_out,`
			`unsigned long max_out);`

			`int (decompress_biovec)(struct list_head workspace,`
			`struct page **pages_in,`
			`u64 disk_start,`
			`struct bio_vec *bvec,`
			`int vcnt,`
			`size_t srclen);`

			`int (decompress)(struct list_head workspace,`
			`unsigned char *data_in,`
			`struct page *dest_page,`
			`unsigned long start_byte,`
			`size_t srclen, size_t destlen);`
			`};`

			`extern struct btrfs_compress_op btrfs_zlib_compress;`
btrfs: Add lzo compression support Lzo is a much faster compression algorithm than gzib, so would allow more users to enable transparent compression, and some users can choose from compression ratio and speed for different applications Usage: # mount -t btrfs -o compress[=<zlib,lzo>] dev /mnt or # mount -t btrfs -o compress-force[=<zlib,lzo>] dev /mnt "-o compress" without argument is still allowed for compatability. Compatibility: If we mount a filesystem with lzo compression, it will not be able be mounted in old kernels. One reason is, otherwise btrfs will directly dump compressed data, which sits in inline extent, to user. Performance: The test copied a linux source tarball (~400M) from an ext4 partition to the btrfs partition, and then extracted it. (time in second) lzo zlib nocompress copy: 10.6 21.7 14.9 extract: 70.1 94.4 66.6 (data size in MB) lzo zlib nocompress copy: 185.87 108.69 394.49 extract: 193.80 132.36 381.21 Changelog: v1 -> v2: - Select LZO_COMPRESS and LZO_DECOMPRESS in btrfs Kconfig. - Add incompability flag. - Fix error handling in compress code. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-10-25 15:12:26 +08:00			`extern struct btrfs_compress_op btrfs_lzo_compress;`
btrfs: Allow to add new compression algorithm Make the code aware of compression type, instead of always assuming zlib compression. Also make the zlib workspace function as common code for all compression types. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> 2010-12-17 14:21:50 +08:00
Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason <chris.mason@oracle.com> 2008-10-30 02:49:59 +08:00			`#endif`