for-4.16-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlpvikQACgkQxWXV+ddt
 WDs6qA//ZE7eEH0sKpD4Z+3gUevk/MMXwE9prRijEdjXz/K/UXtvpq0sI7HMQskZ
 Ls9Wmzof+3WEQoa08RQZFzwuclW1Udm09SqE2oHP2gXQB5rC0BtWdrlMaKUJy03y
 NUwxHetbE6TsFLU5HIVmi05NexNx9SVV6oJTWt00RlXTePw9Aoc88ikoXXUE2vqH
 wbH9/ccmM9EkDFxdG+YG5QX054kQV8/5RXdqBJnIiGVRX5ZsAY84AN9x9YoRCVUw
 wq9TfPu6XmeA6Uq6knpeLlXDms5w+FE3n5CduROk7Q7YNgpoZekF20c8uK8HzT4T
 KF8hc0QpQgRCVBJ8I4MbPSMRIDf3IWfZmWSDEDda/6/ep6Bl99b8PFvdDKDBMUct
 8wsgGrwGbHuz2l2QUIXjpBL9Cv9Tbu8vjmg0h2hFrpiH1c8JaXjKtJXAMtigWsZ1
 DdX+5Y0zqvV/YLpzKF4aMDWXIteN4qaznvjdmj3B7BxgcnITOV/cmPCyMplNrtUa
 Cs2fzGV5tpxhBzxE490v+frMULmLq2W1e6WPfFCqPKBCqulcR75TozDQS9M2/h4k
 uAZzVKoguHrUPP1ONVas9aVC05K473nbYPd28eecYwkZ4z32hK4SAdnQY1aPreTe
 axoV7p7a+i1bkzT5LK6gIfpddVWth8w45nz4P0lwxp0Z6XlkbJE=
 =Irul
 -----END PGP SIGNATURE-----

Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Features or user visible changes:

   - fallocate: implement zero range mode

   - avoid losing data raid profile when deleting a device

   - tree item checker: more checks for directory items and xattrs

  Notable fixes:

   - raid56 recovery: don't use cached stripes, that could be
     potentially changed and a later RMW or recovery would lead to
     corruptions or failures

   - let raid56 try harder to rebuild damaged data, reading from all
     stripes if necessary

   - fix scrub to repair raid56 in a similar way as in the case above

  Other:

   - cleanups: device freeing, removed some call indirections, redundant
     bio_put/_get, unused parameters, refactorings and renames

   - RCU list traversal fixups

   - simplify mount callchain, remove recursing back when mounting a
     subvolume

   - plug for fsync, may improve bio merging on multiple devices

   - compression heuristic: replace heap sort with radix sort, gains some
     performance

   - add extent map selftests, buffered write vs dio"

* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
  btrfs: drop devid as device_list_add() arg
  btrfs: get device pointer from device_list_add()
  btrfs: set the total_devices in device_list_add()
  btrfs: move pr_info into device_list_add
  btrfs: make btrfs_free_stale_devices() to match the path
  btrfs: rename btrfs_free_stale_devices() arg to skip_dev
  btrfs: make btrfs_free_stale_devices() argument optional
  btrfs: make btrfs_free_stale_device() to iterate all stales
  btrfs: no need to check for btrfs_fs_devices::seeding
  btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
  Btrfs: noinline merge_extent_mapping
  Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
  Btrfs: extent map selftest: dio write vs dio read
  Btrfs: extent map selftest: buffered write vs dio read
  Btrfs: add extent map selftests
  Btrfs: move extent map specific code to extent_map.c
  Btrfs: add helper for em merge logic
  Btrfs: fix unexpected EEXIST from btrfs_get_extent
  Btrfs: fix incorrect block_len in merge_extent_mapping
  btrfs: Remove unused readahead spinlock
  ...
This commit is contained in:
Linus Torvalds 2018-01-29 14:04:23 -08:00
commit 31466f3ed7
47 changed files with 2268 additions and 1406 deletions

View File

@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
tests/free-space-tree-tests.o
tests/free-space-tree-tests.o tests/extent-map-tests.o

View File

@ -216,7 +216,8 @@ static int prelim_ref_compare(struct prelim_ref *ref1,
return 0;
}
void update_share_count(struct share_check *sc, int oldcount, int newcount)
static void update_share_count(struct share_check *sc, int oldcount,
int newcount)
{
if ((!sc) || (oldcount == 0 && newcount < 1))
return;

View File

@ -33,7 +33,6 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
@ -45,6 +44,21 @@
#include "extent_io.h"
#include "extent_map.h"
/* Printable names, indexed by enum btrfs_compression_type value. */
static const char * const btrfs_compress_types[] = {
	"",
	"zlib",
	"lzo",
	"zstd",
};

/*
 * Map a compression type to its printable name.
 *
 * Returns the matching entry of btrfs_compress_types[] for the four
 * known types, or NULL for any other value.
 */
const char *btrfs_compress_type2str(enum btrfs_compression_type type)
{
	const char *name = NULL;

	switch (type) {
	case BTRFS_COMPRESS_NONE:
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
		name = btrfs_compress_types[type];
		break;
	}

	return name;
}
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
bio_put(bio);
bio = btrfs_bio_alloc(bdev, first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
first_byte += PAGE_SIZE;
cond_resched();
}
bio_get(bio);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
bio_put(bio);
return 0;
}
@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
bio_put(comp_bio);
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
cur_disk_byte += PAGE_SIZE;
}
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
bio_put(comp_bio);
return 0;
fail2:
@ -752,6 +754,8 @@ struct heuristic_ws {
u32 sample_size;
/* Buckets store counters for each byte value */
struct bucket_item *bucket;
/* Sorting buffer */
struct bucket_item *bucket_b;
struct list_head list;
};
@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)
kvfree(workspace->sample);
kfree(workspace->bucket);
kfree(workspace->bucket_b);
kfree(workspace);
}
@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
if (!ws->bucket)
goto fail;
ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
if (!ws->bucket_b)
goto fail;
INIT_LIST_HEAD(&ws->list);
return &ws->list;
fail:
@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
return entropy_sum * 100 / entropy_max;
}
/* Compare buckets by size, ascending */
static int bucket_comp_rev(const void *lv, const void *rv)
{
const struct bucket_item *l = (const struct bucket_item *)lv;
const struct bucket_item *r = (const struct bucket_item *)rv;
#define RADIX_BASE 4U
#define COUNTERS_SIZE (1U << RADIX_BASE)
return r->count - l->count;
/*
 * Extract the radix digit of @num that starts at bit offset @shift and
 * return it inverted (COUNTERS_SIZE - 1 - digit), so that larger digits
 * land in lower counter slots and the sort comes out in reverse order.
 */
static u8 get4bits(u64 num, int shift)
{
	const u64 digit = (num >> shift) % COUNTERS_SIZE;

	return (COUNTERS_SIZE - 1) - digit;
}
/*
* Use 4 bits as radix base
* Use 16 u32 counters for calculating new possition in buf array
*
* @array - array that will be sorted
* @array_buf - buffer array to store sorting results
* must be equal in size to @array
* @num - array size
*/
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
int num)
{
u64 max_num;
u64 buf_num;
u32 counters[COUNTERS_SIZE];
u32 new_addr;
u32 addr;
int bitlen;
int shift;
int i;
/*
* Try avoid useless loop iterations for small numbers stored in big
* counters. Example: 48 33 4 ... in 64bit array
*/
max_num = array[0].count;
for (i = 1; i < num; i++) {
buf_num = array[i].count;
if (buf_num > max_num)
max_num = buf_num;
}
buf_num = ilog2(max_num);
bitlen = ALIGN(buf_num, RADIX_BASE * 2);
shift = 0;
while (shift < bitlen) {
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i++) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array_buf[new_addr] = array[i];
}
shift += RADIX_BASE;
/*
* Normal radix expects to move data from a temporary array, to
* the main one. But that requires some CPU time. Avoid that
* by doing another sort iteration to original array instead of
* memcpy()
*/
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i ++) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array[new_addr] = array_buf[i];
}
shift += RADIX_BASE;
}
}
/*
@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
struct bucket_item *bucket = ws->bucket;
/* Sort in reverse order */
sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count;

View File

@ -75,7 +75,7 @@ struct compressed_bio {
u32 sums;
};
void btrfs_init_compress(void);
void __init btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif

View File

@ -1807,8 +1807,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* simple bin_search frontend that does the right thing for
* leaves vs nodes
*/
static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
if (level == 0)
return generic_bin_search(eb,
@ -1824,12 +1824,6 @@ static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
slot);
}
/*
 * Non-static entry point that forwards all arguments unchanged to the
 * file-local bin_search() and returns its result.
 */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
return bin_search(eb, key, level, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
@ -2614,7 +2608,7 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
int level, int *prev_cmp, int *slot)
{
if (*prev_cmp != 0) {
*prev_cmp = bin_search(b, key, level, slot);
*prev_cmp = btrfs_bin_search(b, key, level, slot);
return *prev_cmp;
}
@ -2660,17 +2654,29 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
}
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
* level of the path (level 0)
* btrfs_search_slot - look for a key in a tree and perform necessary
* modifications to preserve tree invariants.
*
* If the key isn't found, the path points to the slot where it should
* be inserted, and 1 is returned. If there are other errors during the
* search a negative error number is returned.
* @trans: Handle of transaction, used when modifying the tree
* @p: Holds all btree nodes along the search path
* @root: The root node of the tree
* @key: The key we are looking for
* @ins_len: Indicates purpose of search, for inserts it is 1, for
* deletions it's -1. 0 for plain searches
* @cow: boolean should CoW operations be performed. Must always be 1
* when modifying the tree.
*
* if ins_len > 0, nodes and leaves will be split as we walk down the
* tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
* possible)
* If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
* If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
*
* If @key is found, 0 is returned and you can find the item in the leaf level
* of the path (level 0)
*
* If @key isn't found, 1 is returned and the leaf level of the path (level 0)
* points to the slot where it should be inserted
*
* If an error is encountered while searching the tree a negative error number
* is returned
*/
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p,
@ -2774,6 +2780,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* contention with the cow code
*/
if (cow) {
bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
/*
* if we don't really need to cow this block
* then we don't want to set the path blocking,
@ -2798,9 +2806,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
btrfs_set_path_blocking(p);
err = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1], &b);
if (last_level)
err = btrfs_cow_block(trans, root, b, NULL, 0,
&b);
else
err = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
ret = err;
goto done;
@ -5175,7 +5187,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
while (1) {
nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur);
sret = bin_search(cur, min_key, level, &slot);
sret = btrfs_bin_search(cur, min_key, level, &slot);
/* at the lowest level, we're done, setup the path and exit */
if (level == path->lowest_level) {

View File

@ -679,7 +679,6 @@ enum btrfs_orphan_cleanup_state {
/* used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
struct list_head hash_list;
wait_queue_head_t wait;
spinlock_t lock;
};
@ -3060,15 +3059,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 dir,
const char *name, u16 name_len,
int mod);
int verify_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf, int slot,
struct btrfs_dir_item *dir_item);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
const char *name,
int name_len);
bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
unsigned long start, u16 name_len);
/* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@ -3197,7 +3191,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void);
int __init btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
@ -3248,7 +3242,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff);
/* file.c */
int btrfs_auto_defrag_init(void);
int __init btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
@ -3283,7 +3277,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
/* sysfs.c */
int btrfs_init_sysfs(void);
int __init btrfs_init_sysfs(void);
void btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);

View File

@ -1303,40 +1303,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
if (!path)
goto out;
again:
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
goto free_path;
do {
if (atomic_read(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND / 2)
break;
delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
if (!delayed_node)
goto free_path;
delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
if (!delayed_node)
break;
path->leave_spinning = 1;
root = delayed_node->root;
path->leave_spinning = 1;
root = delayed_node->root;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto release_path;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_release_path(path);
btrfs_release_prepared_delayed_node(delayed_node);
total_done++;
continue;
}
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
trans->block_rsv = block_rsv;
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty_nodelay(root->fs_info);
trans->block_rsv = block_rsv;
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty_nodelay(root->fs_info);
release_path:
btrfs_release_path(path);
total_done++;
btrfs_release_path(path);
btrfs_release_prepared_delayed_node(delayed_node);
total_done++;
btrfs_release_prepared_delayed_node(delayed_node);
if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
total_done < async_work->nr)
goto again;
} while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
|| total_done < async_work->nr);
free_path:
btrfs_free_path(path);
out:
wake_up(&delayed_root->wait);
@ -1349,10 +1351,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{
struct btrfs_async_delayed_work *async_work;
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return 0;
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
if (!async_work)
return -ENOMEM;
@ -1388,7 +1386,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return;
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {

View File

@ -937,7 +937,7 @@ void btrfs_delayed_ref_exit(void)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
int __init btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",

View File

@ -203,7 +203,7 @@ extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int btrfs_delayed_ref_init(void);
int __init btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op *

View File

@ -172,7 +172,8 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
dev_replace->tgtdev->commit_bytes_used =
dev_replace->srcdev->commit_bytes_used;
}
dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1;
set_bit(BTRFS_DEV_STATE_REPLACE_TGT,
&dev_replace->tgtdev->dev_state);
btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
dev_replace->tgtdev);
}
@ -304,6 +305,14 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item;
}
/*
 * Return a printable name for @device, for use in log messages.
 *
 * @device may be NULL: the "continuing dev_replace" message passes
 * dev_replace->tgtdev, which the code it replaced explicitly handled as
 * possibly unset.  A NULL device and a device flagged
 * BTRFS_DEV_STATE_MISSING are both reported as "<missing disk>";
 * otherwise the name obtained through rcu_str_deref() is returned.
 */
static char *btrfs_dev_name(struct btrfs_device *device)
{
	if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		return "<missing disk>";

	return rcu_str_deref(device->name);
}
int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
int read_src)
@ -363,8 +372,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s started",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name));
@ -538,8 +546,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
} else {
btrfs_err_in_rcu(fs_info,
"btrfs_scrub_dev(%s, %llu, %s) failed %d",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace, 1);
@ -557,11 +564,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s finished",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name));
tgt_device->is_tgtdev_for_dev_replace = 0;
clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state);
tgt_device->devid = src_device->devid;
src_device->devid = BTRFS_DEV_REPLACE_DEVID;
memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
@ -814,12 +820,10 @@ static int btrfs_dev_replace_kthread(void *data)
progress = btrfs_dev_replace_progress(fs_info);
progress = div_u64(progress, 10);
btrfs_info_in_rcu(fs_info,
"continuing dev_replace from %s (devid %llu) to %s @%u%%",
dev_replace->srcdev->missing ? "<missing disk>"
: rcu_str_deref(dev_replace->srcdev->name),
"continuing dev_replace from %s (devid %llu) to target %s @%u%%",
btrfs_dev_name(dev_replace->srcdev),
dev_replace->srcdev->devid,
dev_replace->tgtdev ? rcu_str_deref(dev_replace->tgtdev->name)
: "<missing target disk>",
btrfs_dev_name(dev_replace->tgtdev),
(unsigned int)progress);
btrfs_dev_replace_continue_on_mount(fs_info);

View File

@ -403,8 +403,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
btrfs_dir_data_len(leaf, dir_item);
name_ptr = (unsigned long)(dir_item + 1);
if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item))
return NULL;
if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
return dir_item;
@ -450,109 +448,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
}
return ret;
}
/*
 * Sanity-check the directory item @dir_item stored in @leaf at @slot.
 *
 * Checks, in this order:
 *  - the item type is below BTRFS_FT_MAX
 *  - the name length does not exceed XATTR_NAME_MAX for BTRFS_FT_XATTR
 *    items, or BTRFS_NAME_LEN for every other type
 *  - btrfs_is_name_len_valid() accepts the name that follows the item
 *    header
 *  - name length plus data length does not exceed BTRFS_MAX_XATTR_SIZE
 *
 * Returns 0 when every check passes and 1 on the first failure.
 */
int verify_dir_item(struct btrfs_fs_info *fs_info,
		    struct extent_buffer *leaf,
		    int slot,
		    struct btrfs_dir_item *dir_item)
{
	const u8 type = btrfs_dir_type(leaf, dir_item);
	u16 max_name_len;
	u16 name_len;

	if (type >= BTRFS_FT_MAX) {
		btrfs_crit(fs_info, "invalid dir item type: %d", (int)type);
		return 1;
	}

	max_name_len = (type == BTRFS_FT_XATTR) ? XATTR_NAME_MAX :
						  BTRFS_NAME_LEN;
	if (btrfs_dir_name_len(leaf, dir_item) > max_name_len) {
		btrfs_crit(fs_info, "invalid dir item name len: %u",
			   (unsigned)btrfs_dir_name_len(leaf, dir_item));
		return 1;
	}

	/* The name starts right after the fixed-size item header */
	name_len = btrfs_dir_name_len(leaf, dir_item);
	if (!btrfs_is_name_len_valid(leaf, slot,
				     (unsigned long)(dir_item + 1), name_len))
		return 1;

	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
	if ((btrfs_dir_data_len(leaf, dir_item) +
	     btrfs_dir_name_len(leaf, dir_item)) <=
	    BTRFS_MAX_XATTR_SIZE(fs_info))
		return 0;

	btrfs_crit(fs_info, "invalid dir item name + data len: %u + %u",
		   (unsigned)btrfs_dir_name_len(leaf, dir_item),
		   (unsigned)btrfs_dir_data_len(leaf, dir_item));
	return 1;
}
/*
 * Check that a name of @name_len bytes starting at @start lies inside the
 * item at @slot of @leaf.
 *
 * @start is converted to an item-data offset by subtracting
 * BTRFS_LEAF_DATA_OFFSET.  The name is accepted only if the item's key
 * type is one of the name-carrying types handled below, the name range
 * does not begin before or end after the item, and at least the
 * fixed-size structure for that key type precedes the name.
 *
 * Returns true when the name is valid; otherwise logs a critical message
 * and returns false.
 */
bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
			     unsigned long start, u16 name_len)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_key key;
	u32 name_start;
	u32 name_end;
	u32 item_start;
	u32 item_end;
	u32 hdr_size = 0;
	bool known_type = true;
	bool valid;

	ASSERT(start > BTRFS_LEAF_DATA_OFFSET);
	name_start = start - BTRFS_LEAF_DATA_OFFSET;
	name_end = name_start + name_len;
	item_start = btrfs_item_offset_nr(leaf, slot);
	item_end = btrfs_item_end_nr(leaf, slot);

	/* Size of the fixed structure that must sit in front of the name */
	btrfs_item_key_to_cpu(leaf, &key, slot);
	switch (key.type) {
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_XATTR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
		hdr_size = sizeof(struct btrfs_dir_item);
		break;
	case BTRFS_INODE_REF_KEY:
		hdr_size = sizeof(struct btrfs_inode_ref);
		break;
	case BTRFS_INODE_EXTREF_KEY:
		hdr_size = sizeof(struct btrfs_inode_extref);
		break;
	case BTRFS_ROOT_REF_KEY:
	case BTRFS_ROOT_BACKREF_KEY:
		hdr_size = sizeof(struct btrfs_root_ref);
		break;
	default:
		known_type = false;
		break;
	}

	/* there shall be item(s) before name */
	valid = known_type &&
		name_start >= item_start &&
		name_end <= item_end &&
		name_start - item_start >= hdr_size;

	if (!valid)
		btrfs_crit(fs_info, "invalid dir item name len: %u",
			   (unsigned int)name_len);
	return valid;
}

View File

@ -61,7 +61,8 @@
BTRFS_HEADER_FLAG_RELOC |\
BTRFS_SUPER_FLAG_ERROR |\
BTRFS_SUPER_FLAG_SEEDING |\
BTRFS_SUPER_FLAG_METADUMP)
BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@ -220,7 +221,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
*/
static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
@ -285,7 +286,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
int verify)
{
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
char *result = NULL;
char result[BTRFS_CSUM_SIZE];
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
@ -294,7 +295,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
unsigned long map_len;
int err;
u32 crc = ~(u32)0;
unsigned long inline_result;
len = buf->len - offset;
while (len > 0) {
@ -308,13 +308,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
len -= cur_len;
offset += cur_len;
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size, GFP_NOFS);
if (!result)
return -ENOMEM;
} else {
result = (char *)&inline_result;
}
memset(result, 0, BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
@ -329,15 +323,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
"%s checksum verify failed on %llu wanted %X found %X level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
}
if (result != (char *)&inline_result)
kfree(result);
return 0;
}
@ -391,7 +382,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
clear_extent_buffer_uptodate(eb);
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
&cached_state);
if (need_lock)
btrfs_tree_read_unlock_blocking(eb);
return ret;
@ -455,7 +446,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
btree_get_extent, mirror_num);
mirror_num);
if (!ret) {
if (!verify_parent_transid(io_tree, eb,
parent_transid, 0))
@ -1012,7 +1003,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, WAIT_NONE, btree_get_extent, 0);
buf, WAIT_NONE, 0);
free_extent_buffer(buf);
}
@ -1031,7 +1022,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
btree_get_extent, mirror_num);
mirror_num);
if (ret) {
free_extent_buffer(buf);
return ret;
@ -1243,7 +1234,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
uuid_le uuid;
uuid_le uuid = NULL_UUID_LE;
root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
@ -1284,7 +1275,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
uuid_le_gen(&uuid);
if (is_fstree(objectid))
uuid_le_gen(&uuid);
memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0;
@ -2875,7 +2867,7 @@ int open_ctree(struct super_block *sb,
goto fail_sysfs;
}
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) {
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"writeable mount is not allowed due to too many missing devices");
goto fail_sysfs;
@ -3357,7 +3349,7 @@ static void write_dev_flush(struct btrfs_device *device)
bio->bi_private = &device->flush_wait;
btrfsic_submit_bio(bio);
device->flush_bio_sent = 1;
set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
}
/*
@ -3367,10 +3359,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
{
struct bio *bio = device->flush_bio;
if (!device->flush_bio_sent)
if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
return BLK_STS_OK;
device->flush_bio_sent = 0;
clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
wait_for_completion_io(&device->flush_wait);
return bio->bi_status;
@ -3378,7 +3370,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
static int check_barrier_error(struct btrfs_fs_info *fs_info)
{
if (!btrfs_check_rw_degradable(fs_info))
if (!btrfs_check_rw_degradable(fs_info, NULL))
return -EIO;
return 0;
}
@ -3394,14 +3386,16 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
int errors_wait = 0;
blk_status_t ret;
lockdep_assert_held(&info->fs_devices->device_list_mutex);
/* send down all the barriers */
head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
if (dev->missing)
list_for_each_entry(dev, head, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->bdev)
continue;
if (!dev->in_fs_metadata || !dev->writeable)
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
write_dev_flush(dev);
@ -3409,14 +3403,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
}
/* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) {
if (dev->missing)
list_for_each_entry(dev, head, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->bdev) {
errors_wait++;
continue;
}
if (!dev->in_fs_metadata || !dev->writeable)
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
ret = wait_dev_flush(dev);
@ -3508,12 +3503,13 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
}
}
list_for_each_entry_rcu(dev, head, dev_list) {
list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
total_errors++;
continue;
}
if (!dev->in_fs_metadata || !dev->writeable)
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
btrfs_set_stack_device_generation(dev_item, 0);
@ -3549,10 +3545,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
}
total_errors = 0;
list_for_each_entry_rcu(dev, head, dev_list) {
list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev)
continue;
if (!dev->in_fs_metadata || !dev->writeable)
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
ret = wait_dev_supers(dev, max_mirrors);
@ -3910,9 +3907,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL;
}
if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
btrfs_warn(fs_info, "unrecognized super flag: %llu",
if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
ret = -EINVAL;
}
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);

View File

@ -149,6 +149,9 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create);
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int __init btrfs_end_io_wq_init(void);
void btrfs_end_io_wq_exit(void);

View File

@ -283,11 +283,6 @@ static int btrfs_get_name(struct dentry *parent, char *name,
name_len = btrfs_inode_ref_name_len(leaf, iref);
}
ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len);
if (!ret) {
btrfs_free_path(path);
return -EIO;
}
read_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_free_path(path);

View File

@ -2145,7 +2145,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
if (!stripe->dev->can_discard)
struct request_queue *req_q;
req_q = bdev_get_queue(stripe->dev->bdev);
if (!blk_queue_discard(req_q))
continue;
ret = btrfs_issue_discard(stripe->dev->bdev,
@ -2894,7 +2897,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *global_rsv;
u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
u64 num_bytes, num_dirty_bgs_bytes;
int ret = 0;
@ -4945,12 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
bytes = 0;
else
bytes -= delayed_rsv->size;
spin_unlock(&delayed_rsv->lock);
if (percpu_counter_compare(&space_info->total_bytes_pinned,
bytes) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
spin_unlock(&delayed_rsv->lock);
commit:
trans = btrfs_join_transaction(fs_info->extent_root);
@ -5738,8 +5741,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
* or return if we already have enough space. This will also handle the reserve
* tracepoint for the reserved amount.
*/
int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
enum btrfs_reserve_flush_enum flush)
static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
@ -5770,7 +5773,7 @@ int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
* This is the same as btrfs_block_rsv_release, except that it handles the
* tracepoint for the reservation.
*/
void btrfs_inode_rsv_release(struct btrfs_inode *inode)
static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@ -9690,7 +9693,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
* space to fit our block group in.
*/
if (device->total_bytes > device->bytes_used + min_free &&
!device->is_tgtdev_for_dev_replace) {
!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
ret = find_free_dev_extent(trans, device, min_free,
&dev_offset, NULL);
if (!ret)
@ -10875,7 +10878,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
*trimmed = 0;
/* Not writeable = nothing to do. */
if (!device->writeable)
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
return 0;
/* No free space = nothing to do. */

View File

@ -21,6 +21,7 @@
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@ -109,8 +110,6 @@ struct tree_entry {
struct extent_page_data {
struct bio *bio;
struct extent_io_tree *tree;
get_extent_t *get_extent;
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
@ -139,7 +138,8 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits,
BUG_ON(ret < 0);
}
static noinline void flush_write_bio(void *data);
static void flush_write_bio(struct extent_page_data *epd);
static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
@ -581,7 +581,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
*
* This takes the tree lock, and returns 0 on success and < 0 on error.
*/
static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached_state,
gfp_t mask, struct extent_changeset *changeset)
@ -1295,10 +1295,10 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask)
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, bits, wake, delete,
cached, mask, NULL);
cached, GFP_NOFS, NULL);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@ -1348,7 +1348,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
EXTENT_LOCKED, 1, 0, NULL);
return 0;
}
return 1;
@ -1648,7 +1648,7 @@ STATIC u64 find_lock_delalloc_range(struct inode *inode,
EXTENT_DELALLOC, 1, cached_state);
if (!ret) {
unlock_extent_cached(tree, delalloc_start, delalloc_end,
&cached_state, GFP_NOFS);
&cached_state);
__unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end);
cond_resched();
@ -1744,7 +1744,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
unsigned long page_ops)
{
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
NULL, GFP_NOFS);
NULL);
__process_pages_contig(inode->i_mapping, locked_page,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
@ -2027,7 +2027,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[bbio->mirror_num - 1].dev;
btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
if (!dev || !dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
@ -2493,7 +2494,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
if (uptodate && tree->track_uptodate)
set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
unlock_extent_cached_atomic(tree, start, end, &cached);
}
/*
@ -2733,7 +2734,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL;
bio_get(bio);
if (tree->ops)
ret = tree->ops->submit_bio_hook(tree->private_data, bio,
@ -2741,7 +2741,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
else
btrfsic_submit_bio(bio);
bio_put(bio);
return blk_status_to_errno(ret);
}
@ -2943,8 +2942,7 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
cur + iosize - 1, &cached);
break;
}
em = __get_extent_map(inode, page, pg_offset, cur,
@ -3037,8 +3035,7 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
cur + iosize - 1, &cached);
cur = cur + iosize;
pg_offset += iosize;
continue;
@ -3093,9 +3090,8 @@ static int __do_readpage(struct extent_io_tree *tree,
static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
struct page *pages[], int nr_pages,
u64 start, u64 end,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
struct bio **bio,
unsigned long *bio_flags,
u64 *prev_em_start)
{
@ -3116,18 +3112,17 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
}
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
mirror_num, bio_flags, 0, prev_em_start);
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
bio, 0, bio_flags, 0, prev_em_start);
put_page(pages[index]);
}
}
static void __extent_readpages(struct extent_io_tree *tree,
struct page *pages[],
int nr_pages, get_extent_t *get_extent,
int nr_pages,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
unsigned long *bio_flags,
struct bio **bio, unsigned long *bio_flags,
u64 *prev_em_start)
{
u64 start = 0;
@ -3147,8 +3142,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
} else {
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached,
bio, mirror_num, bio_flags,
end, em_cached,
bio, bio_flags,
prev_em_start);
start = page_start;
end = start + PAGE_SIZE - 1;
@ -3159,9 +3154,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
if (end)
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached, bio,
mirror_num, bio_flags,
prev_em_start);
end, em_cached, bio,
bio_flags, prev_em_start);
}
static int __extent_read_full_page(struct extent_io_tree *tree,
@ -3376,7 +3370,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page_end, NULL, 1);
break;
}
em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur,
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
end - cur + 1, 1);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
@ -3459,10 +3453,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
* and the end_io handler clears the writeback ranges
*/
static int __extent_writepage(struct page *page, struct writeback_control *wbc,
void *data)
struct extent_page_data *epd)
{
struct inode *inode = page->mapping->host;
struct extent_page_data *epd = data;
u64 start = page_offset(page);
u64 page_end = start + PAGE_SIZE - 1;
int ret;
@ -3896,8 +3889,7 @@ int btree_write_cache_pages(struct address_space *mapping,
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @writepage: function called for each page
* @data: data passed to writepage function
* @data: data passed to __extent_writepage function
*
* If a page is already under I/O, write_cache_pages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback,
@ -3909,8 +3901,7 @@ int btree_write_cache_pages(struct address_space *mapping,
*/
static int extent_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
struct extent_page_data *epd)
{
struct inode *inode = mapping->host;
int ret = 0;
@ -3974,7 +3965,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
* mapping
*/
if (!trylock_page(page)) {
flush_fn(data);
flush_write_bio(epd);
lock_page(page);
}
@ -3985,7 +3976,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
if (wbc->sync_mode != WB_SYNC_NONE) {
if (PageWriteback(page))
flush_fn(data);
flush_write_bio(epd);
wait_on_page_writeback(page);
}
@ -3995,7 +3986,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
continue;
}
ret = (*writepage)(page, wbc, data);
ret = __extent_writepage(page, wbc, epd);
if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
unlock_page(page);
@ -4043,7 +4034,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
return ret;
}
static void flush_epd_write_bio(struct extent_page_data *epd)
static void flush_write_bio(struct extent_page_data *epd)
{
if (epd->bio) {
int ret;
@ -4054,37 +4045,28 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
}
}
static noinline void flush_write_bio(void *data)
{
struct extent_page_data *epd = data;
flush_epd_write_bio(epd);
}
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
int extent_write_full_page(struct page *page, struct writeback_control *wbc)
{
int ret;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
.tree = &BTRFS_I(page->mapping->host)->io_tree,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = __extent_writepage(page, wbc, &epd);
flush_epd_write_bio(&epd);
flush_write_bio(&epd);
return ret;
}
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
u64 start, u64 end, get_extent_t *get_extent,
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode)
{
int ret = 0;
struct address_space *mapping = inode->i_mapping;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *page;
unsigned long nr_pages = (end - start + PAGE_SIZE) >>
PAGE_SHIFT;
@ -4092,7 +4074,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL,
};
@ -4118,34 +4099,30 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_SIZE;
}
flush_epd_write_bio(&epd);
flush_write_bio(&epd);
return ret;
}
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc)
{
int ret = 0;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
flush_write_bio);
flush_epd_write_bio(&epd);
ret = extent_write_cache_pages(mapping, wbc, &epd);
flush_write_bio(&epd);
return ret;
}
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages,
get_extent_t get_extent)
struct list_head *pages, unsigned nr_pages)
{
struct bio *bio = NULL;
unsigned page_idx;
@ -4171,13 +4148,13 @@ int extent_readpages(struct extent_io_tree *tree,
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
&bio, 0, &bio_flags, &prev_em_start);
__extent_readpages(tree, pagepool, nr, &em_cached, &bio,
&bio_flags, &prev_em_start);
nr = 0;
}
if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
&bio, 0, &bio_flags, &prev_em_start);
__extent_readpages(tree, pagepool, nr, &em_cached, &bio,
&bio_flags, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
@ -4210,7 +4187,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING,
1, 1, &cached_state, GFP_NOFS);
1, 1, &cached_state);
return 0;
}
@ -4235,9 +4212,9 @@ static int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
*/
ret = clear_extent_bit(tree, start, end,
ret = __clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM),
0, 0, NULL, mask);
0, 0, NULL, mask, NULL);
/* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue.
@ -4303,9 +4280,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
* This maps until we find something past 'last'
*/
static struct extent_map *get_extent_skip_holes(struct inode *inode,
u64 offset,
u64 last,
get_extent_t *get_extent)
u64 offset, u64 last)
{
u64 sectorsize = btrfs_inode_sectorsize(inode);
struct extent_map *em;
@ -4319,15 +4294,14 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (len == 0)
break;
len = ALIGN(len, sectorsize);
em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0);
em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
len, 0);
if (IS_ERR_OR_NULL(em))
return em;
/* if this isn't a hole return it */
if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
em->block_start != EXTENT_MAP_HOLE) {
if (em->block_start != EXTENT_MAP_HOLE)
return em;
}
/* this is a hole, advance to the next extent */
offset = extent_map_end(em);
@ -4452,7 +4426,7 @@ static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
}
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
__u64 start, __u64 len)
{
int ret = 0;
u64 off = start;
@ -4534,8 +4508,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
get_extent);
em = get_extent_skip_holes(inode, start, last_for_get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@ -4623,8 +4596,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
/* now scan forward to see if this is really the last extent. */
em = get_extent_skip_holes(inode, off, last_for_get_extent,
get_extent);
em = get_extent_skip_holes(inode, off, last_for_get_extent);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@ -4648,7 +4620,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out:
btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
&cached_state);
return ret;
}
@ -5264,8 +5236,7 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
}
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait,
get_extent_t *get_extent, int mirror_num)
struct extent_buffer *eb, int wait, int mirror_num)
{
unsigned long i;
struct page *page;
@ -5325,7 +5296,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
btree_get_extent, &bio,
mirror_num, &bio_flags,
REQ_META);
if (err) {

View File

@ -300,19 +300,29 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
GFP_NOFS);
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
}
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached, gfp_t mask)
u64 end, struct extent_state **cached)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
mask);
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_NOFS, NULL);
}
/*
 * Clear EXTENT_LOCKED on the start..end range of @tree.
 *
 * Thin wrapper around __clear_extent_bit() that passes GFP_ATOMIC as the
 * allocation mask, wake = 1, delete = 0 and no changeset.  @cached is handed
 * through unchanged.  Returns whatever __clear_extent_bit() returns.
 */
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
					      u64 start, u64 end,
					      struct extent_state **cached)
{
	const unsigned bits = EXTENT_LOCKED;
	const int wake = 1;
	const int delete = 0;

	return __clear_extent_bit(tree, start, end, bits, wake, delete,
				  cached, GFP_ATOMIC, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
@ -323,8 +333,7 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
if (bits & EXTENT_LOCKED)
wake = 1;
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL,
GFP_NOFS);
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@ -340,10 +349,10 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
u64 end, struct extent_state **cached_state)
{
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask);
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
@ -358,7 +367,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
EXTENT_DO_ACCOUNTING, 0, 0, NULL);
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
@ -401,24 +410,19 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
struct extent_state **cached_state);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc);
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
u64 start, u64 end, get_extent_t *get_extent,
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages,
get_extent_t get_extent);
struct list_head *pages, unsigned nr_pages);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
@ -437,7 +441,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait,
get_extent_t *get_extent, int mirror_num);
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len)

View File

@ -454,3 +454,135 @@ void replace_extent_mapping(struct extent_map_tree *tree,
setup_extent_mapping(tree, new, modified);
}
/*
 * Return the extent map whose rb_node follows @em's in the tree, or NULL
 * when rb_next() reports that there is no successor.
 */
static struct extent_map *next_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_next(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/*
 * Return the extent map whose rb_node precedes @em's in the tree, or NULL
 * when rb_prev() reports that there is no predecessor.
 */
static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_prev(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/*
 * Helper for btrfs_get_extent().
 *
 * @existing is an extent map already in @em_tree, the one nearest to
 * @map_start, and @em is the map the caller wants to insert.  The two maps
 * bracketing @map_start are looked up (one of them is @existing), @em is
 * clamped so it starts no earlier than the end of the left one and ends no
 * later than the start of the right one, and the clamped map is inserted.
 *
 * @map_start must lie inside @em's original range, otherwise BUG_ON() fires.
 *
 * Returns the result of add_extent_mapping().
 */
static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
					 struct extent_map *existing,
					 struct extent_map *em,
					 u64 map_start)
{
	struct extent_map *left;
	struct extent_map *right;
	u64 new_start;
	u64 new_end;
	u64 shift;

	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	/* Decide on which side of @map_start the existing map sits. */
	if (existing->start > map_start) {
		right = existing;
		left = prev_extent_map(right);
	} else {
		left = existing;
		right = next_extent_map(left);
	}

	/* Lower bound: end of the left neighbour, never before @em itself. */
	if (left)
		new_start = extent_map_end(left);
	else
		new_start = em->start;
	new_start = max_t(u64, new_start, em->start);

	/* Upper bound: start of the right neighbour, never past @em's end. */
	if (right)
		new_end = right->start;
	else
		new_end = extent_map_end(em);
	new_end = min_t(u64, new_end, extent_map_end(em));

	/* Must be computed before em->start is overwritten. */
	shift = new_start - em->start;

	em->start = new_start;
	em->len = new_end - new_start;

	/*
	 * Only maps with a block_start below EXTENT_MAP_LAST_BYTE that are not
	 * flagged compressed get their block range shifted along with the
	 * trimmed file range.
	 */
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += shift;
		em->block_len = em->len;
	}

	return add_extent_mapping(em_tree, em, 0);
}
/**
 * btrfs_add_extent_mapping - add extent mapping into em_tree
 * @em_tree - the extent tree into which we want to insert the extent mapping
 * @em_in   - in: extent we are inserting; out: the extent the caller should
 *            use afterwards (may be replaced, or set to NULL on merge failure)
 * @start   - start of the logical range btrfs_get_extent() is requesting
 * @len     - length of the logical range btrfs_get_extent() is requesting
 *
 * @em_in's range may differ from [start, start+len), but the two must
 * overlap.
 *
 * The extent is first inserted as-is.  When that reports -EEXIST, the
 * conflicting extent is looked up and one of two things happens:
 *   a) @start falls inside the existing extent: the extent passed in is
 *      released and *@em_in is pointed at the existing one instead.
 *   b) otherwise: the extent passed in is trimmed against the existing one by
 *      merge_extent_mapping() and inserted; the reference on the existing
 *      extent is dropped afterwards.  If that merge fails, the extent passed
 *      in is released, *@em_in becomes NULL and a one-time warning is emitted.
 *
 * Return 0 on success, otherwise -EEXIST.
 */
int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
			     struct extent_map **em_in, u64 start, u64 len)
{
	struct extent_map *em = *em_in;
	struct extent_map *existing;
	u64 orig_start;
	u64 orig_len;
	int ret;

	ret = add_extent_mapping(em_tree, em, 0);
	/*
	 * Someone may have inserted the extent into the tree while we had the
	 * lock dropped, or an overlapping map may already be present.  Any
	 * other result goes straight to the final sanity check.
	 */
	if (ret != -EEXIST)
		goto out;

	/*
	 * existing will always be non-NULL, since there must be an extent
	 * causing the -EEXIST.
	 */
	existing = search_extent_mapping(em_tree, start, len);

	if (existing->start <= start && start < extent_map_end(existing)) {
		/* Case a): hand the existing extent back to the caller. */
		free_extent_map(em);
		*em_in = existing;
		ret = 0;
		goto out;
	}

	/*
	 * Case b): the existing extent map is the one nearest to the
	 * [start, start + len) range which overlaps.  Remember the untrimmed
	 * range first, the merge helper rewrites em->start and em->len.
	 */
	orig_start = em->start;
	orig_len = em->len;

	ret = merge_extent_mapping(em_tree, existing, em, start);
	if (ret) {
		free_extent_map(em);
		*em_in = NULL;
		WARN_ONCE(ret,
"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
			  ret, existing->start, existing->len,
			  orig_start, orig_len);
	}
	/* Dropped only after the warning above has read from it. */
	free_extent_map(existing);

out:
	ASSERT(ret == 0 || ret == -EEXIST);
	return ret;
}

View File

@ -13,7 +13,6 @@
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
@ -92,4 +91,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len);
#endif

View File

@ -1505,7 +1505,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
ordered->file_offset + ordered->len > start_pos &&
ordered->file_offset <= last_pos) {
unlock_extent_cached(&inode->io_tree, start_pos,
last_pos, cached_state, GFP_NOFS);
last_pos, cached_state);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
@ -1520,7 +1520,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
clear_extent_bit(&inode->io_tree, start_pos, last_pos,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached_state, GFP_NOFS);
0, 0, cached_state);
*lockstart = start_pos;
*lockend = last_pos;
ret = 1;
@ -1756,11 +1756,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, NULL);
pos, copied, &cached_state);
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
GFP_NOFS);
lockstart, lockend, &cached_state);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
@ -2020,10 +2019,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
/*
 * Start writeback for the start..end range of @inode through
 * btrfs_fdatawrite_range() and return its result.
 *
 * The inode's sync_writers counter is raised for the duration of the call,
 * and the whole sequence runs inside a block plug.
 */
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct blk_plug plug;
	int ret;

	/*
	 * This is only called in fsync, which would do synchronous writes, so
	 * a plug can merge adjacent IOs as much as possible.  Esp. in case of
	 * multiple disks using raid profile, a large IO can be split to
	 * several segments of stripe length (currently 64K).
	 */
	blk_start_plug(&plug);

	atomic_inc(&binode->sync_writers);
	ret = btrfs_fdatawrite_range(inode, start, end);
	atomic_dec(&binode->sync_writers);

	blk_finish_plug(&plug);

	return ret;
}
@ -2451,6 +2459,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
return ret;
}
/*
 * Lock lockstart..lockend in @inode's io_tree once the range is free of
 * ordered extents and cached pages.
 *
 * Each attempt truncates the page cache for the range, takes the extent lock
 * and then looks up the first ordered extent at @lockend.  If that ordered
 * extent overlaps the range, or a page exists in the range, the lock is
 * dropped, the ordered range is waited on and the attempt is repeated.
 *
 * Returns 0 with the range still locked (state cached in @cached_state), or
 * the error from btrfs_wait_ordered_range() with the range unlocked.
 */
static int btrfs_punch_hole_lock_range(struct inode *inode,
				       const u64 lockstart,
				       const u64 lockend,
				       struct extent_state **cached_state)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	const u64 lock_len = lockend - lockstart + 1;

	for (;;) {
		struct btrfs_ordered_extent *ordered;
		int range_is_quiet;
		int ret;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(io_tree, lockstart, lockend, cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this
		 * range and nobody raced in and read a page in this range,
		 * if we did we need to try again.  The page check is only
		 * made when the ordered-extent check has already passed.
		 */
		range_is_quiet =
			(!ordered ||
			 ordered->file_offset + ordered->len <= lockstart ||
			 ordered->file_offset > lockend) &&
			!btrfs_page_exists_in_range(inode, lockstart, lockend);

		if (ordered)
			btrfs_put_ordered_extent(ordered);

		if (range_is_quiet)
			return 0;

		unlock_extent_cached(io_tree, lockstart, lockend,
				     cached_state);
		ret = btrfs_wait_ordered_range(inode, lockstart, lock_len);
		if (ret)
			return ret;
	}
}
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@ -2567,38 +2615,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex;
}
while (1) {
struct btrfs_ordered_extent *ordered;
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
/*
* We need to make sure we have no ordered extents in this range
* and nobody raced in and read a page in this range, if we did
* we need to try again.
*/
if ((!ordered ||
(ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
!btrfs_page_exists_in_range(inode, lockstart, lockend)) {
if (ordered)
btrfs_put_ordered_extent(ordered);
break;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state, GFP_NOFS);
ret = btrfs_wait_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (ret) {
inode_unlock(inode);
return ret;
}
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
&cached_state);
if (ret) {
inode_unlock(inode);
goto out_only_mutex;
}
path = btrfs_alloc_path();
@ -2743,7 +2764,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_free_block_rsv(fs_info, rsv);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
&cached_state);
out_only_mutex:
if (!updated_inode && truncated_block && !ret && !err) {
/*
@ -2807,6 +2828,234 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
return 0;
}
/*
 * Extend the inode's size to @end after a fallocate style operation.
 *
 * Nothing is done, and 0 is returned, when @mode carries FALLOC_FL_KEEP_SIZE
 * or when @end does not go past the current i_size. Otherwise a transaction
 * is started, ctime and i_size are updated and the inode item is written.
 *
 * Returns the error from starting the transaction, else the result of
 * btrfs_update_inode() if it is non-zero, else the result of
 * btrfs_end_transaction().
 */
static int btrfs_fallocate_update_isize(struct inode *inode,
					const u64 end,
					const int mode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	int update_ret;
	int end_ret;

	if (mode & FALLOC_FL_KEEP_SIZE)
		return 0;
	if (end <= i_size_read(inode))
		return 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	inode->i_ctime = current_time(inode);
	i_size_write(inode, end);
	btrfs_ordered_update_i_size(inode, end, NULL);

	/* The transaction is always ended, even if the inode update failed. */
	update_ret = btrfs_update_inode(trans, root, inode);
	end_ret = btrfs_end_transaction(trans);

	if (update_ret)
		return update_ret;
	return end_ret;
}
/*
 * Classification of the block found at a zero range boundary, as returned by
 * btrfs_zero_range_check_range_boundary(). Values are non-negative so they
 * can share an int return value with negative error codes.
 */
enum {
	/* Block maps to an extent that is neither a hole nor prealloc. */
	RANGE_BOUNDARY_WRITTEN_EXTENT,
	/* Block maps to an extent flagged EXTENT_FLAG_PREALLOC. */
	RANGE_BOUNDARY_PREALLOC_EXTENT,
	/* Block maps to a hole (EXTENT_MAP_HOLE). */
	RANGE_BOUNDARY_HOLE,
};
/*
 * Classify the block that contains @offset.
 *
 * Looks up the extent map for the sector containing @offset and returns one
 * of the RANGE_BOUNDARY_* values, or the error encoded in the pointer
 * returned by btrfs_get_extent() if the lookup fails.
 */
static int btrfs_zero_range_check_range_boundary(struct inode *inode,
						 u64 offset)
{
	const u64 sectorsize = btrfs_inode_sectorsize(inode);
	struct extent_map *em;
	int boundary;

	offset = round_down(offset, sectorsize);
	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/* A hole takes precedence over the prealloc flag check. */
	boundary = RANGE_BOUNDARY_WRITTEN_EXTENT;
	if (em->block_start == EXTENT_MAP_HOLE)
		boundary = RANGE_BOUNDARY_HOLE;
	else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		boundary = RANGE_BOUNDARY_PREALLOC_EXTENT;

	free_extent_map(em);
	return boundary;
}
/*
* Handle the FALLOC_FL_ZERO_RANGE case of fallocate for the byte range
* [offset, offset + len) of @inode.
*
* Parts of the range already covered by a prealloc extent are left alone.
* Partially covered boundary blocks that map to written extents are handled
* through btrfs_truncate_block(). Whatever remains is replaced by a prealloc
* extent obtained from btrfs_prealloc_file_range(). On success the inode size
* is updated through btrfs_fallocate_update_isize(), which honours
* FALLOC_FL_KEEP_SIZE in @mode.
*
* Returns 0 on success, otherwise the error reported by the failing helper.
*/
static int btrfs_zero_range(struct inode *inode,
loff_t offset,
loff_t len,
const int mode)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_map *em;
struct extent_changeset *data_reserved = NULL;
int ret;
u64 alloc_hint = 0;
const u64 sectorsize = btrfs_inode_sectorsize(inode);
u64 alloc_start = round_down(offset, sectorsize);
u64 alloc_end = round_up(offset + len, sectorsize);
u64 bytes_to_reserve = 0;
bool space_reserved = false;
inode_dio_wait(inode);
/* Look up the extent map for the sector aligned version of the range. */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
alloc_start, alloc_end - alloc_start, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
/*
* Avoid hole punching and extent allocation for some cases. More cases
* could be considered, but these are unlikely common and we keep things
* as simple as possible for now. Also, intentionally, if the target
* range contains one or more prealloc extents together with regular
* extents and holes, we drop all the existing extents and allocate a
* new prealloc extent, so that we get a larger contiguous disk extent.
*/
if (em->start <= alloc_start &&
test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
const u64 em_end = em->start + em->len;
if (em_end >= offset + len) {
/*
* The whole range is already a prealloc extent,
* do nothing except updating the inode's i_size if
* needed.
*/
free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
}
/*
* Part of the range is already a prealloc extent, so operate
* only on the remaining part of the range.
*/
alloc_start = em_end;
ASSERT(IS_ALIGNED(alloc_start, sectorsize));
len = offset + len - alloc_start;
offset = alloc_start;
/* Hint the allocator at the disk location right after this extent. */
alloc_hint = em->block_start + em->len;
}
free_extent_map(em);
/* The (possibly trimmed) range lies entirely within a single block. */
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
alloc_start, sectorsize, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
/* Block belongs to a prealloc extent: only update i_size if needed. */
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
}
/*
* Sub-block range that does not map to a hole: hand it to
* btrfs_truncate_block() instead of allocating anything.
*/
if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
free_extent_map(em);
ret = btrfs_truncate_block(inode, offset, len, 0);
if (!ret)
ret = btrfs_fallocate_update_isize(inode,
offset + len,
mode);
/*
* Nothing has been reserved yet and data_reserved is still
* NULL, so the cleanup at the out label is not needed here.
*/
return ret;
}
free_extent_map(em);
/* Otherwise allocate exactly the one block containing offset. */
alloc_start = round_down(offset, sectorsize);
alloc_end = alloc_start + sectorsize;
goto reserve_space;
}
/*
* Start with only the blocks fully inside the range; the boundary checks
* below may widen this to include partially covered blocks.
*/
alloc_start = round_up(offset, sectorsize);
alloc_end = round_down(offset + len, sectorsize);
/*
* For unaligned ranges, check the pages at the boundaries, they might
* map to an extent, in which case we need to partially zero them, or
* they might map to a hole, in which case we need our allocation range
* to cover them.
*/
if (!IS_ALIGNED(offset, sectorsize)) {
ret = btrfs_zero_range_check_range_boundary(inode, offset);
if (ret < 0)
goto out;
if (ret == RANGE_BOUNDARY_HOLE) {
alloc_start = round_down(offset, sectorsize);
/* Drop the positive classification value held in ret. */
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
ret = btrfs_truncate_block(inode, offset, 0, 0);
if (ret)
goto out;
} else {
/* Prealloc boundary: nothing to do, just reset ret. */
ret = 0;
}
}
if (!IS_ALIGNED(offset + len, sectorsize)) {
ret = btrfs_zero_range_check_range_boundary(inode,
offset + len);
if (ret < 0)
goto out;
if (ret == RANGE_BOUNDARY_HOLE) {
alloc_end = round_up(offset + len, sectorsize);
/* Drop the positive classification value held in ret. */
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
ret = btrfs_truncate_block(inode, offset + len, 0, 1);
if (ret)
goto out;
} else {
/* Prealloc boundary: nothing to do, just reset ret. */
ret = 0;
}
}
reserve_space:
/* The allocation range is empty when no whole block is left to allocate. */
if (alloc_start < alloc_end) {
struct extent_state *cached_state = NULL;
const u64 lockstart = alloc_start;
const u64 lockend = alloc_end - 1;
bytes_to_reserve = alloc_end - alloc_start;
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
bytes_to_reserve);
if (ret < 0)
goto out;
/* From here on the out label releases the reservation on error. */
space_reserved = true;
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
alloc_start, bytes_to_reserve);
if (ret)
goto out;
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
&cached_state);
if (ret)
goto out;
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
i_blocksize(inode),
offset + len, &alloc_hint);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state);
/* btrfs_prealloc_file_range releases reserved space on error */
if (ret) {
space_reserved = false;
goto out;
}
}
ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
out:
if (ret && space_reserved)
btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, bytes_to_reserve);
extent_changeset_free(data_reserved);
return ret;
}
static long btrfs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
@ -2832,7 +3081,8 @@ static long btrfs_fallocate(struct file *file, int mode,
cur_offset = alloc_start;
/* Make sure we aren't being given some crap mode */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
@ -2843,10 +3093,12 @@ static long btrfs_fallocate(struct file *file, int mode,
*
* For qgroup space, it will be checked later.
*/
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
alloc_end - alloc_start);
if (ret < 0)
return ret;
if (!(mode & FALLOC_FL_ZERO_RANGE)) {
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
alloc_end - alloc_start);
if (ret < 0)
return ret;
}
inode_lock(inode);
@ -2888,6 +3140,12 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret)
goto out;
if (mode & FALLOC_FL_ZERO_RANGE) {
ret = btrfs_zero_range(inode, offset, len, mode);
inode_unlock(inode);
return ret;
}
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
@ -2897,15 +3155,15 @@ static long btrfs_fallocate(struct file *file, int mode,
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
locked_end, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode,
alloc_end - 1);
ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
if (ordered &&
ordered->file_offset + ordered->len > alloc_start &&
ordered->file_offset < alloc_end) {
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end,
&cached_state, GFP_KERNEL);
&cached_state);
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
@ -2923,7 +3181,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
while (1) {
while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
if (IS_ERR(em)) {
@ -2959,8 +3217,6 @@ static long btrfs_fallocate(struct file *file, int mode,
}
free_extent_map(em);
cur_offset = last_byte;
if (cur_offset >= alloc_end)
break;
}
/*
@ -2983,37 +3239,18 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret < 0)
goto out_unlock;
if (actual_end > inode->i_size &&
!(mode & FALLOC_FL_KEEP_SIZE)) {
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
/*
* We didn't need to allocate any more space, but we
* still extended the size of the file so we need to
* update i_size and the inode item.
*/
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
} else {
inode->i_ctime = current_time(inode);
i_size_write(inode, actual_end);
btrfs_ordered_update_i_size(inode, actual_end, NULL);
ret = btrfs_update_inode(trans, root, inode);
if (ret)
btrfs_end_transaction(trans);
else
ret = btrfs_end_transaction(trans);
}
}
/*
* We didn't need to allocate any more space, but we still extended the
* size of the file so we need to update i_size and the inode item.
*/
ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_KERNEL);
&cached_state);
out:
inode_unlock(inode);
/* Let go of our reservation. */
if (ret != 0)
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
@ -3082,7 +3319,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
*offset = min_t(loff_t, start, inode->i_size);
}
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
&cached_state);
return ret;
}
@ -3146,7 +3383,7 @@ void btrfs_auto_defrag_exit(void)
kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
int btrfs_auto_defrag_init(void)
int __init btrfs_auto_defrag_init(void)
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
sizeof(struct inode_defrag), 0,

View File

@ -993,8 +993,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
GFP_NOFS);
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
goto fail;
}
leaf = path->nodes[0];
@ -1008,7 +1007,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
NULL, GFP_NOFS);
NULL);
btrfs_release_path(path);
goto fail;
}
@ -1105,8 +1104,7 @@ static int flush_dirty_cache(struct inode *inode)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
GFP_NOFS);
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
return ret;
}
@ -1127,8 +1125,7 @@ cleanup_write_cache_enospc(struct inode *inode,
{
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, cached_state,
GFP_NOFS);
i_size_read(inode) - 1, cached_state);
}
static int __btrfs_wait_cache_io(struct btrfs_root *root,
@ -1322,7 +1319,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
i_size_read(inode) - 1, &cached_state);
/*
* at this point the pages are under IO and we're happy,

View File

@ -537,9 +537,14 @@ static noinline void compress_file_range(struct inode *inode,
*
* If the compression fails for any reason, we set the pages
* dirty again later on.
*
* Note that the remaining part is redirtied, the start pointer
* has moved, the end is the original one.
*/
extent_range_clear_dirty_for_io(inode, start, end);
redirty = 1;
if (!redirty) {
extent_range_clear_dirty_for_io(inode, start, end);
redirty = 1;
}
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
@ -766,11 +771,10 @@ static noinline void submit_compressed_extents(struct inode *inode,
* all those pages down to the drive.
*/
if (!page_started && !ret)
extent_write_locked_range(io_tree,
inode, async_extent->start,
extent_write_locked_range(inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
btrfs_get_extent,
WB_SYNC_ALL);
else if (ret)
unlock_page(async_cow->locked_page);
@ -1204,7 +1208,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
u64 cur_end;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
1, 0, NULL);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
BUG_ON(!async_cow); /* -ENOMEM */
@ -1952,7 +1956,21 @@ static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
/*
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
* on write, or reading the csums from the tree before a read.
*
* Rules about async/sync submit,
* a) read: sync submit
*
* b) write without checksum: sync submit
*
* c) write with checksum:
* c-1) if bio is issued by fsync: sync submit
* (sync_writers != 0)
*
* c-2) if root is reloc root: sync submit
* (only in case of buffered IO)
*
* c-3) otherwise: async submit
*/
static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
@ -2024,10 +2042,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sum;
list_for_each_entry(sum, list, list) {
trans->adding_csums = 1;
trans->adding_csums = true;
btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
trans->adding_csums = 0;
trans->adding_csums = false;
}
return 0;
}
@ -2083,7 +2101,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
PAGE_SIZE);
if (ordered) {
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
page_end, &cached_state, GFP_NOFS);
page_end, &cached_state);
unlock_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
@ -2099,14 +2117,21 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
goto out;
}
btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state,
0);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
&cached_state, 0);
if (ret) {
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
ClearPageChecked(page);
goto out;
}
ClearPageChecked(page);
set_page_dirty(page);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
&cached_state);
out_page:
unlock_page(page);
put_page(page);
@ -2698,7 +2723,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
btrfs_end_transaction(trans);
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
&cached, GFP_NOFS);
&cached);
iput(inode);
return ret;
}
@ -2987,7 +3012,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
clear_extent_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
EXTENT_DEFRAG, 0, 0, &cached_state);
}
if (nolock)
@ -3057,7 +3082,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->len - 1,
clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
0, &cached_state, GFP_NOFS);
0, &cached_state);
}
if (trans)
@ -3071,7 +3096,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
else
start = ordered_extent->file_offset;
end = ordered_extent->file_offset + ordered_extent->len - 1;
clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
clear_extent_uptodate(io_tree, start, end, NULL);
/* Drop the cache for the part of the extent we didn't write. */
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
@ -4747,8 +4772,8 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
u64 block_start;
u64 block_end;
if ((offset & (blocksize - 1)) == 0 &&
(!len || ((len & (blocksize - 1)) == 0)))
if (IS_ALIGNED(offset, blocksize) &&
(!len || IS_ALIGNED(len, blocksize)))
goto out;
block_start = round_down(from, blocksize);
@ -4790,7 +4815,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
&cached_state);
unlock_page(page);
put_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
@ -4801,13 +4826,13 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state, 0);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
&cached_state);
goto out_unlock;
}
@ -4826,8 +4851,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
}
ClearPageChecked(page);
set_page_dirty(page);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
GFP_NOFS);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
out_unlock:
if (ret)
@ -4928,7 +4952,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
if (!ordered)
break;
unlock_extent_cached(io_tree, hole_start, block_end - 1,
&cached_state, GFP_NOFS);
&cached_state);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
@ -4993,8 +5017,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
break;
}
free_extent_map(em);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
GFP_NOFS);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
return err;
}
@ -5237,8 +5260,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 1,
&cached_state, GFP_NOFS);
EXTENT_DEFRAG, 1, 1, &cached_state);
cond_resched();
spin_lock(&io_tree->lock);
@ -5897,7 +5919,6 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_private *private = file->private_data;
struct btrfs_dir_item *di;
@ -5965,9 +5986,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(fs_info, leaf, slot, di))
goto next;
name_len = btrfs_dir_name_len(leaf, di);
if ((total_len + sizeof(struct dir_entry) + name_len) >=
PAGE_SIZE) {
@ -6301,7 +6319,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
}
/*
* index_cnt is ignored for everything but a dir,
* btrfs_get_inode_index_count has an explanation for the magic
* btrfs_set_inode_index_count has an explanation for the magic
* number
*/
BTRFS_I(inode)->index_cnt = 2;
@ -6564,7 +6582,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
out_unlock:
btrfs_end_transaction(trans);
btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
if (drop_inode) {
inode_dec_link_count(inode);
@ -6645,7 +6662,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
inode_dec_link_count(inode);
iput(inode);
}
btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return err;
@ -6720,7 +6736,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
btrfs_balance_delayed_items(fs_info);
fail:
if (trans)
btrfs_end_transaction(trans);
@ -6798,7 +6813,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode_dec_link_count(inode);
iput(inode);
}
btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return err;
@ -6807,68 +6821,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
}
/*
 * Return the extent map that follows @em in its rb-tree, or NULL when @em
 * has no successor. The caller needs to ensure locks.
 */
static struct extent_map *next_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_next(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/*
 * Return the extent map that precedes @em in its rb-tree, or NULL when @em
 * has no predecessor.
 */
static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_prev(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/*
 * Helper for btrfs_get_extent. Given an existing extent in the tree (the
 * one nearest to @map_start) and an extent @em that you want to insert, deal
 * with the overlap and insert the best fitted new extent into the tree.
 *
 * @em is trimmed in place so that it starts no earlier than the end of the
 * neighbour before @map_start and ends no later than the start of the
 * neighbour after it, then handed to add_extent_mapping().
 *
 * @map_start must lie inside @em (enforced with BUG_ON).
 *
 * Returns the result of add_extent_mapping().
 */
static int merge_extent_mapping(struct extent_map_tree *em_tree,
				struct extent_map *existing,
				struct extent_map *em,
				u64 map_start)
{
	struct extent_map *prev;
	struct extent_map *next;
	u64 start;
	u64 end;
	u64 start_diff;

	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	/* Work out which side of @map_start the existing extent sits on. */
	if (existing->start > map_start) {
		next = existing;
		prev = prev_extent_map(next);
	} else {
		prev = existing;
		next = next_extent_map(prev);
	}

	/* Clamp [start, end) to the gap between the neighbours and to @em. */
	start = prev ? extent_map_end(prev) : em->start;
	start = max_t(u64, start, em->start);
	end = next ? next->start : extent_map_end(em);
	end = min_t(u64, end, extent_map_end(em));
	start_diff = start - em->start;
	em->start = start;
	em->len = end - start;
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		/*
		 * The tail of @em may have been trimmed as well as the front,
		 * so subtracting only start_diff would leave block_len too
		 * large. NOTE(review): relies on block_len == len for
		 * uncompressed extents that map to disk - confirm.
		 */
		em->block_len = em->len;
	}
	return add_extent_mapping(em_tree, em, 0);
}
static noinline int uncompress_inline(struct btrfs_path *path,
struct page *page,
size_t pg_offset, u64 extent_offset,
@ -6943,10 +6895,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct extent_map *em = NULL;
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_trans_handle *trans = NULL;
const bool new_inline = !page || create;
again:
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
@ -6985,8 +6935,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
path->reada = READA_FORWARD;
}
ret = btrfs_lookup_file_extent(trans, root, path,
objectid, start, trans != NULL);
ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
if (ret < 0) {
err = ret;
goto out;
@ -7087,7 +7036,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
em->orig_block_len = em->len;
em->orig_start = em->start;
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) {
if (!PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
BTRFS_COMPRESS_NONE) {
ret = uncompress_inline(path, page, pg_offset,
@ -7108,25 +7057,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
kunmap(page);
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
BUG();
if (!trans) {
kunmap(page);
free_extent_map(em);
em = NULL;
btrfs_release_path(path);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return ERR_CAST(trans);
goto again;
}
map = kmap(page);
write_extent_buffer(leaf, map + pg_offset, ptr,
copy_size);
kunmap(page);
btrfs_mark_buffer_dirty(leaf);
}
set_extent_uptodate(io_tree, em->start,
extent_map_end(em) - 1, NULL, GFP_NOFS);
@ -7138,7 +7068,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
set_bit(EXTENT_FLAG_VACANCY, &em->flags);
insert:
btrfs_release_path(path);
if (em->start > start || extent_map_end(em) <= start) {
@ -7151,62 +7080,13 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
err = 0;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
*/
if (ret == -EEXIST) {
struct extent_map *existing;
ret = 0;
existing = search_extent_mapping(em_tree, start, len);
/*
* existing will always be non-NULL, since there must be
* extent causing the -EEXIST.
*/
if (existing->start == em->start &&
extent_map_end(existing) >= extent_map_end(em) &&
em->block_start == existing->block_start) {
/*
* The existing extent map already encompasses the
* entire extent map we tried to add.
*/
free_extent_map(em);
em = existing;
err = 0;
} else if (start >= extent_map_end(existing) ||
start <= existing->start) {
/*
* The existing extent map is the one nearest to
* the [start, start + len) range which overlaps
*/
err = merge_extent_mapping(em_tree, existing,
em, start);
free_extent_map(existing);
if (err) {
free_extent_map(em);
em = NULL;
}
} else {
free_extent_map(em);
em = existing;
err = 0;
}
}
err = btrfs_add_extent_mapping(em_tree, &em, start, len);
write_unlock(&em_tree->lock);
out:
trace_btrfs_get_extent(root, inode, em);
btrfs_free_path(path);
if (trans) {
ret = btrfs_end_transaction(trans);
if (!err)
err = ret;
}
if (err) {
free_extent_map(em);
return ERR_PTR(err);
@ -7328,7 +7208,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
em->block_start = EXTENT_MAP_DELALLOC;
em->block_len = found;
}
} else if (hole_em) {
} else {
return hole_em;
}
out:
@ -7645,7 +7525,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
break;
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
cached_state, GFP_NOFS);
cached_state);
if (ordered) {
/*
@ -7930,7 +7810,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (lockstart < lockend) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
&cached_state, GFP_NOFS);
&cached_state);
} else {
free_extent_state(cached_state);
}
@ -7941,7 +7821,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
unlock_bits, 1, 0, &cached_state);
err:
if (dio_data)
current->journal_info = dio_data;
@ -7957,15 +7837,12 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
BUG_ON(bio_op(bio) == REQ_OP_WRITE);
bio_get(bio);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
if (ret)
goto err;
return ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
err:
bio_put(bio);
return ret;
}
@ -8466,11 +8343,10 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
bool write = bio_op(bio) == REQ_OP_WRITE;
blk_status_t ret;
/* Check btrfs_submit_bio_hook() for rules about async submit. */
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
bio_get(bio);
if (!write) {
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret)
@ -8503,7 +8379,6 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
map:
ret = btrfs_map_bio(fs_info, bio, 0, 0);
err:
bio_put(bio);
return ret;
}
@ -8860,7 +8735,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
return extent_fiemap(inode, fieinfo, start, len);
}
int btrfs_readpage(struct file *file, struct page *page)
@ -8872,7 +8747,6 @@ int btrfs_readpage(struct file *file, struct page *page)
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
struct inode *inode = page->mapping->host;
int ret;
@ -8891,8 +8765,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
tree = &BTRFS_I(page->mapping->host)->io_tree;
ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
ret = extent_write_full_page(page, wbc);
btrfs_add_delayed_iput(inode);
return ret;
}
@ -8903,7 +8776,7 @@ static int btrfs_writepages(struct address_space *mapping,
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;
return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
return extent_writepages(tree, mapping, wbc);
}
static int
@ -8912,8 +8785,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
{
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;
return extent_readpages(tree, mapping, pages, nr_pages,
btrfs_get_extent);
return extent_readpages(tree, mapping, pages, nr_pages);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
@ -8984,8 +8856,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state,
GFP_NOFS);
EXTENT_DEFRAG, 1, 0, &cached_state);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
@ -9042,7 +8913,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
&cached_state, GFP_NOFS);
&cached_state);
__btrfs_releasepage(page, GFP_NOFS);
}
@ -9143,7 +9014,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
PAGE_SIZE);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
&cached_state);
unlock_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
@ -9170,13 +9041,13 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, page_start, end, 0,
&cached_state, 0);
if (ret) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
&cached_state);
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
@ -9202,7 +9073,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
out_unlock:
if (!ret) {
@ -9427,7 +9298,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
struct btrfs_inode *ei;
struct inode *inode;
ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
@ -9579,7 +9450,7 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_free_space_cachep);
}
int btrfs_init_cachep(void)
int __init btrfs_init_cachep(void)
{
btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
@ -10694,7 +10565,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
btrfs_end_transaction(trans);
if (ret)
iput(inode);
btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return ret;

View File

@ -308,12 +308,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ip->flags |= BTRFS_INODE_COMPRESS;
ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
comp = "lzo";
else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB)
comp = "zlib";
else
comp = "zstd";
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
ret = btrfs_set_prop(inode, "btrfs.compression",
comp, strlen(comp), 0);
if (ret)
@ -980,7 +978,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
/* get the big lock and read metadata off disk */
lock_extent_bits(io_tree, start, end, &cached);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
unlock_extent_cached(io_tree, start, end, &cached);
if (IS_ERR(em))
return NULL;
@ -1131,7 +1129,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
ordered = btrfs_lookup_ordered_extent(inode,
page_start);
unlock_extent_cached(tree, page_start, page_end,
&cached_state, GFP_NOFS);
&cached_state);
if (!ordered)
break;
@ -1191,7 +1189,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
&cached_state, GFP_NOFS);
&cached_state);
if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
@ -1207,8 +1205,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
&cached_state);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state,
GFP_NOFS);
page_start, page_end - 1, &cached_state);
for (i = 0; i < i_done; i++) {
clear_page_dirty_for_io(pages[i]);
@ -1504,7 +1501,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
goto out_free;
}
if (!device->writeable) {
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
btrfs_info(fs_info,
"resizer unable to apply on readonly device %llu",
devid);
@ -1529,7 +1526,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
}
}
if (device->is_tgtdev_for_dev_replace) {
if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
ret = -EPERM;
goto out_free;
}
@ -2676,14 +2673,12 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out;
}
mutex_lock(&fs_info->volume_mutex);
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
} else {
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
if (!ret) {
@ -2727,9 +2722,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
mutex_lock(&fs_info->volume_mutex);
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
mutex_unlock(&fs_info->volume_mutex);
if (!ret)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
@ -2754,16 +2747,16 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
if (!fi_args)
return -ENOMEM;
mutex_lock(&fs_devices->device_list_mutex);
rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
list_for_each_entry(device, &fs_devices->devices, dev_list) {
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id)
fi_args->max_id = device->devid;
}
mutex_unlock(&fs_devices->device_list_mutex);
rcu_read_unlock();
memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
fi_args->nodesize = fs_info->nodesize;
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
@ -2780,7 +2773,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
{
struct btrfs_ioctl_dev_info_args *di_args;
struct btrfs_device *dev;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
int ret = 0;
char *s_uuid = NULL;
@ -2791,7 +2783,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (!btrfs_is_empty_uuid(di_args->uuid))
s_uuid = di_args->uuid;
mutex_lock(&fs_devices->device_list_mutex);
rcu_read_lock();
dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL);
if (!dev) {
@ -2806,17 +2798,15 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (dev->name) {
struct rcu_string *name;
rcu_read_lock();
name = rcu_dereference(dev->name);
strncpy(di_args->path, name->str, sizeof(di_args->path));
rcu_read_unlock();
strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
}
out:
mutex_unlock(&fs_devices->device_list_mutex);
rcu_read_unlock();
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
ret = -EFAULT;

View File

@ -164,7 +164,6 @@ static int iterate_object_props(struct btrfs_root *root,
size_t),
void *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
char *name_buf = NULL;
char *value_buf = NULL;
@ -215,12 +214,6 @@ static int iterate_object_props(struct btrfs_root *root,
name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len;
if (verify_dir_item(fs_info, leaf,
path->slots[0], di)) {
ret = -EIO;
goto out;
}
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr,
@ -430,11 +423,11 @@ static const char *prop_compression_extract(struct inode *inode)
{
switch (BTRFS_I(inode)->prop_compress) {
case BTRFS_COMPRESS_ZLIB:
return "zlib";
case BTRFS_COMPRESS_LZO:
return "lzo";
case BTRFS_COMPRESS_ZSTD:
return "zstd";
return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
default:
break;
}
return NULL;

View File

@ -2883,8 +2883,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(&reserved->range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
GFP_NOFS);
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
extent_changeset_release(reserved);
return ret;
}

View File

@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
init_waitqueue_head(&cur->wait);
}
x = cmpxchg(&info->stripe_hash_table, NULL, table);
@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
* bio list here, anyone else that wants to
* change this stripe needs to do their own rmw.
*/
if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
cur->operation == BTRFS_RBIO_PARITY_SCRUB)
if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
cur->operation == BTRFS_RBIO_REBUILD_MISSING)
if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
return 0;
if (last->operation == BTRFS_RBIO_READ_REBUILD) {
int fa = last->faila;
int fb = last->failb;
int cur_fa = cur->faila;
int cur_fb = cur->failb;
if (last->faila >= last->failb) {
fa = last->failb;
fb = last->faila;
}
if (cur->faila >= cur->failb) {
cur_fa = cur->failb;
cur_fb = cur->faila;
}
if (fa != cur_fa || fb != cur_fb)
return 0;
}
return 1;
}
@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending;
unsigned long flags;
DEFINE_WAIT(wait);
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
}
goto done_nolock;
/*
* The barrier for this waitqueue_active is not needed,
* we're protected by h->lock and can't miss a wakeup.
*/
} else if (waitqueue_active(&h->wait)) {
spin_unlock(&rbio->bio_list_lock);
spin_unlock_irqrestore(&h->lock, flags);
wake_up(&h->wait);
goto done_nolock;
}
}
done:
@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
kfree(rbio);
}
static void free_raid_bio(struct btrfs_raid_bio *rbio)
static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
unlock_stripe(rbio);
__free_raid_bio(rbio);
struct bio *next;
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
cur->bi_status = err;
bio_endio(cur);
cur = next;
}
}
/*
@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
struct bio *extra;
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
free_raid_bio(rbio);
/*
* At this moment, rbio->bio_list is empty, however since rbio does not
* always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
* hash list, rbio may be merged with others so that rbio->bio_list
* becomes non-empty.
* Once unlock_stripe() is done, rbio->bio_list will not be updated any
* more and we can call bio_endio() on all queued bios.
*/
unlock_stripe(rbio);
extra = bio_list_get(&rbio->bio_list);
__free_raid_bio(rbio);
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
cur->bi_status = err;
bio_endio(cur);
cur = next;
}
rbio_endio_bio_list(cur, err);
if (extra)
rbio_endio_bio_list(extra, err);
}
/*
@ -1435,14 +1454,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct bio *bio)
{
struct bio_vec bvec;
struct bvec_iter iter;
struct bio_vec *bvec;
int i;
if (bio_flagged(bio, BIO_CLONED))
bio->bi_iter = btrfs_io_bio(bio)->iter;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment(bvec, bio, iter)
SetPageUptodate(bvec.bv_page);
bio_for_each_segment_all(bvec, bio, i)
SetPageUptodate(bvec->bv_page);
}
/*
@ -1969,7 +1987,22 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
if (err == BLK_STS_OK)
/*
* - In case of two failures, where rbio->failb != -1:
*
* Do not cache this rbio since the above read reconstruction
* (raid6_datap_recov() or raid6_2data_recov()) may have
* changed some content of stripes which are not identical to
* on-disk content any more, otherwise, a later write/recover
* may steal stripe_pages from this rbio and end up with
* corruptions or rebuild failures.
*
* - In case of single failure, where rbio->failb == -1:
*
* Cache this rbio iff the above read reconstruction is
* executed without problems.
*/
if (err == BLK_STS_OK && rbio->failb < 0)
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@ -2170,11 +2203,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
}
/*
* reconstruct from the q stripe if they are
* asking for mirror 3
* Loop retry:
* for 'mirror == 2', reconstruct from all other stripes.
* for 'mirror_num > 2', select a stripe to fail on every retry.
*/
if (mirror_num == 3)
rbio->failb = rbio->real_stripes - 2;
if (mirror_num > 2) {
/*
* 'mirror == 3' is to fail the p stripe and
* reconstruct from the q stripe. 'mirror > 3' is to
* fail a data stripe and reconstruct from p+q stripe.
*/
rbio->failb = rbio->real_stripes - (mirror_num - 1);
ASSERT(rbio->failb > 0);
if (rbio->failb <= rbio->faila)
rbio->failb--;
}
ret = lock_stripe_add(rbio);

View File

@ -606,8 +606,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
}
/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level)
static int walk_up_tree(struct btrfs_path *path, int *level)
{
int l;
@ -984,7 +983,6 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_path *path;
struct btrfs_root *root;
struct extent_buffer *eb;
u64 bytenr = 0, num_bytes = 0;
int ret, level;
@ -1014,7 +1012,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
&bytenr, &num_bytes);
if (ret)
break;
ret = walk_up_tree(root, path, &level);
ret = walk_up_tree(path, &level);
if (ret < 0)
break;
if (ret > 0) {

View File

@ -387,13 +387,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr,
name_len);
if (!ret) {
err = -EIO;
goto out;
}
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
*sequence = btrfs_root_ref_sequence(leaf, ref);

View File

@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
@ -1323,15 +1328,34 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* could happen otherwise that a correct page would be
* overwritten by a bad one).
*/
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
mirror_index++) {
for (mirror_index = 0; ;mirror_index++) {
struct scrub_block *sblock_other;
if (mirror_index == failed_mirror_index)
continue;
sblock_other = sblocks_for_recheck + mirror_index;
/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
if (mirror_index >= BTRFS_MAX_MIRRORS)
break;
if (!sblocks_for_recheck[mirror_index].page_count)
break;
sblock_other = sblocks_for_recheck + mirror_index;
} else {
struct scrub_recover *r = sblock_bad->pagev[0]->recover;
int max_allowed = r->bbio->num_stripes -
r->bbio->num_tgtdevs;
if (mirror_index >= max_allowed)
break;
if (!sblocks_for_recheck[1].page_count)
break;
ASSERT(failed_mirror_index == 0);
sblock_other = sblocks_for_recheck + 1;
sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
}
/* build and submit the bios, check checksums */
scrub_recheck_block(fs_info, sblock_other, 0);
@ -1666,49 +1690,32 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
return 0;
}
struct scrub_bio_ret {
struct completion event;
blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
ret->status = bio->bi_status;
complete(&ret->event);
}
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
complete(bio->bi_private);
}
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
struct bio *bio,
struct scrub_page *page)
{
struct scrub_bio_ret done;
DECLARE_COMPLETION_ONSTACK(done);
int ret;
int mirror_num;
init_completion(&done.event);
done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
mirror_num = page->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
page->recover->map_length,
page->mirror_num, 0);
mirror_num, 0);
if (ret)
return ret;
wait_for_completion_io(&done.event);
if (done.status)
return -EIO;
return 0;
wait_for_completion_io(&done);
return blk_status_to_errno(bio->bi_status);
}
/*
@ -2535,7 +2542,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
}
WARN_ON(sblock->page_count == 0);
if (dev->missing) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
/*
* This case should only be hit for RAID 5/6 device replace. See
* the comment in scrub_missing_raid56_pages() for details.
@ -2870,7 +2877,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
if (dev->missing) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
scrub_parity_mark_sectors_error(sparity, logical, len);
return 0;
}
@ -4112,12 +4119,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!dev || (dev->missing && !is_dev_replace)) {
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -ENODEV;
}
if (!is_dev_replace && !readonly && !dev->writeable) {
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
rcu_read_lock();
name = rcu_dereference(dev->name);
@ -4128,14 +4137,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
}
mutex_lock(&fs_info->scrub_lock);
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EIO;
}
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device ||
if (dev->scrub_ctx ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
@ -4160,7 +4170,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return PTR_ERR(sctx);
}
sctx->readonly = readonly;
dev->scrub_device = sctx;
dev->scrub_ctx = sctx;
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
@ -4195,7 +4205,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_lock(&fs_info->scrub_lock);
dev->scrub_device = NULL;
dev->scrub_ctx = NULL;
scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
@ -4252,16 +4262,16 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
struct scrub_ctx *sctx;
mutex_lock(&fs_info->scrub_lock);
sctx = dev->scrub_device;
sctx = dev->scrub_ctx;
if (!sctx) {
mutex_unlock(&fs_info->scrub_lock);
return -ENOTCONN;
}
atomic_inc(&sctx->cancel_req);
while (dev->scrub_device) {
while (dev->scrub_ctx) {
mutex_unlock(&fs_info->scrub_lock);
wait_event(fs_info->scrub_pause_wait,
dev->scrub_device == NULL);
dev->scrub_ctx == NULL);
mutex_lock(&fs_info->scrub_lock);
}
mutex_unlock(&fs_info->scrub_lock);
@ -4278,7 +4288,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (dev)
sctx = dev->scrub_device;
sctx = dev->scrub_ctx;
if (sctx)
memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@ -4478,8 +4488,7 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
free_extent_map(em);
out_unlock:
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
GFP_NOFS);
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
return ret;
}

View File

@ -1059,12 +1059,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
}
}
ret = btrfs_is_name_len_valid(eb, path->slots[0],
(unsigned long)(di + 1), name_len + data_len);
if (!ret) {
ret = -EIO;
goto out;
}
if (name_len + data_len > buf_len) {
buf_len = name_len + data_len;
if (is_vmalloc_addr(buf)) {

View File

@ -61,12 +61,21 @@
#include "tests/btrfs-tests.h"
#include "qgroup.h"
#include "backref.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
static const struct super_operations btrfs_super_ops;
/*
* Types for mounting the default subvolume and a subvolume explicitly
* requested by subvol=/path. That way the callchain is straightforward and we
* don't have to play tricks with the mount options and recursive calls to
* btrfs_mount.
*
* The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
*/
static struct file_system_type btrfs_fs_type;
static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno)
return errstr;
}
/* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
struct super_block *sb = fs_info->sb;
if (sb_rdonly(sb))
return;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not
* canceled here although there is no way to update
* the progress. It would add the risk of a deadlock,
* therefore the canceling is omitted. The only penalty
* is that some I/O remains active until the procedure
* completes. The next time when the filesystem is
* mounted writeable again, the device replace
* operation continues.
*/
}
}
/*
* __btrfs_handle_fs_error decodes expected errors from the caller and
* invokes the appropriate error response.
@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/* Don't go through full error handling during mount */
if (sb->s_flags & SB_BORN)
btrfs_handle_error(fs_info);
if (!(sb->s_flags & SB_BORN))
return;
if (sb_rdonly(sb))
return;
/* btrfs handle error by forcing the filesystem readonly */
sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not canceled here
* although there is no way to update the progress. It would add the
* risk of a deadlock, therefore the canceling is omitted. The only
* penalty is that some I/O remains active until the procedure
* completes. The next time when the filesystem is mounted writeable
* again, the device replace operation continues.
*/
}
#ifdef CONFIG_PRINTK
@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags)
{
substring_t args[MAX_OPT_ARGS];
char *p, *num, *orig = NULL;
char *p, *num;
u64 cache_gen;
int intarg;
int ret = 0;
@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
if (!options)
goto check;
/*
* strsep changes the string, duplicate it because parse_options
* gets called twice
*/
options = kstrdup(options, GFP_KERNEL);
if (!options)
return -ENOMEM;
orig = options;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid:
case Opt_device:
/*
* These are parsed by btrfs_parse_early_options
* These are parsed by btrfs_parse_subvol_options
* and btrfs_parse_early_options
* and can be happily ignored here.
*/
break;
@ -877,7 +868,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_info(info, "disk space caching is enabled");
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(info, "using free space tree");
kfree(orig);
return ret;
}
@ -888,11 +878,60 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
* only when we need to allocate a new super block.
*/
static int btrfs_parse_early_options(const char *options, fmode_t flags,
void *holder, char **subvol_name, u64 *subvol_objectid,
struct btrfs_fs_devices **fs_devices)
void *holder, struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
int error = 0;
if (!options)
return 0;
/*
* strsep changes the string, duplicate it because btrfs_parse_options
* gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
return -ENOMEM;
orig = opts;
while ((p = strsep(&opts, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
if (token == Opt_device) {
device_name = match_strdup(&args[0]);
if (!device_name) {
error = -ENOMEM;
goto out;
}
error = btrfs_scan_one_device(device_name,
flags, holder, fs_devices);
kfree(device_name);
if (error)
goto out;
}
}
out:
kfree(orig);
return error;
}
/*
* Parse mount options that are related to subvolume id
*
* The value is later passed to mount_subvol()
*/
static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
char **subvol_name, u64 *subvol_objectid)
{
substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p;
char *num = NULL;
int error = 0;
@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
return 0;
/*
* strsep changes the string, duplicate it because parse_options
* gets called twice
* strsep changes the string, duplicate it because
* btrfs_parse_early_options gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
break;
case Opt_device:
device_name = match_strdup(&args[0]);
if (!device_name) {
error = -ENOMEM;
goto out;
}
error = btrfs_scan_one_device(device_name,
flags, holder, fs_devices);
kfree(device_name);
if (error)
goto out;
break;
default:
break;
}
@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
char *compress_type;
const char *compress_type;
if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
if (btrfs_test_opt(info, COMPRESS)) {
if (info->compress_type == BTRFS_COMPRESS_ZLIB)
compress_type = "zlib";
else if (info->compress_type == BTRFS_COMPRESS_LZO)
compress_type = "lzo";
else
compress_type = "zstd";
compress_type = btrfs_compress_type2str(info->compress_type);
if (btrfs_test_opt(info, FORCE_COMPRESS))
seq_printf(seq, ",compress-force=%s", compress_type);
else
@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode)
return 0;
}
/*
* This will add subvolid=0 to the argument string while removing any subvol=
* and subvolid= arguments to make sure we get the top-level root for path
* walking to the subvol we want.
*/
static char *setup_root_args(char *args)
{
char *buf, *dst, *sep;
if (!args)
return kstrdup("subvolid=0", GFP_KERNEL);
/* The worst case is that we add ",subvolid=0" to the end. */
buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
GFP_KERNEL);
if (!buf)
return NULL;
while (1) {
sep = strchrnul(args, ',');
if (!strstarts(args, "subvol=") &&
!strstarts(args, "subvolid=")) {
memcpy(dst, args, sep - args);
dst += sep - args;
*dst++ = ',';
}
if (*sep)
args = sep + 1;
else
break;
}
strcpy(dst, "subvolid=0");
return buf;
}
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
int flags, const char *device_name,
char *data)
const char *device_name, struct vfsmount *mnt)
{
struct dentry *root;
struct vfsmount *mnt = NULL;
char *newargs;
int ret;
newargs = setup_root_args(data);
if (!newargs) {
root = ERR_PTR(-ENOMEM);
goto out;
}
mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
if (flags & SB_RDONLY) {
mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
device_name, newargs);
} else {
mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
device_name, newargs);
if (IS_ERR(mnt)) {
root = ERR_CAST(mnt);
mnt = NULL;
goto out;
}
down_write(&mnt->mnt_sb->s_umount);
ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
up_write(&mnt->mnt_sb->s_umount);
if (ret < 0) {
root = ERR_PTR(ret);
goto out;
}
}
}
if (IS_ERR(mnt)) {
root = ERR_CAST(mnt);
mnt = NULL;
goto out;
}
if (!subvol_name) {
if (!subvol_objectid) {
ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
out:
mntput(mnt);
kfree(newargs);
kfree(subvol_name);
return root;
}
@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
/*
* Find a superblock for the given device / mount point.
*
* Note: This is based on get_sb_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
* Note: This is based on mount_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
*/
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
const char *device_name, void *data)
static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
int flags, const char *device_name, void *data)
{
struct block_device *bdev = NULL;
struct super_block *s;
@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
error = btrfs_parse_early_options(data, mode, fs_type,
&subvol_name, &subvol_objectid,
&fs_devices);
if (error) {
kfree(subvol_name);
return ERR_PTR(error);
}
if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
/* mount_subvol() will free subvol_name. */
return mount_subvol(subvol_name, subvol_objectid, flags,
device_name, data);
}
security_init_mnt_opts(&new_sec_opts);
if (data) {
error = parse_security_options(data, &new_sec_opts);
@ -1674,6 +1611,84 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
return ERR_PTR(error);
}
/*
* Mount function which is called by VFS layer.
*
* In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
* which needs vfsmount* of device's root (/). This means device's root has to
* be mounted internally in any case.
*
* Operation flow:
* 1. Parse subvol id related options for later use in mount_subvol().
*
* 2. Mount device's root (/) by calling vfs_kern_mount().
*
* NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
* first place. In order to avoid calling btrfs_mount() again, we use
* different file_system_type which is not registered to VFS by
* register_filesystem() (btrfs_root_fs_type). As a result,
* btrfs_mount_root() is called. The return value will be used by
* mount_subtree() in mount_subvol().
*
* 3. Call mount_subvol() to get the dentry of subvolume. Since there is
* "btrfs subvolume set-default", mount_subvol() is called always.
*/
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
		const char *device_name, void *data)
{
	struct vfsmount *mnt_root;
	struct dentry *root;
	fmode_t mode = FMODE_READ;
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	/*
	 * subvol_name is owned by this function from here until it is handed
	 * to mount_subvol(); every early exit below must free it.
	 */
	error = btrfs_parse_subvol_options(data, mode,
					  &subvol_name, &subvol_objectid);
	if (error) {
		kfree(subvol_name);
		return ERR_PTR(error);
	}

	/* mount device's root (/) */
	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
		/* Retry with the opposite read-only setting. */
		if (flags & SB_RDONLY) {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags & ~SB_RDONLY, device_name, data);
		} else {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags | SB_RDONLY, device_name, data);
			if (IS_ERR(mnt_root)) {
				root = ERR_CAST(mnt_root);
				goto out_free_name;
			}

			/* Read-only mount succeeded; remount with the requested flags. */
			down_write(&mnt_root->mnt_sb->s_umount);
			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
			up_write(&mnt_root->mnt_sb->s_umount);
			if (error < 0) {
				root = ERR_PTR(error);
				mntput(mnt_root);
				goto out_free_name;
			}
		}
	}
	if (IS_ERR(mnt_root)) {
		root = ERR_CAST(mnt_root);
		goto out_free_name;
	}

	/* mount_subvol() will free subvol_name and mnt_root */
	return mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);

out_free_name:
	/*
	 * mount_subvol() was never reached, so nobody else will release the
	 * name string. kfree(NULL) is a no-op when no subvol= was given.
	 */
	kfree(subvol_name);
	return root;
}
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size)
{
@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
if (!btrfs_check_rw_degradable(fs_info)) {
if (!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"too many missing devices, writeable remount is not allowed");
ret = -EACCES;
@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (!device->in_fs_metadata || !device->bdev ||
device->is_tgtdev_for_dev_replace)
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&device->dev_state) ||
!device->bdev ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
continue;
if (i >= nr_devices)
@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};
/*
 * Internal file system type used by btrfs_mount() to mount the device's
 * root (/) through vfs_kern_mount().  Its ->mount callback is
 * btrfs_mount_root(), so using it avoids calling btrfs_mount() again.
 */
static struct file_system_type btrfs_root_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
.mount = btrfs_mount_root,
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};
MODULE_ALIAS_FS("btrfs");
static int btrfs_control_open(struct inode *inode, struct file *file)
@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
&btrfs_root_fs_type, &fs_devices);
break;
case BTRFS_IOC_DEVICES_READY:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
&btrfs_root_fs_type, &fs_devices);
if (ret)
break;
ret = !(fs_devices->num_devices == fs_devices->total_devices);
@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) {
if (dev->missing)
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
continue;
@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = {
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");
static int btrfs_interface_init(void)
static int __init btrfs_interface_init(void)
{
return misc_register(&btrfs_misc);
}
@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void)
misc_deregister(&btrfs_misc);
}
static void btrfs_print_mod_info(void)
static void __init btrfs_print_mod_info(void)
{
pr_info("Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG

View File

@ -897,7 +897,7 @@ static int btrfs_init_debugfs(void)
return 0;
}
int btrfs_init_sysfs(void)
int __init btrfs_init_sysfs(void)
{
int ret;

View File

@ -277,6 +277,9 @@ int btrfs_run_sanity_tests(void)
goto out;
}
}
ret = btrfs_test_extent_map();
if (ret)
goto out;
out:
btrfs_destroy_test_fs();
return ret;

View File

@ -33,6 +33,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_map(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize);
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);

View File

@ -0,0 +1,366 @@
/*
* Copyright (C) 2017 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
/*
 * Empty @em_tree: repeatedly take the first extent map, unlink it from the
 * tree and drop the reference that is left.  With CONFIG_BTRFS_DEBUG a map
 * whose refcount is not exactly 1 is reported as a leak and its count is
 * forced back to 1 before the final put.
 */
static void free_extent_map_tree(struct extent_map_tree *em_tree)
{
	struct rb_node *first;

	for (first = rb_first(&em_tree->map); first;
	     first = rb_first(&em_tree->map)) {
		struct extent_map *victim;

		victim = rb_entry(first, struct extent_map, rb_node);
		remove_extent_mapping(em_tree, victim);

#ifdef CONFIG_BTRFS_DEBUG
		if (refcount_read(&victim->refs) != 1) {
			test_msg(
"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n",
				 victim->start, victim->len,
				 victim->block_start, victim->block_len,
				 refcount_read(&victim->refs));
			refcount_set(&victim->refs, 1);
		}
#endif
		free_extent_map(victim);
	}
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet, there is a file
* extent [0, 16K), followed by another file extent [16K, 20K), two dio reads
* are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is
* reading [0, 8K)
*
* t1 t2
* btrfs_get_extent() btrfs_get_extent()
* -> lookup_extent_mapping() ->lookup_extent_mapping()
* -> add_extent_mapping(0, 16K)
* -> return em
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
/*
 * Case 1: the tree already holds [0, 16K) and [16K, 20K).  Adding a map for
 * [0, 8K) through btrfs_add_extent_mapping() must hand back the existing
 * [0, 16K) map.  Problems are reported with test_msg(); the tree is emptied
 * before returning unless the very first allocation failed.
 */
static void test_case_1(struct extent_map_tree *em_tree)
{
	const u64 req_start = 0;
	const u64 req_len = SZ_8K;
	struct extent_map *map;
	int ret;

	map = alloc_extent_map();
	if (!map) {
		/* Nothing was inserted yet, so simply skip the test. */
		return;
	}

	/* Seed the tree with [0, 16K). */
	map->block_len = SZ_16K;
	map->block_start = 0;
	map->len = SZ_16K;
	map->start = 0;
	ret = add_extent_mapping(em_tree, map, 0);
	ASSERT(ret == 0);
	free_extent_map(map);

	/* Seed the tree with the following extent [16K, 20K). */
	map = alloc_extent_map();
	if (!map)
		goto out;

	map->block_len = SZ_4K;
	map->block_start = SZ_32K; /* avoid merging */
	map->len = SZ_4K;
	map->start = SZ_16K;
	ret = add_extent_mapping(em_tree, map, 0);
	ASSERT(ret == 0);
	free_extent_map(map);

	/* Now try to add [0, 8K); the existing [0, 16K) must come back. */
	map = alloc_extent_map();
	if (!map)
		goto out;

	map->start = req_start;
	map->len = req_len;
	map->block_start = req_start;
	map->block_len = req_len;
	ret = btrfs_add_extent_mapping(em_tree, &map, req_start, req_len);
	if (ret)
		test_msg("case1 [%llu %llu]: ret %d\n",
			 req_start, req_start + req_len, ret);

	if (map &&
	    !(map->start == 0 && extent_map_end(map) == SZ_16K &&
	      map->block_start == 0 && map->block_len == SZ_16K))
		test_msg(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
			 req_start, req_start + req_len, ret,
			 map->start, map->len,
			 map->block_start, map->block_len);
	free_extent_map(map);

out:
	/* Drop everything that was inserted above. */
	free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
/*
 * Case 2: the tree holds an inline extent [0, 1K) followed by a regular
 * extent [4K, 8K).  Adding the same inline extent again through
 * btrfs_add_extent_mapping() must return a map that still describes the
 * inline extent.  Problems are reported with test_msg().
 */
static void test_case_2(struct extent_map_tree *em_tree)
{
	struct extent_map *map;
	int ret;

	map = alloc_extent_map();
	if (!map) {
		/* Nothing was inserted yet, so simply skip the test. */
		return;
	}

	/* Seed the tree with the inline extent [0, 1K). */
	map->block_len = (u64)-1;
	map->block_start = EXTENT_MAP_INLINE;
	map->len = SZ_1K;
	map->start = 0;
	ret = add_extent_mapping(em_tree, map, 0);
	ASSERT(ret == 0);
	free_extent_map(map);

	/* Seed the tree with [4K, 8K), located after the inline extent. */
	map = alloc_extent_map();
	if (!map)
		goto out;

	map->block_len = SZ_4K;
	map->block_start = SZ_4K;
	map->len = SZ_4K;
	map->start = SZ_4K;
	ret = add_extent_mapping(em_tree, map, 0);
	ASSERT(ret == 0);
	free_extent_map(map);

	/* Add the inline extent [0, 1K) a second time. */
	map = alloc_extent_map();
	if (!map)
		goto out;

	map->start = 0;
	map->len = SZ_1K;
	map->block_start = EXTENT_MAP_INLINE;
	map->block_len = (u64)-1;
	ret = btrfs_add_extent_mapping(em_tree, &map, map->start, map->len);
	if (ret)
		test_msg("case2 [0 1K]: ret %d\n", ret);

	if (map &&
	    !(map->start == 0 && extent_map_end(map) == SZ_1K &&
	      map->block_start == EXTENT_MAP_INLINE &&
	      map->block_len == (u64)-1))
		test_msg(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
			 ret, map->start, map->len, map->block_start,
			 map->block_len);
	free_extent_map(map);

out:
	/* Drop everything that was inserted above. */
	free_extent_map_tree(em_tree);
}
/*
 * One run of case 3: the tree holds [4K, 8K) and a map describing [0, 16K)
 * is added on behalf of a 4K request beginning at @start.  The returned map
 * must cover [@start, @start + 4K) and keep block_start/block_len equal to
 * start/len.  Problems are reported with test_msg().
 */
static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
{
	const u64 span = SZ_4K;
	struct extent_map *map;
	int ret;

	map = alloc_extent_map();
	if (!map) {
		/* Nothing was inserted yet, so simply skip this run. */
		return;
	}

	/* Seed the tree with [4K, 8K). */
	map->block_len = SZ_4K;
	map->block_start = SZ_4K;
	map->len = SZ_4K;
	map->start = SZ_4K;
	ret = add_extent_mapping(em_tree, map, 0);
	ASSERT(ret == 0);
	free_extent_map(map);

	/* Try to add [0, 16K) for the requested 4K window. */
	map = alloc_extent_map();
	if (!map)
		goto out;

	map->start = 0;
	map->len = SZ_16K;
	map->block_start = 0;
	map->block_len = SZ_16K;
	ret = btrfs_add_extent_mapping(em_tree, &map, start, span);
	if (ret)
		test_msg("case3 [0x%llx 0x%llx): ret %d\n",
			 start, start + span, ret);

	if (map) {
		/* The requested window has to lie inside the returned map. */
		int covers = map->start <= start &&
			     start + span <= extent_map_end(map);
		/*
		 * Bytes within the map are contiguous, so the block fields
		 * are expected to mirror start/len.
		 */
		int mirrored = covers &&
			       map->start == map->block_start &&
			       map->len == map->block_len;

		if (!mirrored)
			test_msg(
"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
				 start, start + span, ret,
				 map->start, map->len,
				 map->block_start, map->block_len);
	}
	free_extent_map(map);

out:
	/* Drop everything that was inserted above. */
	free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 16K), two jobs are running concurrently
* against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio
* read from [0, 4K) or [8K, 12K) or [12K, 16K).
*
* t1 goes ahead of t2 and adds em [4K, 8K) into tree.
*
* t1 t2
* cow_file_range() btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
static void test_case_3(struct extent_map_tree *em_tree)
{
/*
 * Run the scenario once per start offset: 0, 8K and 12K.  Each run in
 * __test_case_3() uses a 4K length and empties the tree when it finishes.
 */
__test_case_3(em_tree, 0);
__test_case_3(em_tree, SZ_8K);
__test_case_3(em_tree, (12 * 1024ULL));
}
/*
 * One run of case 4: the tree holds [0, 8K) and [8K, 32K), and a map
 * describing [0, 32K) is added on behalf of a 4K request beginning at
 * @start.  The returned map must cover [@start, @start + 4K).  Problems
 * are reported with test_msg(); the tree is emptied before returning
 * unless the very first allocation failed.
 */
static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
{
	struct extent_map *em;
	u64 len = SZ_4K;
	int ret;

	em = alloc_extent_map();
	if (!em)
		/* Skip this test on error. */
		return;

	/* Add [0K, 8K) */
	em->start = 0;
	em->len = SZ_8K;
	em->block_start = 0;
	em->block_len = SZ_8K;
	ret = add_extent_mapping(em_tree, em, 0);
	ASSERT(ret == 0);
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em)
		goto out;

	/* Add [8K, 32K): starts at 8K and is 24K long */
	em->start = SZ_8K;
	em->len = 24 * 1024ULL;
	em->block_start = SZ_16K; /* avoid merging */
	em->block_len = 24 * 1024ULL;
	ret = add_extent_mapping(em_tree, em, 0);
	ASSERT(ret == 0);
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em)
		goto out;

	/* Add [0K, 32K) */
	em->start = 0;
	em->len = SZ_32K;
	em->block_start = 0;
	em->block_len = SZ_32K;
	ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
	/*
	 * The messages print the half-open range [start, start + len), so
	 * the second value must be the range end, not the length.
	 */
	if (ret)
		test_msg("case4 [0x%llx 0x%llx): ret %d\n",
			 start, start + len, ret);
	if (em &&
	    (start < em->start || start + len > extent_map_end(em)))
		test_msg(
"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
			 start, start + len, ret, em->start, em->len,
			 em->block_start, em->block_len);
	free_extent_map(em);
out:
	/* free memory */
	free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 32K), two jobs are running concurrently
* against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio
* read from [0, 4K) or [4K, 8K).
*
* t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K).
*
* t1 t2
* btrfs_get_blocks_direct() btrfs_get_blocks_direct()
* -> btrfs_get_extent() -> btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping() -> lookup_extent_mapping()
* # load [0, 32K)
* -> btrfs_new_extent_direct()
* -> btrfs_drop_extent_cache()
* # split [0, 32K)
* -> add_extent_mapping()
* # add [8K, 32K)
* -> add_extent_mapping()
* # handle -EEXIST when adding
* # [0, 32K)
*/
static void test_case_4(struct extent_map_tree *em_tree)
{
/*
 * Run the scenario once per start offset: 0 and 4K.  Each run in
 * __test_case_4() uses a 4K length and empties the tree when it finishes.
 */
__test_case_4(em_tree, 0);
__test_case_4(em_tree, SZ_4K);
}
/*
 * Entry point of the extent_map self tests.
 *
 * Allocates one extent map tree, runs test cases 1-4 against it and frees
 * it again.  Each case reports problems through test_msg() and empties the
 * tree before returning.
 *
 * Always returns 0: the cases do not propagate failures, and a failed tree
 * allocation only skips the tests.
 */
int btrfs_test_extent_map(void)
{
	struct extent_map_tree *em_tree;

	test_msg("Running extent_map tests\n");

	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
	if (!em_tree)
		/* Skip the test on error. */
		return 0;

	extent_map_tree_init(em_tree);

	test_case_1(em_tree);
	test_case_2(em_tree);
	test_case_3(em_tree);
	test_case_4(em_tree);

	kfree(em_tree);
	return 0;
}

View File

@ -288,10 +288,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
test_msg("Vacancy flag wasn't set properly\n");
goto out;
}
free_extent_map(em);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
@ -1001,8 +997,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@ -1070,8 +1065,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@ -1104,8 +1098,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@ -1121,8 +1114,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
EXTENT_UPTODATE, 0, 0, NULL);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
@ -1134,7 +1126,6 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
int ret;
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n");

View File

@ -495,8 +495,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (current->journal_info) {
WARN_ON(type & TRANS_EXTWRITERS);
h = current->journal_info;
h->use_count++;
WARN_ON(h->use_count > 2);
refcount_inc(&h->use_count);
WARN_ON(refcount_read(&h->use_count) > 2);
h->orig_rsv = h->block_rsv;
h->block_rsv = NULL;
goto got_it;
@ -567,7 +567,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
h->transid = cur_trans->transid;
h->transaction = cur_trans;
h->root = root;
h->use_count = 1;
refcount_set(&h->use_count, 1);
h->fs_info = root->fs_info;
h->type = type;
@ -837,8 +837,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int err = 0;
int must_run_delayed_refs = 0;
if (trans->use_count > 1) {
trans->use_count--;
if (refcount_read(&trans->use_count) > 1) {
refcount_dec(&trans->use_count);
trans->block_rsv = trans->orig_rsv;
return 0;
}
@ -1016,8 +1016,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* it's safe to do it (through clear_btree_io_tree()).
*/
err = clear_extent_bit(dirty_pages, start, end,
EXTENT_NEED_WAIT,
0, 0, &cached_state, GFP_NOFS);
EXTENT_NEED_WAIT, 0, 0, &cached_state);
if (err == -ENOMEM)
err = 0;
if (!err)
@ -1869,7 +1868,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans = trans->transaction;
DEFINE_WAIT(wait);
WARN_ON(trans->use_count > 1);
WARN_ON(refcount_read(&trans->use_count) > 1);
btrfs_abort_transaction(trans, err);
@ -2266,16 +2265,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
}
ret = write_all_supers(fs_info, 0);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
goto scrub_continue;
}
/*
* the super is written, we can safely allow the tree-loggers
* to go about their business
*/
mutex_unlock(&fs_info->tree_log_mutex);
if (ret)
goto scrub_continue;
btrfs_finish_extent_commit(trans, fs_info);

View File

@ -58,6 +58,7 @@ struct btrfs_transaction {
/* Be protected by fs_info->trans_lock when we want to change it. */
enum btrfs_trans_state state;
int aborted;
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
@ -70,7 +71,6 @@ struct btrfs_transaction {
struct list_head dirty_bgs;
struct list_head io_bgs;
struct list_head dropped_roots;
u64 num_dirty_bgs;
/*
* we need to make sure block group deletion doesn't race with
@ -79,11 +79,11 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
unsigned int num_dirty_bgs;
/* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs;
spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
struct btrfs_fs_info *fs_info;
};
@ -111,20 +111,19 @@ struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
refcount_t use_count;
unsigned int type;
short aborted;
short adding_csums;
bool adding_csums;
bool allocating_chunk;
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
bool dirty;
unsigned int type;
struct btrfs_root *root;
struct btrfs_fs_info *fs_info;
struct list_head new_bgs;

View File

@ -30,6 +30,7 @@
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
#include "hash.h"
/*
* Error message should follow the following format:
@ -222,6 +223,142 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
return 0;
}
/*
 * Customized error reporter for dir items.  Besides the usual root, block
 * and slot, the only additional information is key->objectid, which
 * represents the inode number.
 */
__printf(4, 5)
static void dir_item_err(const struct btrfs_root *root,
			 const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	struct va_format vaf = { .fmt = fmt };
	struct btrfs_key item_key;
	const char *kind;
	va_list ap;

	/* The inode number is taken from the key stored at @slot. */
	btrfs_item_key_to_cpu(eb, &item_key, slot);
	kind = btrfs_header_level(eb) == 0 ? "leaf" : "node";

	va_start(ap, fmt);
	vaf.va = &ap;
	btrfs_crit(root->fs_info,
		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
		kind, root->objectid, btrfs_header_bytenr(eb), slot,
		item_key.objectid, &vaf);
	va_end(ap);
}
static int check_dir_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_dir_item *di;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u32 cur = 0;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
while (cur < item_size) {
u32 name_len;
u32 data_len;
u32 max_name_len;
u32 total_size;
u32 name_hash;
u8 dir_type;
/* header itself should not cross item boundary */
if (cur + sizeof(*di) > item_size) {
dir_item_err(root, leaf, slot,
"dir item header crosses item boundary, have %zu boundary %u",
cur + sizeof(*di), item_size);
return -EUCLEAN;
}
/* dir type check */
dir_type = btrfs_dir_type(leaf, di);
if (dir_type >= BTRFS_FT_MAX) {
dir_item_err(root, leaf, slot,
"invalid dir item type, have %u expect [0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
}
if (key->type == BTRFS_XATTR_ITEM_KEY &&
dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"invalid dir item type for XATTR key, have %u expect %u",
dir_type, BTRFS_FT_XATTR);
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR &&
key->type != BTRFS_XATTR_ITEM_KEY) {
dir_item_err(root, leaf, slot,
"xattr dir type found for non-XATTR key");
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR)
max_name_len = XATTR_NAME_MAX;
else
max_name_len = BTRFS_NAME_LEN;
/* Name/data length check */
name_len = btrfs_dir_name_len(leaf, di);
data_len = btrfs_dir_data_len(leaf, di);
if (name_len > max_name_len) {
dir_item_err(root, leaf, slot,
"dir item name len too long, have %u max %u",
name_len, max_name_len);
return -EUCLEAN;
}
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
dir_item_err(root, leaf, slot,
"dir item name and data len too long, have %u max %u",
name_len + data_len,
BTRFS_MAX_XATTR_SIZE(root->fs_info));
return -EUCLEAN;
}
if (data_len && dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"dir item with invalid data len, have %u expect 0",
data_len);
return -EUCLEAN;
}
total_size = sizeof(*di) + name_len + data_len;
/* header and name/data should not cross item boundary */
if (cur + total_size > item_size) {
dir_item_err(root, leaf, slot,
"dir item data crosses item boundary, have %u boundary %u",
cur + total_size, item_size);
return -EUCLEAN;
}
/*
* Special check for XATTR/DIR_ITEM, as key->offset is name
* hash, should match its name
*/
if (key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_XATTR_ITEM_KEY) {
char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
read_extent_buffer(leaf, namebuf,
(unsigned long)(di + 1), name_len);
name_hash = btrfs_name_hash(namebuf, name_len);
if (key->offset != name_hash) {
dir_item_err(root, leaf, slot,
"name hash mismatch with key, have 0x%016x expect 0x%016llx",
name_hash, key->offset);
return -EUCLEAN;
}
}
cur += total_size;
di = (struct btrfs_dir_item *)((void *)di + total_size);
}
return 0;
}
/*
* Common point to switch the item-specific validation.
*/
@ -238,6 +375,11 @@ static int check_leaf_item(struct btrfs_root *root,
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(root, leaf, key, slot);
break;
}
return ret;
}

View File

@ -1174,19 +1174,15 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
return 0;
}
static int extref_get_fields(struct extent_buffer *eb, int slot,
unsigned long ref_ptr, u32 *namelen, char **name,
u64 *index, u64 *parent_objectid)
static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
u32 *namelen, char **name, u64 *index,
u64 *parent_objectid)
{
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)ref_ptr;
*namelen = btrfs_inode_extref_name_len(eb, extref);
if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name,
*namelen))
return -EIO;
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@ -1201,19 +1197,14 @@ static int extref_get_fields(struct extent_buffer *eb, int slot,
return 0;
}
static int ref_get_fields(struct extent_buffer *eb, int slot,
unsigned long ref_ptr, u32 *namelen, char **name,
u64 *index)
static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
u32 *namelen, char **name, u64 *index)
{
struct btrfs_inode_ref *ref;
ref = (struct btrfs_inode_ref *)ref_ptr;
*namelen = btrfs_inode_ref_name_len(eb, ref);
if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1),
*namelen))
return -EIO;
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@ -1288,8 +1279,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
while (ref_ptr < ref_end) {
if (log_ref_ver) {
ret = extref_get_fields(eb, slot, ref_ptr, &namelen,
&name, &ref_index, &parent_objectid);
ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
&ref_index, &parent_objectid);
/*
* parent object can change from one array
* item to another.
@ -1301,8 +1292,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}
} else {
ret = ref_get_fields(eb, slot, ref_ptr, &namelen,
&name, &ref_index);
ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
&ref_index);
}
if (ret)
goto out;
@ -1836,7 +1827,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
u32 item_size = btrfs_item_size_nr(eb, slot);
struct btrfs_dir_item *di;
@ -1849,8 +1839,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
if (verify_dir_item(fs_info, eb, slot, di))
return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
if (ret < 0)
@ -2025,11 +2013,6 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
if (verify_dir_item(fs_info, eb, slot, di)) {
ret = -EIO;
goto out;
}
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
@ -2110,7 +2093,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
const u64 ino)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key search_key;
struct btrfs_path *log_path;
int i;
@ -2152,11 +2134,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
u32 this_len = sizeof(*di) + name_len + data_len;
char *name;
ret = verify_dir_item(fs_info, path->nodes[0], i, di);
if (ret) {
ret = -EIO;
goto out;
}
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
ret = -ENOMEM;
@ -4574,12 +4551,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
this_len = sizeof(*extref) + this_name_len;
}
ret = btrfs_is_name_len_valid(eb, slot, name_ptr,
this_name_len);
if (!ret) {
ret = -EIO;
goto out;
}
if (this_name_len > name_len) {
char *new_name;
@ -5434,11 +5405,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *parent,
const loff_t start,
const loff_t end,
int exists_only,
int inode_only,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
struct dentry *old_parent = NULL;
int ret = 0;
@ -5604,7 +5574,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
parent, start, end, 0, ctx);
parent, start, end, LOG_INODE_ALL, ctx);
dput(parent);
return ret;
@ -5867,6 +5837,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
return 0;
return btrfs_log_inode_parent(trans, root, inode, parent, 0,
LLONG_MAX, 1, NULL);
LLONG_MAX, LOG_INODE_EXISTS, NULL);
}

File diff suppressed because it is too large Load Diff

View File

@ -47,6 +47,12 @@ struct btrfs_pending_bios {
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif
/*
 * Bit numbers within btrfs_device::dev_state (an unsigned long).  They are
 * meant to be used with the bit helpers, e.g.
 * test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state).
 */
#define BTRFS_DEV_STATE_WRITEABLE (0)
#define BTRFS_DEV_STATE_IN_FS_METADATA (1)
#define BTRFS_DEV_STATE_MISSING (2)
#define BTRFS_DEV_STATE_REPLACE_TGT (3)
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
@ -69,11 +75,7 @@ struct btrfs_device {
/* the mode sent to blkdev_get */
fmode_t mode;
int writeable;
int in_fs_metadata;
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
unsigned long dev_state;
blk_status_t last_flush_error;
int flush_bio_sent;
@ -129,14 +131,12 @@ struct btrfs_device {
struct completion flush_wait;
/* per-device scrub information */
struct scrub_ctx *scrub_device;
struct scrub_ctx *scrub_ctx;
struct btrfs_work work;
struct rcu_head rcu;
struct work_struct rcu_work;
/* readahead state */
spinlock_t reada_lock;
atomic_t reada_in_flight;
u64 reada_next;
struct reada_zone *reada_curr_zone;
@ -489,15 +489,16 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 chunk_offset);
static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev)
{
return atomic_read(&dev->dev_stats_ccnt);
}
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
{
atomic_inc(dev->dev_stat_values + index);
/*
* This memory barrier orders stores updating statistics before stores
* updating dev_stats_ccnt.
*
* It pairs with smp_rmb() in btrfs_run_dev_stats().
*/
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
@ -514,7 +515,13 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
int ret;
ret = atomic_xchg(dev->dev_stat_values + index, 0);
smp_mb__before_atomic();
/*
* atomic_xchg() implies a full memory barrier as per atomic_t.txt:
* - RMW operations that have a return value are fully ordered;
*
* This implicit memory barrier is paired with the smp_rmb() in
* btrfs_run_dev_stats().
*/
atomic_inc(&dev->dev_stats_ccnt);
return ret;
}
@ -523,6 +530,12 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
int index, unsigned long val)
{
atomic_set(dev->dev_stat_values + index, val);
/*
* This memory barrier orders stores updating statistics before stores
* updating dev_stats_ccnt.
*
* It pairs with smp_rmb() in btrfs_run_dev_stats().
*/
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
@ -540,7 +553,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
#endif

View File

@ -268,7 +268,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct btrfs_key key;
struct inode *inode = d_inode(dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
int ret = 0;
@ -337,11 +336,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
u32 this_len = sizeof(*di) + name_len + data_len;
unsigned long name_ptr = (unsigned long)(di + 1);
if (verify_dir_item(fs_info, leaf, slot, di)) {
ret = -EIO;
goto err;
}
total_size += name_len + 1;
/*
* We are just looking for how big our buffer needs to

View File

@ -43,6 +43,8 @@ struct workspace {
size_t size;
char *buf;
struct list_head list;
ZSTD_inBuffer in_buf;
ZSTD_outBuffer out_buf;
};
static void zstd_free_workspace(struct list_head *ws)
@ -94,8 +96,6 @@ static int zstd_compress_pages(struct list_head *ws,
int nr_pages = 0;
struct page *in_page = NULL; /* The current page to read */
struct page *out_page = NULL; /* The current page to write to */
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
unsigned long tot_in = 0;
unsigned long tot_out = 0;
unsigned long len = *total_out;
@ -118,9 +118,9 @@ static int zstd_compress_pages(struct list_head *ws,
/* map in the first page of input data */
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
in_buf.src = kmap(in_page);
in_buf.pos = 0;
in_buf.size = min_t(size_t, len, PAGE_SIZE);
workspace->in_buf.src = kmap(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
/* Allocate and map in the output buffer */
@ -130,14 +130,15 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
out_buf.dst = kmap(out_page);
out_buf.pos = 0;
out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
workspace->out_buf.dst = kmap(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
while (1) {
size_t ret2;
ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf);
ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
&workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@ -146,22 +147,22 @@ static int zstd_compress_pages(struct list_head *ws,
}
/* Check to see if we are making it bigger */
if (tot_in + in_buf.pos > 8192 &&
tot_in + in_buf.pos <
tot_out + out_buf.pos) {
if (tot_in + workspace->in_buf.pos > 8192 &&
tot_in + workspace->in_buf.pos <
tot_out + workspace->out_buf.pos) {
ret = -E2BIG;
goto out;
}
/* We've reached the end of our output range */
if (out_buf.pos >= max_out) {
tot_out += out_buf.pos;
if (workspace->out_buf.pos >= max_out) {
tot_out += workspace->out_buf.pos;
ret = -E2BIG;
goto out;
}
/* Check if we need more output space */
if (out_buf.pos == out_buf.size) {
if (workspace->out_buf.pos == workspace->out_buf.size) {
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
kunmap(out_page);
@ -176,19 +177,20 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
out_buf.dst = kmap(out_page);
out_buf.pos = 0;
out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
workspace->out_buf.dst = kmap(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out,
PAGE_SIZE);
}
/* We've reached the end of the input */
if (in_buf.pos >= len) {
tot_in += in_buf.pos;
if (workspace->in_buf.pos >= len) {
tot_in += workspace->in_buf.pos;
break;
}
/* Check if we need more input */
if (in_buf.pos == in_buf.size) {
if (workspace->in_buf.pos == workspace->in_buf.size) {
tot_in += PAGE_SIZE;
kunmap(in_page);
put_page(in_page);
@ -196,15 +198,15 @@ static int zstd_compress_pages(struct list_head *ws,
start += PAGE_SIZE;
len -= PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
in_buf.src = kmap(in_page);
in_buf.pos = 0;
in_buf.size = min_t(size_t, len, PAGE_SIZE);
workspace->in_buf.src = kmap(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
}
}
while (1) {
size_t ret2;
ret2 = ZSTD_endStream(stream, &out_buf);
ret2 = ZSTD_endStream(stream, &workspace->out_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_endStream returned %d\n",
ZSTD_getErrorCode(ret2));
@ -212,11 +214,11 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
if (ret2 == 0) {
tot_out += out_buf.pos;
tot_out += workspace->out_buf.pos;
break;
}
if (out_buf.pos >= max_out) {
tot_out += out_buf.pos;
if (workspace->out_buf.pos >= max_out) {
tot_out += workspace->out_buf.pos;
ret = -E2BIG;
goto out;
}
@ -235,9 +237,9 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
out_buf.dst = kmap(out_page);
out_buf.pos = 0;
out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
workspace->out_buf.dst = kmap(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
}
if (tot_out >= tot_in) {
@ -273,8 +275,6 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
unsigned long total_out = 0;
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
stream = ZSTD_initDStream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
@ -284,18 +284,19 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
in_buf.src = kmap(pages_in[page_in_index]);
in_buf.pos = 0;
in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
workspace->in_buf.src = kmap(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
out_buf.dst = workspace->buf;
out_buf.pos = 0;
out_buf.size = PAGE_SIZE;
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
workspace->out_buf.size = PAGE_SIZE;
while (1) {
size_t ret2;
ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
&workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@ -303,38 +304,38 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
buf_start = total_out;
total_out += out_buf.pos;
out_buf.pos = 0;
total_out += workspace->out_buf.pos;
workspace->out_buf.pos = 0;
ret = btrfs_decompress_buf2page(out_buf.dst, buf_start,
total_out, disk_start, orig_bio);
ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
buf_start, total_out, disk_start, orig_bio);
if (ret == 0)
break;
if (in_buf.pos >= srclen)
if (workspace->in_buf.pos >= srclen)
break;
/* Check if we've hit the end of a frame */
if (ret2 == 0)
break;
if (in_buf.pos == in_buf.size) {
if (workspace->in_buf.pos == workspace->in_buf.size) {
kunmap(pages_in[page_in_index++]);
if (page_in_index >= total_pages_in) {
in_buf.src = NULL;
workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
srclen -= PAGE_SIZE;
in_buf.src = kmap(pages_in[page_in_index]);
in_buf.pos = 0;
in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
workspace->in_buf.src = kmap(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
}
}
ret = 0;
zero_fill_bio(orig_bio);
done:
if (in_buf.src)
if (workspace->in_buf.src)
kunmap(pages_in[page_in_index]);
return ret;
}
@ -348,8 +349,6 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
ZSTD_DStream *stream;
int ret = 0;
size_t ret2;
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
unsigned long total_out = 0;
unsigned long pg_offset = 0;
char *kaddr;
@ -364,16 +363,17 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(size_t, destlen, PAGE_SIZE);
in_buf.src = data_in;
in_buf.pos = 0;
in_buf.size = srclen;
workspace->in_buf.src = data_in;
workspace->in_buf.pos = 0;
workspace->in_buf.size = srclen;
out_buf.dst = workspace->buf;
out_buf.pos = 0;
out_buf.size = PAGE_SIZE;
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
workspace->out_buf.size = PAGE_SIZE;
ret2 = 1;
while (pg_offset < destlen && in_buf.pos < in_buf.size) {
while (pg_offset < destlen
&& workspace->in_buf.pos < workspace->in_buf.size) {
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
@ -384,7 +384,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
ret = -EIO;
goto finish;
}
ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
&workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@ -393,8 +394,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
}
buf_start = total_out;
total_out += out_buf.pos;
out_buf.pos = 0;
total_out += workspace->out_buf.pos;
workspace->out_buf.pos = 0;
if (total_out <= start_byte)
continue;
@ -405,10 +406,11 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
buf_offset = 0;
bytes = min_t(unsigned long, destlen - pg_offset,
out_buf.size - buf_offset);
workspace->out_buf.size - buf_offset);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes);
memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
bytes);
kunmap_atomic(kaddr);
pg_offset += bytes;

View File

@ -193,7 +193,6 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
__print_flags(flag, "|", \
{ (1 << EXTENT_FLAG_PINNED), "PINNED" },\
{ (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\
{ (1 << EXTENT_FLAG_VACANCY), "VACANCY" },\
{ (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\
{ (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\
{ (1 << EXTENT_FLAG_FILLING), "FILLING" },\

View File

@ -33,7 +33,12 @@ struct btrfs_ioctl_vol_args {
char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_SUBVOL_NAME_MAX 4039
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3)
@ -101,11 +106,7 @@ struct btrfs_ioctl_qgroup_limit_args {
* - BTRFS_IOC_SUBVOL_GETFLAGS
* - BTRFS_IOC_SUBVOL_SETFLAGS
*/
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;

View File

@ -456,6 +456,8 @@ struct btrfs_free_space_header {
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
/*