Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There are some new user features and the usual load of invisible
  enhancements or cleanups.

  New features:

   - extend mount options to specify zlib compression level, -o
     compress=zlib:9

   - v2 of the "extent to inode mapping" ioctl (LOGICAL_INO_V2),
     addressing a use case where we want to retrieve more results, even
     if less precise, and do the postprocessing in userspace, aiding
     defragmentation or deduplication tools (a usage sketch follows the
     shortlog below)

   - populate compression heuristics logic: do data sampling and try to
     guess compressibility by looking for repeated patterns, counting
     unique byte values and their distribution, and calculating Shannon
     entropy; this will need more benchmarking and possibly fine tuning,
     but the base should be good enough (see the entropy note after the
     compression.c diff below)

   - enable indexing for btrfs as lower filesystem in overlayfs

   - speedup page cache readahead during send on large files

  Internal enhancements:

   - more sanity checks of b-tree items when reading them from disk

   - more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other
     errno or error handling fixes

   - remove some homegrown IO-related logic that has been obsoleted by
     core block layer changes (batching, plug/unplug, own counters)

   - add ref-verify, optional debugging feature to verify extent
     reference accounting

   - simplify code handling outstanding extents, make it more clear
     where and how the accounting is done

   - make delalloc reservations per-inode, simplify the code and make
     the logic more straightforward

   - extensive cleanup of delayed refs code

  Notable fixes:

   - fix send ioctl on 32bit with 64bit kernel"
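
Two of the features above can be exercised directly from userspace. The zlib
level is chosen at mount time (-o compress=zlib:9, parsed by
btrfs_compress_str2level() in the compression.c diff below). For
LOGICAL_INO_V2, here is a minimal sketch of a caller, under stated
assumptions rather than a definitive tool: it assumes the v4.15 UAPI
definitions from <linux/btrfs.h> (struct btrfs_logical_ino_args with the new
flags word, BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET, and the
BTRFS_IOC_LOGICAL_INO_V2 request), and the 1 MiB buffer is only an
illustration of v2 accepting output buffers larger than the old fixed 64 KiB:

	/*
	 * Sketch: resolve a logical byte address to (inode, offset, root)
	 * triples, ignoring extent offsets so that every ref to the extent
	 * is reported (the defrag/dedupe use case). Needs CAP_SYS_ADMIN.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	int main(int argc, char **argv)
	{
		struct btrfs_logical_ino_args args = {0};
		struct btrfs_data_container *inodes;
		uint32_t i;
		int fd, ret;

		if (argc != 3) {
			fprintf(stderr, "usage: %s <path-on-fs> <logical>\n",
				argv[0]);
			return 1;
		}
		fd = open(argv[1], O_RDONLY); /* any fd on the filesystem */
		if (fd < 0)
			return 1;
		inodes = calloc(1, 1024 * 1024);
		if (!inodes)
			return 1;

		args.logical = strtoull(argv[2], NULL, 0);
		args.size = 1024 * 1024; /* v2: caller-chosen output size */
		args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
		args.inodes = (uint64_t)(uintptr_t)inodes;

		ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO_V2, &args);
		if (ret < 0) {
			perror("BTRFS_IOC_LOGICAL_INO_V2");
		} else {
			/* Results are packed u64 triples in val[]. */
			for (i = 0; i < inodes->elem_cnt; i += 3)
				printf("inode %llu offset %llu root %llu\n",
				       (unsigned long long)inodes->val[i],
				       (unsigned long long)inodes->val[i + 1],
				       (unsigned long long)inodes->val[i + 2]);
		}
		free(inodes);
		close(fd);
		return ret < 0;
	}

Without the flag (or through the old LOGICAL_INO), only refs whose file
offset matches the queried position within the extent are returned; that is
exactly the ignore_offset parameter threaded through backref.c below.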

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits)
  btrfs: Fix bug for misused dev_t when lookup in dev state hash table.
  Btrfs: heuristic: add Shannon entropy calculation
  Btrfs: heuristic: add byte core set calculation
  Btrfs: heuristic: add byte set calculation
  Btrfs: heuristic: add detection of repeated data patterns
  Btrfs: heuristic: implement sampling logic
  Btrfs: heuristic: add bucket and sample counters and other defines
  Btrfs: compression: separate heuristic/compression workspaces
  btrfs: move btrfs_truncate_block out of trans handle
  btrfs: don't call btrfs_start_delalloc_roots in flushoncommit
  btrfs: track refs in a rb_tree instead of a list
  btrfs: add a comp_refs() helper
  btrfs: switch args for comp_*_refs
  btrfs: make the delalloc block rsv per inode
  btrfs: add tracepoints for outstanding extents mods
  Btrfs: rework outstanding_extents
  btrfs: increase output size for LOGICAL_INO_V2 ioctl
  btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
  btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents
  btrfs: send: remove unused code
  ...
Linus Torvalds 2017-11-14 13:35:29 -08:00
commit 5cea7647e6
51 changed files with 3361 additions and 1561 deletions

fs/btrfs/Kconfig

@@ -91,3 +91,14 @@ config BTRFS_ASSERT
 	  any of the assertions trip.  This is meant for btrfs developers only.
 
 	  If unsure, say N.
+
+config BTRFS_FS_REF_VERIFY
+	bool "Btrfs with the ref verify tool compiled in"
+	depends on BTRFS_FS
+	default n
+	help
+	  Enable run-time extent reference verification instrumentation.  This
+	  is meant to be used by btrfs developers for tracking down extent
+	  reference problems or verifying they didn't break something.
+
+	  If unsure, say N.

fs/btrfs/Makefile

@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o props.o hash.o free-space-tree.o
+	   uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \

fs/btrfs/async-thread.c

@@ -67,7 +67,7 @@ struct btrfs_workqueue {
 static void normal_work_helper(struct btrfs_work *work);
 
 #define BTRFS_WORK_HELPER(name)					\
-void btrfs_##name(struct work_struct *arg)				\
+noinline_for_stack void btrfs_##name(struct work_struct *arg)	\
 {									\
 	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
 					       normal_work);		\

fs/btrfs/backref.c

@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
 			      const struct extent_buffer *eb,
 			      const struct btrfs_file_extent_item *fi,
 			      u64 extent_item_pos,
-			      struct extent_inode_elem **eie)
+			      struct extent_inode_elem **eie,
+			      bool ignore_offset)
 {
 	u64 offset = 0;
 	struct extent_inode_elem *e;
 
-	if (!btrfs_file_extent_compression(eb, fi) &&
+	if (!ignore_offset &&
+	    !btrfs_file_extent_compression(eb, fi) &&
 	    !btrfs_file_extent_encryption(eb, fi) &&
 	    !btrfs_file_extent_other_encoding(eb, fi)) {
 		u64 data_offset;
@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
 
 static int find_extent_in_eb(const struct extent_buffer *eb,
 			     u64 wanted_disk_byte, u64 extent_item_pos,
-			     struct extent_inode_elem **eie)
+			     struct extent_inode_elem **eie,
+			     bool ignore_offset)
 {
 	u64 disk_byte;
 	struct btrfs_key key;
@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
 		if (disk_byte != wanted_disk_byte)
 			continue;
 
-		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
 		if (ret < 0)
 			return ret;
 	}
@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			   struct ulist *parents, struct prelim_ref *ref,
 			   int level, u64 time_seq, const u64 *extent_item_pos,
-			   u64 total_refs)
+			   u64 total_refs, bool ignore_offset)
 {
 	int ret = 0;
 	int slot;
@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			if (extent_item_pos) {
 				ret = check_extent_in_eb(&key, eb, fi,
 						*extent_item_pos,
-						&eie);
+						&eie, ignore_offset);
 				if (ret < 0)
 					break;
 			}
@@ -510,7 +513,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path, u64 time_seq,
 				struct prelim_ref *ref, struct ulist *parents,
-				const u64 *extent_item_pos, u64 total_refs)
+				const u64 *extent_item_pos, u64 total_refs,
+				bool ignore_offset)
 {
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
-			      extent_item_pos, total_refs);
+			      extent_item_pos, total_refs, ignore_offset);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path, u64 time_seq,
 				 struct preftrees *preftrees,
 				 const u64 *extent_item_pos, u64 total_refs,
-				 struct share_check *sc)
+				 struct share_check *sc, bool ignore_offset)
 {
 	int err;
 	int ret = 0;
@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		}
 		err = resolve_indirect_ref(fs_info, path, time_seq, ref,
 					   parents, extent_item_pos,
-					   total_refs);
+					   total_refs, ignore_offset);
 		/*
 		 * we can only tolerate ENOENT,otherwise,we should catch error
 		 * and return directly.
@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	struct btrfs_key tmp_op_key;
 	struct btrfs_key *op_key = NULL;
+	struct rb_node *n;
 	int count;
 	int ret = 0;
 
@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	}
 
 	spin_lock(&head->lock);
-	list_for_each_entry(node, &head->ref_list, list) {
+	for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
+		node = rb_entry(n, struct btrfs_delayed_ref_node,
+				ref_node);
 		if (node->seq > seq)
 			continue;
@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * Otherwise this returns 0 for success and <0 for an error.
  *
+ * If ignore_offset is set to false, only extent refs whose offsets match
+ * extent_item_pos are returned.  If true, every extent ref is returned
+ * and extent_item_pos is ignored.
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
 			     u64 time_seq, struct ulist *refs,
 			     struct ulist *roots, const u64 *extent_item_pos,
-			     struct share_check *sc)
+			     struct share_check *sc, bool ignore_offset)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -1178,7 +1189,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 	if (head) {
 		if (!mutex_trylock(&head->mutex)) {
-			refcount_inc(&head->node.refs);
+			refcount_inc(&head->refs);
 			spin_unlock(&delayed_refs->lock);
 
 			btrfs_release_path(path);
@@ -1189,7 +1200,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			 */
 			mutex_lock(&head->mutex);
 			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(&head->node);
+			btrfs_put_delayed_ref_head(head);
 			goto again;
 		}
 		spin_unlock(&delayed_refs->lock);
@@ -1235,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
 
 	ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
-				    extent_item_pos, total_refs, sc);
+				    extent_item_pos, total_refs, sc, ignore_offset);
 	if (ret)
 		goto out;
 
@@ -1282,7 +1293,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 				btrfs_tree_read_lock(eb);
 				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 				ret = find_extent_in_eb(eb, bytenr,
-							*extent_item_pos, &eie);
+							*extent_item_pos, &eie, ignore_offset);
 				btrfs_tree_read_unlock_blocking(eb);
 				free_extent_buffer(eb);
 				if (ret < 0)
@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info, u64 bytenr,
 				u64 time_seq, struct ulist **leafs,
-				const u64 *extent_item_pos)
+				const u64 *extent_item_pos, bool ignore_offset)
 {
 	int ret;
 
@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
-				*leafs, NULL, extent_item_pos, NULL);
+				*leafs, NULL, extent_item_pos, NULL, ignore_offset);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  */
 static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 				     struct btrfs_fs_info *fs_info, u64 bytenr,
-				     u64 time_seq, struct ulist **roots)
+				     u64 time_seq, struct ulist **roots,
+				     bool ignore_offset)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
 		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
-					tmp, *roots, NULL, NULL);
+					tmp, *roots, NULL, NULL, ignore_offset);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
 			ulist_free(*roots);
@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots)
+			 u64 time_seq, struct ulist **roots,
+			 bool ignore_offset)
 {
 	int ret;
 
 	if (!trans)
 		down_read(&fs_info->commit_root_sem);
 	ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
-					time_seq, roots);
+					time_seq, roots, ignore_offset);
 	if (!trans)
 		up_read(&fs_info->commit_root_sem);
 	return ret;
@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
 		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
-					roots, NULL, &shared);
+					roots, NULL, &shared, false);
 		if (ret == BACKREF_FOUND_SHARED) {
 			/* this is the only condition under which we return 1 */
 			ret = 1;
@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				u64 extent_item_objectid, u64 extent_item_pos,
 				int search_commit_root,
-				iterate_extent_inodes_t *iterate, void *ctx)
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset)
 {
 	int ret;
 	struct btrfs_trans_handle *trans = NULL;
@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
 				   tree_mod_seq_elem.seq, &refs,
-				   &extent_item_pos);
+				   &extent_item_pos, ignore_offset);
 	if (ret)
 		goto out;
 
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
 		ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
-						tree_mod_seq_elem.seq, &roots);
+						tree_mod_seq_elem.seq, &roots,
+						ignore_offset);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1943,7 +1958,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path,
-				iterate_extent_inodes_t *iterate, void *ctx)
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset)
 {
 	int ret;
 	u64 extent_item_pos;
@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	extent_item_pos = logical - found_key.objectid;
 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
 					extent_item_pos, search_commit_root,
-					iterate, ctx);
+					iterate, ctx, ignore_offset);
 
 	return ret;
 }

fs/btrfs/backref.h

@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				u64 extent_item_objectid,
 				u64 extent_offset, int search_commit_root,
-				iterate_extent_inodes_t *iterate, void *ctx);
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset);
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path,
-				iterate_extent_inodes_t *iterate, void *ctx);
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset);
 
 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots);
+			 u64 time_seq, struct ulist **roots, bool ignore_offset);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,

fs/btrfs/btrfs_inode.h

@@ -36,14 +36,13 @@
 #define BTRFS_INODE_ORPHAN_META_RESERVED	1
 #define BTRFS_INODE_DUMMY			2
 #define BTRFS_INODE_IN_DEFRAG			3
-#define BTRFS_INODE_DELALLOC_META_RESERVED	4
-#define BTRFS_INODE_HAS_ORPHAN_ITEM		5
-#define BTRFS_INODE_HAS_ASYNC_EXTENT		6
-#define BTRFS_INODE_NEEDS_FULL_SYNC		7
-#define BTRFS_INODE_COPY_EVERYTHING		8
-#define BTRFS_INODE_IN_DELALLOC_LIST		9
-#define BTRFS_INODE_READDIO_NEED_LOCK		10
-#define BTRFS_INODE_HAS_PROPS			11
+#define BTRFS_INODE_HAS_ORPHAN_ITEM		4
+#define BTRFS_INODE_HAS_ASYNC_EXTENT		5
+#define BTRFS_INODE_NEEDS_FULL_SYNC		6
+#define BTRFS_INODE_COPY_EVERYTHING		7
+#define BTRFS_INODE_IN_DELALLOC_LIST		8
+#define BTRFS_INODE_READDIO_NEED_LOCK		9
+#define BTRFS_INODE_HAS_PROPS			10
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -176,7 +175,8 @@ struct btrfs_inode {
 	 * of extent items we've reserved metadata for.
 	 */
 	unsigned outstanding_extents;
-	unsigned reserved_extents;
+
+	struct btrfs_block_rsv block_rsv;
 
 	/*
 	 * Cached values of inode properties
@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
 	return false;
 }
 
+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
+						 int mod)
+{
+	lockdep_assert_held(&inode->lock);
+	inode->outstanding_extents += mod;
+	if (btrfs_is_free_space_inode(inode))
+		return;
+	trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
+						  mod);
+}
+
 static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
 	int ret = 0;

fs/btrfs/check-integrity.c

@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
 		struct btrfsic_dev_state_hashtable *h)
 {
 	const unsigned int hashval =
-	    (((unsigned int)((uintptr_t)ds->bdev)) &
+	    (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
 	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
 
 	list_add(&ds->collision_resolving_node, h->table + hashval);
@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 	mutex_lock(&btrfsic_mutex);
 	/* since btrfsic_submit_bio() is also called before
 	 * btrfsic_mount(), this might return NULL */
-	dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
+	dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
 	if (NULL != dev_state &&
 	    (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
 		unsigned int i = 0;
@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 	state = kvzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state) {
 		pr_info("btrfs check-integrity: allocation failed!\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	if (!btrfsic_is_initialized) {
@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 		if (NULL == ds) {
 			pr_info("btrfs check-integrity: kmalloc() failed!\n");
 			mutex_unlock(&btrfsic_mutex);
-			return -1;
+			return -ENOMEM;
 		}
 		ds->bdev = device->bdev;
 		ds->state = state;

fs/btrfs/compression.c

@@ -33,6 +33,8 @@
 #include <linux/bit_spinlock.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
+#include <linux/sort.h>
+#include <linux/log2.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio)
 					 cb->start,
 					 cb->start + cb->len - 1,
 					 NULL,
-					 bio->bi_status ? 0 : 1);
+					 bio->bi_status ?
+					 BLK_STS_OK : BLK_STS_NOTSUPP);
 	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
@@ -706,7 +709,86 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	return ret;
 }
 
-static struct {
+/*
+ * Heuristic uses systematic sampling to collect data from the input data
+ * range, the logic can be tuned by the following constants:
+ *
+ * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample
+ * @SAMPLING_INTERVAL  - range from which the sampled data can be collected
+ */
+#define SAMPLING_READ_SIZE	(16)
+#define SAMPLING_INTERVAL	(256)
+
+/*
+ * For statistical analysis of the input data we consider bytes that form a
+ * Galois Field of 256 objects. Each object has an attribute count, ie. how
+ * many times the object appeared in the sample.
+ */
+#define BUCKET_SIZE		(256)
+
+/*
+ * The size of the sample is based on a statistical sampling rule of thumb.
+ * The common way is to perform sampling tests as long as the number of
+ * elements in each cell is at least 5.
+ *
+ * Instead of 5, we choose 32 to obtain more accurate results.
+ * If the data contain the maximum number of symbols, which is 256, we obtain a
+ * sample size bound by 8192.
+ *
+ * For a sample of at most 8KB of data per data range: 16 consecutive bytes
+ * from up to 512 locations.
+ */
+#define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED * \
+				 SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
+
+struct bucket_item {
+	u32 count;
+};
+
+struct heuristic_ws {
+	/* Partial copy of input data */
+	u8 *sample;
+	u32 sample_size;
+	/* Buckets store counters for each byte value */
+	struct bucket_item *bucket;
+	struct list_head list;
+};
+
+static void free_heuristic_ws(struct list_head *ws)
+{
+	struct heuristic_ws *workspace;
+
+	workspace = list_entry(ws, struct heuristic_ws, list);
+
+	kvfree(workspace->sample);
+	kfree(workspace->bucket);
+	kfree(workspace);
+}
+
+static struct list_head *alloc_heuristic_ws(void)
+{
+	struct heuristic_ws *ws;
+
+	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+	if (!ws)
+		return ERR_PTR(-ENOMEM);
+
+	ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
+	if (!ws->sample)
+		goto fail;
+
+	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
+	if (!ws->bucket)
+		goto fail;
+
+	INIT_LIST_HEAD(&ws->list);
+	return &ws->list;
+fail:
+	free_heuristic_ws(&ws->list);
+	return ERR_PTR(-ENOMEM);
+}
+
+struct workspaces_list {
 	struct list_head idle_ws;
 	spinlock_t ws_lock;
 	/* Number of free workspaces */
@@ -715,7 +797,11 @@ static struct {
 	atomic_t total_ws;
 	/* Waiters for a free workspace */
 	wait_queue_head_t ws_wait;
-} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
+};
+
+static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
+
+static struct workspaces_list btrfs_heuristic_ws;
 
 static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 	&btrfs_zlib_compress,
@@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 
 void __init btrfs_init_compress(void)
 {
+	struct list_head *workspace;
 	int i;
 
-	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
-		struct list_head *workspace;
+	INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
+	spin_lock_init(&btrfs_heuristic_ws.ws_lock);
+	atomic_set(&btrfs_heuristic_ws.total_ws, 0);
+	init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
 
+	workspace = alloc_heuristic_ws();
+	if (IS_ERR(workspace)) {
+		pr_warn(
+	"BTRFS: cannot preallocate heuristic workspace, will try later\n");
+	} else {
+		atomic_set(&btrfs_heuristic_ws.total_ws, 1);
+		btrfs_heuristic_ws.free_ws = 1;
+		list_add(workspace, &btrfs_heuristic_ws.idle_ws);
+	}
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
 		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
 		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
 		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
@@ -756,18 +856,32 @@ void __init btrfs_init_compress(void)
  * Preallocation makes a forward progress guarantees and we do not return
  * errors.
  */
-static struct list_head *find_workspace(int type)
+static struct list_head *__find_workspace(int type, bool heuristic)
 {
 	struct list_head *workspace;
 	int cpus = num_online_cpus();
 	int idx = type - 1;
 	unsigned nofs_flag;
-	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
-	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
-	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
+	struct list_head *idle_ws;
+	spinlock_t *ws_lock;
+	atomic_t *total_ws;
+	wait_queue_head_t *ws_wait;
+	int *free_ws;
+
+	if (heuristic) {
+		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
+		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
+		total_ws = &btrfs_heuristic_ws.total_ws;
+		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
+		free_ws	 = &btrfs_heuristic_ws.free_ws;
+	} else {
+		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
+		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
+		total_ws = &btrfs_comp_ws[idx].total_ws;
+		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
+		free_ws	 = &btrfs_comp_ws[idx].free_ws;
+	}
 
 again:
 	spin_lock(ws_lock);
 	if (!list_empty(idle_ws)) {
@@ -797,7 +911,10 @@ static struct list_head *__find_workspace(int type, bool heuristic)
 	 * context of btrfs_compress_bio/btrfs_compress_pages
 	 */
 	nofs_flag = memalloc_nofs_save();
-	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (heuristic)
+		workspace = alloc_heuristic_ws();
+	else
+		workspace = btrfs_compress_op[idx]->alloc_workspace();
 	memalloc_nofs_restore(nofs_flag);
 
 	if (IS_ERR(workspace)) {
@@ -828,18 +945,38 @@ static struct list_head *find_workspace(int type)
 	return workspace;
 }
 
+static struct list_head *find_workspace(int type)
+{
+	return __find_workspace(type, false);
+}
+
 /*
  * put a workspace struct back on the list or free it if we have enough
  * idle ones sitting around
 */
-static void free_workspace(int type, struct list_head *workspace)
+static void __free_workspace(int type, struct list_head *workspace,
+			     bool heuristic)
 {
 	int idx = type - 1;
-	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
-	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
-	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
+	struct list_head *idle_ws;
+	spinlock_t *ws_lock;
+	atomic_t *total_ws;
+	wait_queue_head_t *ws_wait;
+	int *free_ws;
+
+	if (heuristic) {
+		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
+		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
+		total_ws = &btrfs_heuristic_ws.total_ws;
+		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
+		free_ws	 = &btrfs_heuristic_ws.free_ws;
+	} else {
+		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
+		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
+		total_ws = &btrfs_comp_ws[idx].total_ws;
+		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
+		free_ws	 = &btrfs_comp_ws[idx].free_ws;
+	}
 
 	spin_lock(ws_lock);
 	if (*free_ws <= num_online_cpus()) {
@@ -850,7 +987,10 @@ static void free_workspace(int type, struct list_head *workspace)
 	}
 	spin_unlock(ws_lock);
 
-	btrfs_compress_op[idx]->free_workspace(workspace);
+	if (heuristic)
+		free_heuristic_ws(workspace);
+	else
+		btrfs_compress_op[idx]->free_workspace(workspace);
 	atomic_dec(total_ws);
 wake:
 	/*
@@ -861,6 +1001,11 @@ static void free_workspace(int type, struct list_head *workspace)
 	wake_up(ws_wait);
 }
 
+static void free_workspace(int type, struct list_head *ws)
+{
+	return __free_workspace(type, ws, false);
+}
+
 /*
 * cleanup function for module exit
 */
@@ -869,6 +1014,13 @@ static void free_workspaces(void)
 	struct list_head *workspace;
 	int i;
 
+	while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
+		workspace = btrfs_heuristic_ws.idle_ws.next;
+		list_del(workspace);
+		free_heuristic_ws(workspace);
+		atomic_dec(&btrfs_heuristic_ws.total_ws);
+	}
+
 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
 		while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
 			workspace = btrfs_comp_ws[i].idle_ws.next;
@@ -883,6 +1035,11 @@ static void free_workspaces(void)
  * Given an address space and start and length, compress the bytes into @pages
 * that are allocated on demand.
 *
+ * @type_level is encoded algorithm and level, where level 0 means whatever
+ * default the algorithm chooses and is opaque here;
+ * - compression algo are 0-3
+ * - the level are bits 4-7
+ *
 * @out_pages is an in/out parameter, holds maximum number of pages to allocate
 * and returns number of actually allocated pages
 *
@@ -897,7 +1054,7 @@ static void free_workspaces(void)
 * @max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 */
-int btrfs_compress_pages(int type, struct address_space *mapping,
+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
 			 unsigned long *out_pages,
 			 unsigned long *total_in,
@@ -905,9 +1062,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 {
 	struct list_head *workspace;
 	int ret;
+	int type = type_level & 0xF;
 
 	workspace = find_workspace(type);
 
+	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, pages,
 						      out_pages,
@@ -1065,6 +1224,211 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 	return 1;
 }
 
+/*
+ * Shannon Entropy calculation
+ *
+ * Pure byte distribution analysis fails to determine compressibility of data.
+ * Try calculating entropy to estimate the average minimum number of bits
+ * needed to encode the sampled data.
+ *
+ * For convenience, return the percentage of needed bits, instead of amount of
+ * bits directly.
+ *
+ * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy
+ *			    and can be compressible with high probability
+ *
+ * @ENTROPY_LVL_HIGH - data are not compressible with high probability
+ *
+ * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
+ */
+#define ENTROPY_LVL_ACEPTABLE		(65)
+#define ENTROPY_LVL_HIGH		(80)
+
+/*
+ * For increased precision in shannon_entropy calculation,
+ * let's do pow(n, M) to save more digits after comma:
+ *
+ * - maximum int bit length is 64
+ * - ilog2(MAX_SAMPLE_SIZE) -> 13
+ * - 13 * 4 = 52 < 64 -> M = 4
+ *
+ * So use pow(n, 4).
+ */
+static inline u32 ilog2_w(u64 n)
+{
+	return ilog2(n * n * n * n);
+}
+
+static u32 shannon_entropy(struct heuristic_ws *ws)
+{
+	const u32 entropy_max = 8 * ilog2_w(2);
+	u32 entropy_sum = 0;
+	u32 p, p_base, sz_base;
+	u32 i;
+
+	sz_base = ilog2_w(ws->sample_size);
+	for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
+		p = ws->bucket[i].count;
+		p_base = ilog2_w(p);
+		entropy_sum += p * (sz_base - p_base);
+	}
+
+	entropy_sum /= ws->sample_size;
+	return entropy_sum * 100 / entropy_max;
+}
+/* Compare buckets by size, ascending */
+static int bucket_comp_rev(const void *lv, const void *rv)
+{
+	const struct bucket_item *l = (const struct bucket_item *)lv;
+	const struct bucket_item *r = (const struct bucket_item *)rv;
+
+	return r->count - l->count;
+}
+
+/*
+ * Size of the core byte set - how many bytes cover 90% of the sample
+ *
+ * There are several types of structured binary data that use nearly all byte
+ * values. The distribution can be uniform and counts in all buckets will be
+ * nearly the same (eg. encrypted data). Unlikely to be compressible.
+ *
+ * Other possibility is normal (Gaussian) distribution, where the data could
+ * be potentially compressible, but we have to take a few more steps to decide
+ * how much.
+ *
+ * @BYTE_CORE_SET_LOW  - main part of byte values repeated frequently,
+ *			 compression algo can easy fix that
+ * @BYTE_CORE_SET_HIGH - data have uniform distribution and with high
+ *			 probability is not compressible
+ */
+#define BYTE_CORE_SET_LOW		(64)
+#define BYTE_CORE_SET_HIGH		(200)
+
+static int byte_core_set_size(struct heuristic_ws *ws)
+{
+	u32 i;
+	u32 coreset_sum = 0;
+	const u32 core_set_threshold = ws->sample_size * 90 / 100;
+	struct bucket_item *bucket = ws->bucket;
+
+	/* Sort in reverse order */
+	sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
+
+	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
+		coreset_sum += bucket[i].count;
+
+	if (coreset_sum > core_set_threshold)
+		return i;
+
+	for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
+		coreset_sum += bucket[i].count;
+		if (coreset_sum > core_set_threshold)
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * Count byte values in buckets.
+ * This heuristic can detect textual data (configs, xml, json, html, etc).
+ * Because in most text-like data byte set is restricted to limited number of
+ * possible characters, and that restriction in most cases makes data easy to
+ * compress.
+ *
+ * @BYTE_SET_THRESHOLD - consider all data within this byte set size:
+ *	less - compressible
+ *	more - need additional analysis
+ */
+#define BYTE_SET_THRESHOLD		(64)
+
+static u32 byte_set_size(const struct heuristic_ws *ws)
+{
+	u32 i;
+	u32 byte_set_size = 0;
+
+	for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
+		if (ws->bucket[i].count > 0)
+			byte_set_size++;
+	}
+
+	/*
+	 * Continue collecting count of byte values in buckets.  If the byte
+	 * set size is bigger then the threshold, it's pointless to continue,
+	 * the detection technique would fail for this type of data.
+	 */
+	for (; i < BUCKET_SIZE; i++) {
+		if (ws->bucket[i].count > 0) {
+			byte_set_size++;
+			if (byte_set_size > BYTE_SET_THRESHOLD)
+				return byte_set_size;
+		}
+	}
+
+	return byte_set_size;
+}
+
+static bool sample_repeated_patterns(struct heuristic_ws *ws)
+{
+	const u32 half_of_sample = ws->sample_size / 2;
+	const u8 *data = ws->sample;
+
+	return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
+}
+
+static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
+				     struct heuristic_ws *ws)
+{
+	struct page *page;
+	u64 index, index_end;
+	u32 i, curr_sample_pos;
+	u8 *in_data;
+
+	/*
+	 * Compression handles the input data by chunks of 128KiB
+	 * (defined by BTRFS_MAX_UNCOMPRESSED)
+	 *
+	 * We do the same for the heuristic and loop over the whole range.
+	 *
+	 * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will
+	 * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
+	 */
+	if (end - start > BTRFS_MAX_UNCOMPRESSED)
+		end = start + BTRFS_MAX_UNCOMPRESSED;
+
+	index = start >> PAGE_SHIFT;
+	index_end = end >> PAGE_SHIFT;
+
+	/* Don't miss unaligned end */
+	if (!IS_ALIGNED(end, PAGE_SIZE))
+		index_end++;
+
+	curr_sample_pos = 0;
+	while (index < index_end) {
+		page = find_get_page(inode->i_mapping, index);
+		in_data = kmap(page);
+		/* Handle case where the start is not aligned to PAGE_SIZE */
+		i = start % PAGE_SIZE;
+		while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
+			/* Don't sample any garbage from the last page */
+			if (start > end - SAMPLING_READ_SIZE)
+				break;
+			memcpy(&ws->sample[curr_sample_pos], &in_data[i],
+			       SAMPLING_READ_SIZE);
+			i += SAMPLING_INTERVAL;
+			start += SAMPLING_INTERVAL;
+			curr_sample_pos += SAMPLING_READ_SIZE;
+		}
+		kunmap(page);
+		put_page(page);
+
+		index++;
+	}
+
+	ws->sample_size = curr_sample_pos;
+}
 
 /*
  * Compression heuristic.
  *
@@ -1082,18 +1446,87 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 */
 int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
 {
-	u64 index = start >> PAGE_SHIFT;
-	u64 end_index = end >> PAGE_SHIFT;
-	struct page *page;
-	int ret = 1;
-
-	while (index <= end_index) {
-		page = find_get_page(inode->i_mapping, index);
-		kmap(page);
-		kunmap(page);
-		put_page(page);
-		index++;
-	}
+	struct list_head *ws_list = __find_workspace(0, true);
+	struct heuristic_ws *ws;
+	u32 i;
+	u8 byte;
+	int ret = 0;
+
+	ws = list_entry(ws_list, struct heuristic_ws, list);
+
+	heuristic_collect_sample(inode, start, end, ws);
+
+	if (sample_repeated_patterns(ws)) {
+		ret = 1;
+		goto out;
+	}
+
+	memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
+
+	for (i = 0; i < ws->sample_size; i++) {
+		byte = ws->sample[i];
+		ws->bucket[byte].count++;
+	}
+
+	i = byte_set_size(ws);
+	if (i < BYTE_SET_THRESHOLD) {
+		ret = 2;
+		goto out;
+	}
+
+	i = byte_core_set_size(ws);
+	if (i <= BYTE_CORE_SET_LOW) {
+		ret = 3;
+		goto out;
+	}
+
+	if (i >= BYTE_CORE_SET_HIGH) {
+		ret = 0;
+		goto out;
+	}
+
+	i = shannon_entropy(ws);
+	if (i <= ENTROPY_LVL_ACEPTABLE) {
+		ret = 4;
+		goto out;
+	}
+
+	/*
+	 * For the levels below ENTROPY_LVL_HIGH, additional analysis would be
+	 * needed to give green light to compression.
+	 *
+	 * For now just assume that compression at that level is not worth the
+	 * resources because:
+	 *
+	 * 1. it is possible to defrag the data later
+	 *
+	 * 2. the data would turn out to be hardly compressible, eg. 150 byte
+	 * values, every bucket has counter at level ~54. The heuristic would
+	 * be confused. This can happen when data have some internal repeated
+	 * patterns like "abbacbbc...". This can be detected by analyzing
+	 * pairs of bytes, which is too costly.
+	 */
+	if (i < ENTROPY_LVL_HIGH) {
+		ret = 5;
+		goto out;
+	} else {
+		ret = 0;
+		goto out;
+	}
+
+out:
+	__free_workspace(0, ws_list, true);
 	return ret;
 }
+
+unsigned int btrfs_compress_str2level(const char *str)
+{
+	if (strncmp(str, "zlib", 4) != 0)
+		return 0;
+
+	/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
+	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
+		return str[5] - '0';
+
+	return 0;
+}
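
A reading aid for shannon_entropy() above, not part of the patch: with a
sample of n bytes and per-byte-value counts c_i, the quantity being
approximated is

	H = - \sum_{i=0}^{255} p_i \log_2 p_i
	  = \frac{1}{n} \sum_{i=0}^{255} c_i \left( \log_2 n - \log_2 c_i \right),
	  \qquad p_i = c_i / n .

The code evaluates each logarithm as ilog2(x^4) = floor(4 * log2(x)), buying
two extra fractional bits, so the accumulated sum approximates 4*n*H; after
the division by sample_size it is roughly 4*H, and scaling by
100 / entropy_max with entropy_max = 8 * ilog2_w(2) = 32 yields 100*H/8, the
percentage of the 8 bits per byte that are actually needed. The thresholds
then read naturally: up to ENTROPY_LVL_ACEPTABLE (65%, about 5.2 bits/byte)
is treated as compressible, ENTROPY_LVL_HIGH (80%, about 6.4 bits/byte) and
above as incompressible.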

fs/btrfs/compression.h

@@ -76,7 +76,7 @@ struct compressed_bio {
 void btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
-int btrfs_compress_pages(int type, struct address_space *mapping,
+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
 			 unsigned long *out_pages,
 			 unsigned long *total_in,
@@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
+unsigned btrfs_compress_str2level(const char *str);
+
 enum btrfs_compression_type {
 	BTRFS_COMPRESS_NONE  = 0,
 	BTRFS_COMPRESS_ZLIB  = 1,
@@ -124,6 +126,8 @@ struct btrfs_compress_op {
 			  struct page *dest_page,
 			  unsigned long start_byte,
 			  size_t srclen, size_t destlen);
+
+	void (*set_level)(struct list_head *ws, unsigned int type);
 };
 
 extern const struct btrfs_compress_op btrfs_zlib_compress;

fs/btrfs/ctree.c

@@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
-static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
@@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 				goto out;
 			} else if (left_end_reached) {
 				if (right_level == 0) {
-					ret = changed_cb(left_root, right_root,
-							left_path, right_path,
+					ret = changed_cb(left_path, right_path,
 							&right_key,
 							BTRFS_COMPARE_TREE_DELETED,
 							ctx);
@@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 				continue;
 			} else if (right_end_reached) {
 				if (left_level == 0) {
-					ret = changed_cb(left_root, right_root,
-							left_path, right_path,
+					ret = changed_cb(left_path, right_path,
 							&left_key,
 							BTRFS_COMPARE_TREE_NEW,
 							ctx);
@@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 			if (left_level == 0 && right_level == 0) {
 				cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
 				if (cmp < 0) {
-					ret = changed_cb(left_root, right_root,
-							left_path, right_path,
+					ret = changed_cb(left_path, right_path,
 							&left_key,
 							BTRFS_COMPARE_TREE_NEW,
 							ctx);
@@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 						goto out;
 					advance_left = ADVANCE;
 				} else if (cmp > 0) {
-					ret = changed_cb(left_root, right_root,
-							left_path, right_path,
+					ret = changed_cb(left_path, right_path,
 							&right_key,
 							BTRFS_COMPARE_TREE_DELETED,
 							ctx);
@@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 						result = BTRFS_COMPARE_TREE_CHANGED;
 					else
 						result = BTRFS_COMPARE_TREE_SAME;
-					ret = changed_cb(left_root, right_root,
-							left_path, right_path,
+					ret = changed_cb(left_path, right_path,
 							&left_key, result, ctx);
 					if (ret < 0)
 						goto out;

fs/btrfs/ctree.h

@@ -523,7 +523,7 @@ struct btrfs_caching_control {
 };
 
 /* Once caching_thread() finds this much free space, it will wake up waiters. */
-#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
+#define CACHING_CTL_WAKE_UP SZ_2M
 
 struct btrfs_io_ctl {
 	void *cur, *orig;
@@ -763,8 +763,6 @@ struct btrfs_fs_info {
 	 * delayed dir index item
 	 */
 	struct btrfs_block_rsv global_block_rsv;
-	/* block reservation for delay allocation */
-	struct btrfs_block_rsv delalloc_block_rsv;
 	/* block reservation for metadata operations */
 	struct btrfs_block_rsv trans_block_rsv;
 	/* block reservation for chunk tree */
@@ -790,6 +788,7 @@ struct btrfs_fs_info {
 	 */
 	unsigned long pending_changes;
 	unsigned long compress_type:4;
+	unsigned int compress_level;
 	int commit_interval;
 	/*
 	 * It is a suggestive number, the read side is safe even it gets a
@@ -878,9 +877,6 @@ struct btrfs_fs_info {
 	rwlock_t tree_mod_log_lock;
 	struct rb_root tree_mod_log;
 
-	atomic_t nr_async_submits;
-	atomic_t async_submit_draining;
-	atomic_t nr_async_bios;
 	atomic_t async_delalloc_pages;
 	atomic_t open_ioctl_trans;
 
@@ -1100,6 +1096,11 @@ struct btrfs_fs_info {
 	u32 nodesize;
 	u32 sectorsize;
 	u32 stripesize;
+
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+	spinlock_t ref_verify_lock;
+	struct rb_root block_tree;
+#endif
 };
 
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
@@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)
 #define BTRFS_MOUNT_FREE_SPACE_TREE	(1 << 26)
 #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
+#define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
@@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			   struct extent_buffer *buf,
 			   u64 parent, int last_ref);
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
-				     u64 root_objectid, u64 owner,
+				     struct btrfs_root *root, u64 owner,
 				     u64 offset, u64 ram_bytes,
 				     struct btrfs_key *ins);
 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
@@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes, u64 flags,
 				int level, int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info,
+		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
 		      u64 owner, u64 offset);
 
@@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 			       struct btrfs_fs_info *fs_info);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info,
+			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
 			 u64 root_objectid, u64 owner, u64 offset);
 
@@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_rsv *rsv);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
 int btrfs_delalloc_reserve_space(struct inode *inode,
@@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
 					      unsigned short type);
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+				   struct btrfs_block_rsv *rsv,
+				   unsigned short type);
 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
 			  struct btrfs_block_rsv *rsv);
 void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
@@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
 			     const struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 			struct btrfs_key *key, int lowest_level,
 			u64 min_trans);
@@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result {
 	BTRFS_COMPARE_TREE_CHANGED,
 	BTRFS_COMPARE_TREE_SAME,
 };
-typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
-				  struct btrfs_root *right_root,
-				  struct btrfs_path *left_path,
+typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
 				  struct btrfs_path *right_path,
 				  struct btrfs_key *key,
 				  enum btrfs_compare_tree_result result,

fs/btrfs/delayed-inode.c

@@ -581,36 +581,12 @@ static int btrfs_delayed_inode_reserve_metadata(
 	struct btrfs_block_rsv *dst_rsv;
 	u64 num_bytes;
 	int ret;
-	bool release = false;
 
 	src_rsv = trans->block_rsv;
 	dst_rsv = &fs_info->delayed_block_rsv;
 
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-	/*
-	 * If our block_rsv is the delalloc block reserve then check and see if
-	 * we have our extra reservation for updating the inode.  If not fall
-	 * through and try to reserve space quickly.
-	 *
-	 * We used to try and steal from the delalloc block rsv or the global
-	 * reserve, but we'd steal a full reservation, which isn't kind.  We are
-	 * here through delalloc which means we've likely just cowed down close
-	 * to the leaf that contains the inode, so we would steal less just
-	 * doing the fallback inode update, so if we do end up having to steal
-	 * from the global block rsv we hopefully only steal one or two blocks
-	 * worth which is less likely to hurt us.
-	 */
-	if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
-		spin_lock(&inode->lock);
-		if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-				       &inode->runtime_flags))
-			release = true;
-		else
-			src_rsv = NULL;
-		spin_unlock(&inode->lock);
-	}
-
 	/*
 	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
 	 * which doesn't reserve space for speed.  This is a problem since we
@@ -618,7 +594,7 @@ static int btrfs_delayed_inode_reserve_metadata(
 	 * space.
 	 *
 	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
-	 * we're accounted for.
+	 * we always reserve enough to update the inode item.
 	 */
 	if (!src_rsv || (!trans->bytes_reserved &&
 			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
 	}
 
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-
-	/*
-	 * Migrate only takes a reservation, it doesn't touch the size of the
-	 * block_rsv.  This is to simplify people who don't normally have things
-	 * migrated from their block rsv.  If they go to release their
-	 * reservation, that will decrease the size as well, so if migrate
-	 * reduced size we'd end up with a negative size.  But for the
-	 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
-	 * but we could in fact do this reserve/migrate dance several times
-	 * between the time we did the original reservation and we'd clean it
-	 * up.  So to take care of this, release the space for the meta
-	 * reservation here.  I think it may be time for a documentation page on
-	 * how block rsvs. work.
-	 */
 	if (!ret) {
 		trace_btrfs_space_reservation(fs_info, "delayed_inode",
 					      btrfs_ino(inode), num_bytes, 1);
 		node->bytes_reserved = num_bytes;
 	}
 
-	if (release) {
-		trace_btrfs_space_reservation(fs_info, "delalloc",
-					      btrfs_ino(inode), num_bytes, 0);
-		btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
-	}
-
 	return ret;
 }

fs/btrfs/delayed-ref.c

@@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
-static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
-			  struct btrfs_delayed_tree_ref *ref1, int type)
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
+			  struct btrfs_delayed_tree_ref *ref2)
{
-	if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
@@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
/*
 * compare two delayed data backrefs with same bytenr and type
 */
-static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
-			  struct btrfs_delayed_data_ref *ref1)
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
+			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
@@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
	return 0;
}
+static int comp_refs(struct btrfs_delayed_ref_node *ref1,
+		     struct btrfs_delayed_ref_node *ref2,
+		     bool check_seq)
+{
+	int ret = 0;
+	if (ref1->type < ref2->type)
+		return -1;
+	if (ref1->type > ref2->type)
+		return 1;
+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
+				     btrfs_delayed_node_to_tree_ref(ref2));
+	else
+		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
+				     btrfs_delayed_node_to_data_ref(ref2));
+	if (ret)
+		return ret;
+	if (check_seq) {
+		if (ref1->seq < ref2->seq)
+			return -1;
+		if (ref1->seq > ref2->seq)
+			return 1;
+	}
+	return 0;
+}
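comp_refs() is the single comparator that keys each head's ref_tree: it orders first on ref type, then on the type-specific payload, and only then (optionally) on seq, so refs that are identical except for seq land next to each other in the tree. tree_insert() below is the stock kernel rbtree insertion driven by it; for reference, the canonical shape of that pattern looks like this (struct and function names here are illustrative, not from btrfs):

#include <linux/rbtree.h>

struct item {
	struct rb_node node;
	u64 key;
};

/* returns the existing entry on collision, NULL once inserted */
static struct item *item_insert(struct rb_root *root, struct item *ins)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct item *entry = rb_entry(*p, struct item, node);

		parent = *p;
		if (ins->key < entry->key)
			p = &(*p)->rb_left;
		else if (ins->key > entry->key)
			p = &(*p)->rb_right;
		else
			return entry;	/* duplicate: caller merges */
	}
	rb_link_node(&ins->node, parent, p);
	rb_insert_color(&ins->node, root);
	return NULL;
}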
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
						   struct rb_node *node)
@@ -96,15 +124,43 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
	u64 bytenr;
	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
-	bytenr = ins->node.bytenr;
+	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);
-		if (bytenr < entry->node.bytenr)
+		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
-		else if (bytenr > entry->node.bytenr)
+		else if (bytenr > entry->bytenr)
+			p = &(*p)->rb_right;
+		else
+			return entry;
+	}
+	rb_link_node(node, parent_node, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
+						  struct btrfs_delayed_ref_node *ins)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *node = &ins->ref_node;
+	struct rb_node *parent_node = NULL;
+	struct btrfs_delayed_ref_node *entry;
+	while (*p) {
+		int comp;
+		parent_node = *p;
+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
+				 ref_node);
+		comp = comp_refs(ins, entry, true);
+		if (comp < 0)
+			p = &(*p)->rb_left;
+		else if (comp > 0)
			p = &(*p)->rb_right;
		else
			return entry;
@@ -133,15 +189,15 @@ find_ref_head(struct rb_root *root, u64 bytenr,
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
-		if (bytenr < entry->node.bytenr)
+		if (bytenr < entry->bytenr)
			n = n->rb_left;
-		else if (bytenr > entry->node.bytenr)
+		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
-		if (bytenr > entry->node.bytenr) {
+		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				n = rb_first(root);
@@ -164,17 +220,17 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
	if (mutex_trylock(&head->mutex))
		return 0;
-	refcount_inc(&head->node.refs);
+	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);
	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
-	if (!head->node.in_tree) {
+	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
-		btrfs_put_delayed_ref(&head->node);
+		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
-	btrfs_put_delayed_ref(&head->node);
+	btrfs_put_delayed_ref_head(head);
	return 0;
}
@@ -183,15 +239,11 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
-	if (btrfs_delayed_ref_is_head(ref)) {
-		head = btrfs_delayed_node_to_head(ref);
-		rb_erase(&head->href_node, &delayed_refs->href_root);
-	} else {
-		assert_spin_locked(&head->lock);
-		list_del(&ref->list);
-		if (!list_empty(&ref->add_list))
-			list_del(&ref->add_list);
-	}
+	assert_spin_locked(&head->lock);
+	rb_erase(&ref->ref_node, &head->ref_tree);
+	RB_CLEAR_NODE(&ref->ref_node);
+	if (!list_empty(&ref->add_list))
+		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
@@ -206,36 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
+	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;
-	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-				list);
-	while (!done && &next->list != &head->ref_list) {
+	while (!done && node) {
		int mod;
-		struct btrfs_delayed_ref_node *next2;
-		next2 = list_next_entry(next, list);
-		if (next == ref)
-			goto next;
+		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
+		node = rb_next(node);
		if (seq && next->seq >= seq)
-			goto next;
+			break;
+		if (comp_refs(ref, next, false))
+			break;
-		if (next->type != ref->type)
-			goto next;
-		if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
-		     ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
-		    comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
-				   btrfs_delayed_node_to_tree_ref(next),
-				   ref->type))
-			goto next;
-		if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
-		     ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
-		    comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
-				   btrfs_delayed_node_to_data_ref(next)))
-			goto next;
		if (ref->action == next->action) {
			mod = next->ref_mod;
@@ -259,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
-next:
-		next = next2;
	}
	return done;
@@ -272,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;
+	struct rb_node *node;
	u64 seq = 0;
	assert_spin_locked(&head->lock);
-	if (list_empty(&head->ref_list))
+	if (RB_EMPTY_ROOT(&head->ref_tree))
		return;
	/* We don't have too many refs to merge for data. */
@@ -293,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);
-	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-			       list);
-	while (&ref->list != &head->ref_list) {
+again:
+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
-			goto next;
-		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
-			if (list_empty(&head->ref_list))
-				break;
-			ref = list_first_entry(&head->ref_list,
-					       struct btrfs_delayed_ref_node,
-					       list);
			continue;
-		}
-next:
-		ref = list_next_entry(ref, list);
+		if (merge_ref(trans, delayed_refs, head, ref, seq))
+			goto again;
	}
}
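Because the tree iterates in comparator order, merge candidates are now adjacent: when merge_ref() finds a neighbour that compares equal with check_seq=false, opposite actions cancel. A worked example with hypothetical counts: an ADD ref with ref_mod 3 next to a DROP ref with ref_mod 2 collapses to a single ADD with ref_mod 1; equal mods (3 and 3) drop both nodes outright. The goto again restart is needed because merge_ref() erases nodes, which invalidates the rb_next() cursor held by the walk.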
@@ -380,8 +404,8 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
-	delayed_refs->run_delayed_start = head->node.bytenr +
-		head->node.num_bytes;
+	delayed_refs->run_delayed_start = head->bytenr +
+		head->num_bytes;
	return head;
}
@@ -391,37 +415,19 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
 * Return 0 for insert.
 * Return >0 for merge.
 */
-static int
-add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
-			   struct btrfs_delayed_ref_root *root,
-			   struct btrfs_delayed_ref_head *href,
-			   struct btrfs_delayed_ref_node *ref)
+static int insert_delayed_ref(struct btrfs_trans_handle *trans,
+			      struct btrfs_delayed_ref_root *root,
+			      struct btrfs_delayed_ref_head *href,
+			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;
	spin_lock(&href->lock);
-	/* Check whether we can merge the tail node with ref */
-	if (list_empty(&href->ref_list))
-		goto add_tail;
-	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
-			   list);
-	/* No need to compare bytenr nor is_head */
-	if (exist->type != ref->type || exist->seq != ref->seq)
-		goto add_tail;
-	if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
-	     exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
-	    comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
-			   btrfs_delayed_node_to_tree_ref(ref),
-			   ref->type))
-		goto add_tail;
-	if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
-	     exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
-	    comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
-			   btrfs_delayed_node_to_data_ref(ref)))
-		goto add_tail;
+	exist = tree_insert(&href->ref_tree, ref);
+	if (!exist)
+		goto inserted;
	/* Now we are sure we can merge */
	ret = 1;
@@ -452,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
	drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
-add_tail:
-	list_add_tail(&ref->list, &href->ref_list);
+inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
@@ -469,20 +473,16 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
-			 struct btrfs_delayed_ref_node *existing,
-			 struct btrfs_delayed_ref_node *update,
+			 struct btrfs_delayed_ref_head *existing,
+			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
-	struct btrfs_delayed_ref_head *existing_ref;
-	struct btrfs_delayed_ref_head *ref;
	int old_ref_mod;
-	existing_ref = btrfs_delayed_node_to_head(existing);
-	ref = btrfs_delayed_node_to_head(update);
-	BUG_ON(existing_ref->is_data != ref->is_data);
+	BUG_ON(existing->is_data != update->is_data);
-	spin_lock(&existing_ref->lock);
-	if (ref->must_insert_reserved) {
+	spin_lock(&existing->lock);
+	if (update->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
@@ -490,7 +490,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
-		existing_ref->must_insert_reserved = ref->must_insert_reserved;
+		existing->must_insert_reserved = update->must_insert_reserved;
		/*
		 * update the num_bytes so we make sure the accounting
@@ -500,22 +500,22 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
	}
-	if (ref->extent_op) {
-		if (!existing_ref->extent_op) {
-			existing_ref->extent_op = ref->extent_op;
+	if (update->extent_op) {
+		if (!existing->extent_op) {
+			existing->extent_op = update->extent_op;
		} else {
-			if (ref->extent_op->update_key) {
-				memcpy(&existing_ref->extent_op->key,
-				       &ref->extent_op->key,
-				       sizeof(ref->extent_op->key));
-				existing_ref->extent_op->update_key = true;
+			if (update->extent_op->update_key) {
+				memcpy(&existing->extent_op->key,
+				       &update->extent_op->key,
+				       sizeof(update->extent_op->key));
+				existing->extent_op->update_key = true;
			}
-			if (ref->extent_op->update_flags) {
-				existing_ref->extent_op->flags_to_set |=
-					ref->extent_op->flags_to_set;
-				existing_ref->extent_op->update_flags = true;
+			if (update->extent_op->update_flags) {
+				existing->extent_op->flags_to_set |=
+					update->extent_op->flags_to_set;
+				existing->extent_op->update_flags = true;
			}
-			btrfs_free_delayed_extent_op(ref->extent_op);
+			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
@@ -523,23 +523,23 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
	 * only need the lock for this case cause we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
-	old_ref_mod = existing_ref->total_ref_mod;
+	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
-	existing_ref->total_ref_mod += update->ref_mod;
+	existing->total_ref_mod += update->ref_mod;
	/*
	 * If we are going to from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
-	if (existing_ref->is_data) {
-		if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+	if (existing->is_data) {
+		if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
			delayed_refs->pending_csums -= existing->num_bytes;
-		if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+		if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
			delayed_refs->pending_csums += existing->num_bytes;
	}
-	spin_unlock(&existing_ref->lock);
+	spin_unlock(&existing->lock);
}
/*
@@ -550,14 +550,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
-		     struct btrfs_delayed_ref_node *ref,
+		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
		     int action, int is_data, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
-	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;
@@ -593,26 +592,21 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
	delayed_refs = &trans->transaction->delayed_refs;
-	/* first set the basic ref node struct up */
-	refcount_set(&ref->refs, 1);
-	ref->bytenr = bytenr;
-	ref->num_bytes = num_bytes;
-	ref->ref_mod = count_mod;
-	ref->type = 0;
-	ref->action = 0;
-	ref->is_head = 1;
-	ref->in_tree = 1;
-	ref->seq = 0;
-	head_ref = btrfs_delayed_node_to_head(ref);
+	refcount_set(&head_ref->refs, 1);
+	head_ref->bytenr = bytenr;
+	head_ref->num_bytes = num_bytes;
+	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
-	INIT_LIST_HEAD(&head_ref->ref_list);
+	head_ref->ref_tree = RB_ROOT;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
+	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	head_ref->qgroup_reserved = 0;
	head_ref->qgroup_ref_root = 0;
+	spin_lock_init(&head_ref->lock);
+	mutex_init(&head_ref->mutex);
	/* Record qgroup extent info if provided */
	if (qrecord) {
@@ -632,17 +626,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		qrecord_inserted = 1;
	}
-	spin_lock_init(&head_ref->lock);
-	mutex_init(&head_ref->mutex);
-	trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
+	trace_add_delayed_ref_head(fs_info, head_ref, action);
	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
			&& existing->qgroup_reserved);
-		update_existing_head_ref(delayed_refs, &existing->node, ref,
+		update_existing_head_ref(delayed_refs, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
@@ -699,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
-	INIT_LIST_HEAD(&ref->list);
+	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -713,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
	/*
	 * XXX: memory should be freed at the same level allocated.
@@ -756,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
-	INIT_LIST_HEAD(&ref->list);
+	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -772,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}
@@ -821,7 +811,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
-	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
+	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
					bytenr, num_bytes, 0, 0, action, 0,
					&qrecord_inserted, old_ref_mod,
					new_ref_mod);
@@ -888,7 +878,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
-	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
+	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
					bytenr, num_bytes, ref_root, reserved,
					action, 1, &qrecord_inserted,
					old_ref_mod, new_ref_mod);
@@ -920,7 +910,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
-	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+	add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
			     extent_op->is_data, NULL, NULL, NULL);

fs/btrfs/delayed-ref.h

@@ -26,18 +26,8 @@
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
-/*
- * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
- * same ref_node structure.
- * Ref_head is in a higher logic level than tree/data ref, and duplicated
- * bytenr/num_bytes in ref_node is really a waste or memory, they should be
- * referred from ref_head.
- * This gets more disgusting after we use list to store tree/data ref in
- * ref_head. Must clean this mess up later.
- */
struct btrfs_delayed_ref_node {
-	/*data/tree ref use list, stored in ref_head->ref_list. */
-	struct list_head list;
+	struct rb_node ref_node;
	/*
	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
	 * ref_head->ref_add_list, then we do not need to iterate the
@@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op {
 * reference count modifications we've queued up.
 */
struct btrfs_delayed_ref_head {
-	struct btrfs_delayed_ref_node node;
+	u64 bytenr;
+	u64 num_bytes;
+	refcount_t refs;
	/*
	 * the mutex is held while running the refs, and it is also
	 * held when checking the sum of reference modifications.
@@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head {
	struct mutex mutex;
	spinlock_t lock;
-	struct list_head ref_list;
+	struct rb_root ref_tree;
	/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
	struct list_head ref_add_list;
@@ -115,6 +106,14 @@ struct btrfs_delayed_ref_head {
	 */
	int total_ref_mod;
+	/*
+	 * This is the current outstanding mod references for this bytenr. This
+	 * is used with lookup_extent_info to get an accurate reference count
+	 * for a bytenr, so it is adjusted as delayed refs are run so that any
+	 * on disk reference count + ref_mod is accurate.
+	 */
+	int ref_mod;
	/*
	 * For qgroup reserved space freeing.
	 *
@@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
		case BTRFS_SHARED_DATA_REF_KEY:
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			break;
-		case 0:
-			kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
-			break;
		default:
			BUG();
		}
	}
}
+static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
+{
+	if (refcount_dec_and_test(&head->refs))
+		kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
+}
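The new ref_mod field documented above gives lookup-time accuracy without flushing: the effective reference count of a bytenr is whatever the extent item on disk says plus the head's pending ref_mod. A worked example with hypothetical numbers: if the on-disk extent item records 5 references and the head still has ref_mod = -2 queued, lookup_extent_info() reports 3; once the delayed refs run, the disk count itself becomes 3 and ref_mod returns to 0.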
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -282,36 +284,18 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq);
-/*
- * a node might live in a head or a regular ref, this lets you
- * test for the proper type to use.
- */
-static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
-{
-	return node->is_head;
-}
/*
 * helper functions to cast a node into its container
 */
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
{
-	WARN_ON(btrfs_delayed_ref_is_head(node));
	return container_of(node, struct btrfs_delayed_tree_ref, node);
}
static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
{
-	WARN_ON(btrfs_delayed_ref_is_head(node));
	return container_of(node, struct btrfs_delayed_data_ref, node);
}
-static inline struct btrfs_delayed_ref_head *
-btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
-{
-	WARN_ON(!btrfs_delayed_ref_is_head(node));
-	return container_of(node, struct btrfs_delayed_ref_head, node);
-}
#endif

fs/btrfs/disk-io.c

@@ -50,6 +50,8 @@
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
+#include "tree-checker.h"
+#include "ref-verify.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
	return ret;
}
-#define CORRUPT(reason, eb, root, slot)					\
-	btrfs_crit(root->fs_info,					\
-		   "corrupt %s, %s: block=%llu, root=%llu, slot=%d",	\
-		   btrfs_header_level(eb) == 0 ? "leaf" : "node",	\
-		   reason, btrfs_header_bytenr(eb), root->objectid, slot)
-static noinline int check_leaf(struct btrfs_root *root,
-			       struct extent_buffer *leaf)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_key key;
-	struct btrfs_key leaf_key;
-	u32 nritems = btrfs_header_nritems(leaf);
-	int slot;
-	/*
-	 * Extent buffers from a relocation tree have a owner field that
-	 * corresponds to the subvolume tree they are based on. So just from an
-	 * extent buffer alone we can not find out what is the id of the
-	 * corresponding subvolume tree, so we can not figure out if the extent
-	 * buffer corresponds to the root of the relocation tree or not. So skip
-	 * this check for relocation trees.
-	 */
-	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
-		struct btrfs_root *check_root;
-		key.objectid = btrfs_header_owner(leaf);
-		key.type = BTRFS_ROOT_ITEM_KEY;
-		key.offset = (u64)-1;
-		check_root = btrfs_get_fs_root(fs_info, &key, false);
-		/*
-		 * The only reason we also check NULL here is that during
-		 * open_ctree() some roots has not yet been set up.
-		 */
-		if (!IS_ERR_OR_NULL(check_root)) {
-			struct extent_buffer *eb;
-			eb = btrfs_root_node(check_root);
-			/* if leaf is the root, then it's fine */
-			if (leaf != eb) {
-				CORRUPT("non-root leaf's nritems is 0",
-					leaf, check_root, 0);
-				free_extent_buffer(eb);
-				return -EIO;
-			}
-			free_extent_buffer(eb);
-		}
-		return 0;
-	}
-	if (nritems == 0)
-		return 0;
-	/* Check the 0 item */
-	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
-	    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-		CORRUPT("invalid item offset size pair", leaf, root, 0);
-		return -EIO;
-	}
-	/*
-	 * Check to make sure each items keys are in the correct order and their
-	 * offsets make sense. We only have to loop through nritems-1 because
-	 * we check the current slot against the next slot, which verifies the
-	 * next slot's offset+size makes sense and that the current's slot
-	 * offset is correct.
-	 */
-	for (slot = 0; slot < nritems - 1; slot++) {
-		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-		/* Make sure the keys are in the right order */
-		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
-			CORRUPT("bad key order", leaf, root, slot);
-			return -EIO;
-		}
-		/*
-		 * Make sure the offset and ends are right, remember that the
-		 * item data starts at the end of the leaf and grows towards the
-		 * front.
-		 */
-		if (btrfs_item_offset_nr(leaf, slot) !=
-		    btrfs_item_end_nr(leaf, slot + 1)) {
-			CORRUPT("slot offset bad", leaf, root, slot);
-			return -EIO;
-		}
-		/*
-		 * Check to make sure that we don't point outside of the leaf,
-		 * just in case all the items are consistent to each other, but
-		 * all point outside of the leaf.
-		 */
-		if (btrfs_item_end_nr(leaf, slot) >
-		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-			CORRUPT("slot end outside of leaf", leaf, root, slot);
-			return -EIO;
-		}
-	}
-	return 0;
-}
-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
-{
-	unsigned long nr = btrfs_header_nritems(node);
-	struct btrfs_key key, next_key;
-	int slot;
-	u64 bytenr;
-	int ret = 0;
-	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
-		btrfs_crit(root->fs_info,
-			   "corrupt node: block %llu root %llu nritems %lu",
-			   node->start, root->objectid, nr);
-		return -EIO;
-	}
-	for (slot = 0; slot < nr - 1; slot++) {
-		bytenr = btrfs_node_blockptr(node, slot);
-		btrfs_node_key_to_cpu(node, &key, slot);
-		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
-		if (!bytenr) {
-			CORRUPT("invalid item slot", node, root, slot);
-			ret = -EIO;
-			goto out;
-		}
-		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
-			CORRUPT("bad key order", node, root, slot);
-			ret = -EIO;
-			goto out;
-		}
-	}
-out:
-	return ret;
-}
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
				      u64 phy_offset, struct page *page,
				      u64 start, u64 end, int mirror)
@@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
	 * that we don't try and read the other copies of this block, just
	 * return -EIO.
	 */
-	if (found_level == 0 && check_leaf(root, eb)) {
+	if (found_level == 0 && btrfs_check_leaf(root, eb)) {
		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
		ret = -EIO;
	}
-	if (found_level > 0 && check_node(root, eb))
+	if (found_level > 0 && btrfs_check_node(root, eb))
		ret = -EIO;
	if (!ret)
@@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work)
static void run_one_async_done(struct btrfs_work *work)
{
-	struct btrfs_fs_info *fs_info;
	struct async_submit_bio *async;
-	int limit;
	async = container_of(work, struct async_submit_bio, work);
-	fs_info = async->fs_info;
-	limit = btrfs_async_submit_limit(fs_info);
-	limit = limit * 2 / 3;
-	/*
-	 * atomic_dec_return implies a barrier for waitqueue_active
-	 */
-	if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
-	    waitqueue_active(&fs_info->async_submit_wait))
-		wake_up(&fs_info->async_submit_wait);
	/* If an error occurred we just want to clean up the bio and move on */
	if (async->status) {
@@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
	async->status = 0;
-	atomic_inc(&fs_info->nr_async_submits);
	if (op_is_sync(bio->bi_opf))
		btrfs_set_work_high_priority(&async->work);
	btrfs_queue_work(fs_info->workers, &async->work);
-	while (atomic_read(&fs_info->async_submit_draining) &&
-	       atomic_read(&fs_info->nr_async_submits)) {
-		wait_event(fs_info->async_submit_wait,
-			   (atomic_read(&fs_info->nr_async_submits) == 0));
-	}
	return 0;
}
@@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
	return ret;
}
-static int check_async_write(unsigned long bio_flags)
+static int check_async_write(struct btrfs_inode *bi)
{
-	if (bio_flags & EXTENT_BIO_TREE_LOG)
+	if (atomic_read(&bi->sync_writers))
		return 0;
#ifdef CONFIG_X86
	if (static_cpu_has(X86_FEATURE_XMM4_2))
@@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
{
	struct inode *inode = private_data;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	int async = check_async_write(bio_flags);
+	int async = check_async_write(BTRFS_I(inode));
	blk_status_t ret;
	if (bio_op(bio) != REQ_OP_WRITE) {
@@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb,
		goto fail_delalloc_bytes;
	}
-	fs_info->btree_inode = new_inode(sb);
-	if (!fs_info->btree_inode) {
-		err = -ENOMEM;
-		goto fail_bio_counter;
-	}
-	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
	INIT_LIST_HEAD(&fs_info->trans_list);
@@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb,
	btrfs_mapping_init(&fs_info->mapping_tree);
	btrfs_init_block_rsv(&fs_info->global_block_rsv,
			     BTRFS_BLOCK_RSV_GLOBAL);
-	btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
-			     BTRFS_BLOCK_RSV_DELALLOC);
	btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
	btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
	btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
	btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
			     BTRFS_BLOCK_RSV_DELOPS);
-	atomic_set(&fs_info->nr_async_submits, 0);
	atomic_set(&fs_info->async_delalloc_pages, 0);
-	atomic_set(&fs_info->async_submit_draining, 0);
-	atomic_set(&fs_info->nr_async_bios, 0);
	atomic_set(&fs_info->defrag_running, 0);
	atomic_set(&fs_info->qgroup_op_seq, 0);
	atomic_set(&fs_info->reada_works_cnt, 0);
@@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb,
	/* readahead state */
	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	spin_lock_init(&fs_info->reada_lock);
+	btrfs_init_ref_verify(fs_info);
	fs_info->thread_pool_size = min_t(unsigned long,
					  num_online_cpus() + 2, 8);
	INIT_LIST_HEAD(&fs_info->ordered_roots);
	spin_lock_init(&fs_info->ordered_root_lock);
+	fs_info->btree_inode = new_inode(sb);
+	if (!fs_info->btree_inode) {
+		err = -ENOMEM;
+		goto fail_bio_counter;
+	}
+	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
	fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
					GFP_KERNEL);
	if (!fs_info->delayed_root) {
@@ -2895,12 +2731,13 @@ int open_ctree(struct super_block *sb,
	sb->s_bdi->congested_fn = btrfs_congested_fn;
	sb->s_bdi->congested_data = fs_info;
	sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
-	sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+	sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
	sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
	sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
	sb->s_blocksize = sectorsize;
	sb->s_blocksize_bits = blksize_bits(sectorsize);
+	memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_sys_array(fs_info);
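The only functional bit of the ra_pages hunk is replacing the literal 1024 with SZ_1K; the sizing itself is unchanged. Spelled out with assumed values (4 KiB pages, the era's VM_MAX_READAHEAD of 128 KiB, three devices):

/*
 * Illustrative arithmetic, assuming PAGE_SIZE == 4096:
 *
 *   ra_pages  = 128 * SZ_1K / 4096   =   32 pages (128 KiB)
 *   ra_pages *= 3 devices            =   96 pages
 *   ra_pages  = max(96, SZ_4M/4096)  = 1024 pages (4 MiB)
 *
 * so for small arrays the 4 MiB floor dominates.
 */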
@@ -3083,6 +2920,9 @@ int open_ctree(struct super_block *sb,
	if (ret)
		goto fail_trans_kthread;
+	if (btrfs_build_ref_tree(fs_info))
+		btrfs_err(fs_info, "couldn't build ref tree");
	/* do not make disk changes in broken FS or nologreplay is given */
	if (btrfs_super_log_root(disk_super) != 0 &&
	    !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
@@ -3948,6 +3788,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
	cleanup_srcu_struct(&fs_info->subvol_srcu);
	btrfs_free_stripe_hash_table(fs_info);
+	btrfs_free_ref_cache(fs_info);
	__btrfs_free_block_rsv(root->orphan_block_rsv);
	root->orphan_block_rsv = NULL;
@@ -4007,7 +3848,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
				 buf->len,
				 fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+	if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
		btrfs_print_leaf(buf);
		ASSERT(0);
	}
@@ -4272,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
	while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
		struct btrfs_delayed_ref_head *head;
-		struct btrfs_delayed_ref_node *tmp;
+		struct rb_node *n;
		bool pin_bytes = false;
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
		if (!mutex_trylock(&head->mutex)) {
-			refcount_inc(&head->node.refs);
+			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(&head->node);
+			btrfs_put_delayed_ref_head(head);
			spin_lock(&delayed_refs->lock);
			continue;
		}
		spin_lock(&head->lock);
-		list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
-						 list) {
+		while ((n = rb_first(&head->ref_tree)) != NULL) {
+			ref = rb_entry(n, struct btrfs_delayed_ref_node,
+				       ref_node);
			ref->in_tree = 0;
-			list_del(&ref->list);
+			rb_erase(&ref->ref_node, &head->ref_tree);
+			RB_CLEAR_NODE(&ref->ref_node);
			if (!list_empty(&ref->add_list))
				list_del(&ref->add_list);
			atomic_dec(&delayed_refs->num_entries);
@@ -4304,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
		if (head->processing == 0)
			delayed_refs->num_heads_ready--;
		atomic_dec(&delayed_refs->num_entries);
-		head->node.in_tree = 0;
		rb_erase(&head->href_node, &delayed_refs->href_root);
+		RB_CLEAR_NODE(&head->href_node);
		spin_unlock(&head->lock);
		spin_unlock(&delayed_refs->lock);
		mutex_unlock(&head->mutex);
		if (pin_bytes)
-			btrfs_pin_extent(fs_info, head->node.bytenr,
-					 head->node.num_bytes, 1);
+			btrfs_pin_extent(fs_info, head->bytenr,
+					 head->num_bytes, 1);
-		btrfs_put_delayed_ref(&head->node);
+		btrfs_put_delayed_ref_head(head);
		cond_resched();
		spin_lock(&delayed_refs->lock);
	}
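Teardown drains each head's ref_tree by repeatedly erasing the leftmost node and clearing it, so that later RB_EMPTY_NODE() checks (as in btrfs_delayed_ref_lock() earlier) see the node as detached. The same pattern, reduced to a self-contained sketch with illustrative names:

#include <linux/rbtree.h>

/* pop-and-clear drain of an rbtree; callers free each entry */
static void drain_tree(struct rb_root *root)
{
	struct rb_node *n;

	while ((n = rb_first(root)) != NULL) {
		rb_erase(n, root);
		RB_CLEAR_NODE(n);	/* RB_EMPTY_NODE(n) is now true */
		/* container_of(n, ...) and put/free the entry here */
	}
}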

fs/btrfs/extent-tree.c: file diff suppressed because it is too large

fs/btrfs/extent_io.c

@@ -110,7 +110,6 @@ struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;
-	unsigned long bio_flags;
	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
@@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
 */
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
			      struct writeback_control *wbc,
-			      struct page *page, sector_t sector,
-			      size_t size, unsigned long offset,
+			      struct page *page, u64 offset,
+			      size_t size, unsigned long pg_offset,
			      struct block_device *bdev,
			      struct bio **bio_ret,
			      bio_end_io_t end_io_func,
@@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
	int contig = 0;
	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
	size_t page_size = min_t(size_t, size, PAGE_SIZE);
+	sector_t sector = offset >> 9;
	if (bio_ret && *bio_ret) {
		bio = *bio_ret;
@@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
		if (prev_bio_flags != bio_flags || !contig ||
		    force_bio_submit ||
-		    merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
-		    bio_add_page(bio, page, page_size, offset) < page_size) {
+		    merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
+		    bio_add_page(bio, page, page_size, pg_offset) < page_size) {
			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
			if (ret < 0) {
				*bio_ret = NULL;
@@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
		}
	}
-	bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
-	bio_add_page(bio, page, page_size, offset);
+	bio = btrfs_bio_alloc(bdev, offset);
+	bio_add_page(bio, page, page_size, pg_offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;
	bio->bi_write_hint = page->mapping->host->i_write_hint;
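submit_extent_page() now takes the logical byte offset and converts to a 512-byte sector only at the one place that needs it, instead of shifting back and forth at every call site. The conversion with a worked value (variable names illustrative):

/* byte offset <-> 512-byte sector (illustrative) */
u64 offset = 1048576;		/* 1 MiB into the device       */
sector_t sector = offset >> 9;	/* 1048576 / 512 == 2048       */
u64 back = (u64)sector << 9;	/* == offset, lossless as long
				   as offset is 512-byte aligned */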
@@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree,
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
-	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret = 0;
@@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree,
	}
	while (cur <= end) {
		bool force_bio_submit = false;
+		u64 offset;
		if (cur >= last_byte) {
			char *userpage;
@@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree,
		iosize = ALIGN(iosize, blocksize);
		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
			disk_io_size = em->block_len;
-			sector = em->block_start >> 9;
+			offset = em->block_start;
		} else {
-			sector = (em->block_start + extent_offset) >> 9;
+			offset = em->block_start + extent_offset;
			disk_io_size = iosize;
		}
		bdev = em->bdev;
@@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree,
		}
		ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
-					 page, sector, disk_io_size, pg_offset,
-					 bdev, bio,
+					 page, offset, disk_io_size,
+					 pg_offset, bdev, bio,
					 end_bio_extent_readpage, mirror_num,
					 *bio_flags,
					 this_bio_flag,
@@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
	u64 extent_offset;
	u64 block_start;
	u64 iosize;
-	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	size_t pg_offset = 0;
@@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
	while (cur <= end) {
		u64 em_end;
+		u64 offset;
		if (cur >= i_size) {
			if (tree->ops && tree->ops->writepage_end_io_hook)
BUG_ON(end < cur); BUG_ON(end < cur);
iosize = min(em_end - cur, end - cur + 1); iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize); iosize = ALIGN(iosize, blocksize);
sector = (em->block_start + extent_offset) >> 9; offset = em->block_start + extent_offset;
bdev = em->bdev; bdev = em->bdev;
block_start = em->block_start; block_start = em->block_start;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
} }
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
page, sector, iosize, pg_offset, page, offset, iosize, pg_offset,
bdev, &epd->bio, bdev, &epd->bio,
end_bio_extent_writepage, end_bio_extent_writepage,
0, 0, 0, false); 0, 0, 0, false);
@@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
	u64 offset = eb->start;
	u32 nritems;
	unsigned long i, num_pages;
-	unsigned long bio_flags = 0;
	unsigned long start, end;
	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
	int ret = 0;
@@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
	num_pages = num_extent_pages(eb->start, eb->len);
	atomic_set(&eb->io_pages, num_pages);
-	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
-		bio_flags = EXTENT_BIO_TREE_LOG;
	/* set btree blocks beyond nritems with 0 to avoid stale content. */
	nritems = btrfs_header_nritems(eb);
@@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
		clear_page_dirty_for_io(p);
		set_page_writeback(p);
		ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
-					 p, offset >> 9, PAGE_SIZE, 0, bdev,
+					 p, offset, PAGE_SIZE, 0, bdev,
					 &epd->bio,
					 end_bio_extent_buffer_writepage,
-					 0, epd->bio_flags, bio_flags, false);
-		epd->bio_flags = bio_flags;
+					 0, 0, 0, false);
		if (ret) {
			set_btree_ioerr(p);
			if (PageWriteback(p))
@@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping,
		.tree = tree,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
-		.bio_flags = 0,
	};
	int ret = 0;
	int done = 0;
@@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
	if (epd->bio) {
		int ret;
-		ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
+		ret = submit_one_bio(epd->bio, 0, 0);
		BUG_ON(ret < 0); /* -ENOMEM */
		epd->bio = NULL;
	}
@@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
		.get_extent = get_extent,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
-		.bio_flags = 0,
	};
	ret = __extent_writepage(page, wbc, &epd);
@@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
		.get_extent = get_extent,
		.extent_locked = 1,
		.sync_io = mode == WB_SYNC_ALL,
-		.bio_flags = 0,
	};
	struct writeback_control wbc_writepages = {
		.sync_mode = mode,
@@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree,
		.get_extent = get_extent,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
-		.bio_flags = 0,
	};
	ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,

fs/btrfs/extent_io.h

@@ -34,7 +34,6 @@
 * type for this bio
 */
#define EXTENT_BIO_COMPRESSED 1
-#define EXTENT_BIO_TREE_LOG 2
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */

fs/btrfs/file.c

@@ -856,7 +856,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0) {
-				ret = btrfs_inc_extent_ref(trans, fs_info,
+				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
@@ -940,7 +940,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
				extent_end = ALIGN(extent_end,
						   fs_info->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
-				ret = btrfs_free_extent(trans, fs_info,
+				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
@@ -1234,7 +1234,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
				    extent_end - split);
		btrfs_mark_buffer_dirty(leaf);
-		ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
+		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
					   0, root->root_key.objectid,
					   ino, orig_offset);
		if (ret) {
@@ -1268,7 +1268,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			extent_end = other_end;
			del_slot = path->slots[0] + 1;
			del_nr++;
-			ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
+			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						ino, orig_offset);
			if (ret) {
@@ -1288,7 +1288,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			key.offset = other_start;
			del_slot = path->slots[0];
			del_nr++;
-			ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
+			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						ino, orig_offset);
			if (ret) {
@@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;
-	bool need_unlock;
	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
		     PAGE_SIZE / (sizeof(struct page *)));
@@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;
+		int extents_locked;
		WARN_ON(num_pages > nrptrs);
@@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
			}
		}
+		WARN_ON(reserve_bytes == 0);
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
						      reserve_bytes);
		if (ret) {
@@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
		}
		release_bytes = reserve_bytes;
-		need_unlock = false;
again:
		/*
		 * This is going to setup the pages array with the number of
@ -1679,19 +1679,23 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
ret = prepare_pages(inode, pages, num_pages, ret = prepare_pages(inode, pages, num_pages,
pos, write_bytes, pos, write_bytes,
force_page_uptodate); force_page_uptodate);
if (ret) if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
break; break;
}
ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages, extents_locked = lock_and_cleanup_extent_if_need(
BTRFS_I(inode), pages,
num_pages, pos, write_bytes, &lockstart, num_pages, pos, write_bytes, &lockstart,
&lockend, &cached_state); &lockend, &cached_state);
if (ret < 0) { if (extents_locked < 0) {
if (ret == -EAGAIN) if (extents_locked == -EAGAIN)
goto again; goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
ret = extents_locked;
break; break;
} else if (ret > 0) {
need_unlock = true;
ret = 0;
} }
copied = btrfs_copy_from_user(pos, write_bytes, pages, i); copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
@ -1718,23 +1722,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
PAGE_SIZE); PAGE_SIZE);
} }
/*
* If we had a short copy we need to release the excess delalloc
* bytes we reserved. We need to increment outstanding_extents
* because btrfs_delalloc_release_space and
* btrfs_delalloc_release_metadata will decrement it, but
* we still have an outstanding extent for the chunk we actually
* managed to copy.
*/
if (num_sectors > dirty_sectors) { if (num_sectors > dirty_sectors) {
/* release everything except the sectors we dirtied */ /* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors << release_bytes -= dirty_sectors <<
fs_info->sb->s_blocksize_bits; fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
if (only_release_metadata) { if (only_release_metadata) {
btrfs_delalloc_release_metadata(BTRFS_I(inode), btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes); release_bytes);
@ -1756,10 +1747,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied > 0) if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages, ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, NULL); pos, copied, NULL);
if (need_unlock) if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree, unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state, lockstart, lockend, &cached_state,
GFP_NOFS); GFP_NOFS);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) { if (ret) {
btrfs_drop_pages(pages, num_pages); btrfs_drop_pages(pages, num_pages);
break; break;
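Taken together, the hunks above make the reservation symmetric: the outstanding-extent count taken by btrfs_delalloc_reserve_metadata() is dropped by btrfs_delalloc_release_extents() on every exit from the loop body, success or failure. A condensed sketch of the per-iteration shape (write_one_chunk and do_copy are hypothetical stand-ins for the real loop body):

static ssize_t write_one_chunk(struct inode *inode, size_t reserve_bytes)
{
	ssize_t ret;

	ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserve_bytes);
	if (ret)
		return ret;

	ret = do_copy(inode);	/* prepare pages, lock extents, copy data */

	/*
	 * Drop the extent count unconditionally; the reserved space
	 * itself is returned later, by ordered extent completion or by
	 * the release calls on the short-copy and error paths above.
	 */
	btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
	return ret;
}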
@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx; struct btrfs_log_ctx ctx;
int ret = 0, err; int ret = 0, err;
bool full_sync = 0; bool full_sync = false;
u64 len; u64 len;
/* /*

View File

@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group, struct btrfs_block_group_cache *block_group,
struct btrfs_path *path) struct btrfs_path *path)
{ {
u64 start, end;
int ret; int ret;
start = block_group->key.objectid;
end = block_group->key.objectid + block_group->key.offset;
block_group->needs_free_space = 0; block_group->needs_free_space = 0;
ret = add_new_free_space_info(trans, fs_info, block_group, path); ret = add_new_free_space_info(trans, fs_info, block_group, path);

View File

@ -500,11 +500,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint); prealloc, prealloc, &alloc_hint);
if (ret) { if (ret) {
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc); btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
goto out_put; goto out_put;
} }
ret = btrfs_write_out_ino_cache(root, trans, path, inode); ret = btrfs_write_out_ino_cache(root, trans, path, inode);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put: out_put:
iput(inode); iput(inode);
out_release: out_release:

View File

@ -42,6 +42,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h> #include <linux/posix_acl_xattr.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/magic.h>
#include "ctree.h" #include "ctree.h"
#include "disk-io.h" #include "disk-io.h"
#include "transaction.h" #include "transaction.h"
@ -67,7 +68,6 @@ struct btrfs_iget_args {
}; };
struct btrfs_dio_data { struct btrfs_dio_data {
u64 outstanding_extents;
u64 reserve; u64 reserve;
u64 unsubmitted_oe_range_start; u64 unsubmitted_oe_range_start;
u64 unsubmitted_oe_range_end; u64 unsubmitted_oe_range_end;
@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
btrfs_free_path(path); btrfs_free_path(path);
return PTR_ERR(trans); return PTR_ERR(trans);
} }
trans->block_rsv = &fs_info->delalloc_block_rsv; trans->block_rsv = &BTRFS_I(inode)->block_rsv;
if (compressed_size && compressed_pages) if (compressed_size && compressed_pages)
extent_item_size = btrfs_file_extent_calc_inline_size( extent_item_size = btrfs_file_extent_calc_inline_size(
@ -348,7 +348,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
} }
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start);
btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
out: out:
/* /*
@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode,
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 num_bytes;
u64 blocksize = fs_info->sectorsize; u64 blocksize = fs_info->sectorsize;
u64 actual_end; u64 actual_end;
u64 isize = i_size_read(inode); u64 isize = i_size_read(inode);
@ -508,8 +506,6 @@ static noinline void compress_file_range(struct inode *inode,
total_compressed = min_t(unsigned long, total_compressed, total_compressed = min_t(unsigned long, total_compressed,
BTRFS_MAX_UNCOMPRESSED); BTRFS_MAX_UNCOMPRESSED);
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
total_in = 0; total_in = 0;
ret = 0; ret = 0;
@ -542,7 +538,10 @@ static noinline void compress_file_range(struct inode *inode,
*/ */
extent_range_clear_dirty_for_io(inode, start, end); extent_range_clear_dirty_for_io(inode, start, end);
redirty = 1; redirty = 1;
ret = btrfs_compress_pages(compress_type,
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
inode->i_mapping, start, inode->i_mapping, start,
pages, pages,
&nr_pages, &nr_pages,
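The expression compress_type | (fs_info->compress_level << 4) is the single point where a mount-time zlib level enters the compression path; the workspace code unpacks the two nibbles again. A sketch of the convention implied by that shift (helper names are hypothetical):

static inline unsigned int btrfs_pack_type_level(unsigned int type,
						 unsigned int level)
{
	return type | (level << 4);
}

static inline unsigned int btrfs_unpack_type(unsigned int type_level)
{
	return type_level & 0xF;	/* a BTRFS_COMPRESS_* value */
}

static inline unsigned int btrfs_unpack_level(unsigned int type_level)
{
	return (type_level >> 4) & 0xF;	/* e.g. zlib 1..9, 0 = default */
}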
@ -570,7 +569,7 @@ static noinline void compress_file_range(struct inode *inode,
cont: cont:
if (start == 0) { if (start == 0) {
/* lets try to make an inline extent */ /* lets try to make an inline extent */
if (ret || total_in < (actual_end - start)) { if (ret || total_in < actual_end) {
/* we didn't compress the entire range, try /* we didn't compress the entire range, try
* to make an uncompressed inline extent. * to make an uncompressed inline extent.
*/ */
@ -584,16 +583,21 @@ static noinline void compress_file_range(struct inode *inode,
} }
if (ret <= 0) { if (ret <= 0) {
unsigned long clear_flags = EXTENT_DELALLOC | unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG; EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
unsigned long page_error_op; unsigned long page_error_op;
clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
page_error_op = ret < 0 ? PAGE_SET_ERROR : 0; page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
/* /*
* inline extent creation worked or returned error, * inline extent creation worked or returned error,
* we don't need to create any more async work items. * we don't need to create any more async work items.
* Unlock and free up our temp pages. * Unlock and free up our temp pages.
*
* We use DO_ACCOUNTING here because we need the
* delalloc_release_metadata to be done _after_ we drop
* our outstanding extent for clearing delalloc for this
* range.
*/ */
extent_clear_unlock_delalloc(inode, start, end, end, extent_clear_unlock_delalloc(inode, start, end, end,
NULL, clear_flags, NULL, clear_flags,
@ -602,10 +606,6 @@ static noinline void compress_file_range(struct inode *inode,
PAGE_SET_WRITEBACK | PAGE_SET_WRITEBACK |
page_error_op | page_error_op |
PAGE_END_WRITEBACK); PAGE_END_WRITEBACK);
if (ret == 0)
btrfs_free_reserved_data_space_noquota(inode,
start,
end - start + 1);
goto free_pages_out; goto free_pages_out;
} }
} }
@ -625,7 +625,6 @@ static noinline void compress_file_range(struct inode *inode,
*/ */
total_in = ALIGN(total_in, PAGE_SIZE); total_in = ALIGN(total_in, PAGE_SIZE);
if (total_compressed + blocksize <= total_in) { if (total_compressed + blocksize <= total_in) {
num_bytes = total_in;
*num_added += 1; *num_added += 1;
/* /*
@ -633,12 +632,12 @@ static noinline void compress_file_range(struct inode *inode,
* allocation on disk for these compressed pages, and * allocation on disk for these compressed pages, and
* will submit them to the elevator. * will submit them to the elevator.
*/ */
add_async_extent(async_cow, start, num_bytes, add_async_extent(async_cow, start, total_in,
total_compressed, pages, nr_pages, total_compressed, pages, nr_pages,
compress_type); compress_type);
if (start + num_bytes < end) { if (start + total_in < end) {
start += num_bytes; start += total_in;
pages = NULL; pages = NULL;
cond_resched(); cond_resched();
goto again; goto again;
@ -982,15 +981,19 @@ static noinline int cow_file_range(struct inode *inode,
ret = cow_file_range_inline(root, inode, start, end, 0, ret = cow_file_range_inline(root, inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL); BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) { if (ret == 0) {
/*
* We use DO_ACCOUNTING here because we need the
* delalloc_release_metadata to be run _after_ we drop
* our outstanding extent for clearing delalloc for this
* range.
*/
extent_clear_unlock_delalloc(inode, start, end, extent_clear_unlock_delalloc(inode, start, end,
delalloc_end, NULL, delalloc_end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DEFRAG, PAGE_UNLOCK | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK); PAGE_END_WRITEBACK);
btrfs_free_reserved_data_space_noquota(inode, start,
end - start + 1);
*nr_written = *nr_written + *nr_written = *nr_written +
(end - start + PAGE_SIZE) / PAGE_SIZE; (end - start + PAGE_SIZE) / PAGE_SIZE;
*page_started = 1; *page_started = 1;
@ -1226,13 +1229,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->async_delalloc_pages) ==
0));
}
*nr_written += nr_pages; *nr_written += nr_pages;
start = cur_end + 1; start = cur_end + 1;
} }
@ -1635,7 +1631,7 @@ static void btrfs_split_extent_hook(void *private_data,
} }
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++; btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
} }
@ -1665,7 +1661,7 @@ static void btrfs_merge_extent_hook(void *private_data,
/* we're not bigger than the max, unreserve the space and go */ /* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) { if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--; btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
return; return;
} }
@ -1696,7 +1692,7 @@ static void btrfs_merge_extent_hook(void *private_data,
return; return;
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--; btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
} }
@ -1766,15 +1762,12 @@ static void btrfs_set_bit_hook(void *private_data,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
if (*bits & EXTENT_FIRST_DELALLOC) { spin_lock(&BTRFS_I(inode)->lock);
*bits &= ~EXTENT_FIRST_DELALLOC; btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
} else { spin_unlock(&BTRFS_I(inode)->lock);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
/* For sanity tests */ /* For sanity tests */
if (btrfs_is_testing(fs_info)) if (btrfs_is_testing(fs_info))
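btrfs_mod_outstanding_extents() replaces the open-coded increments and decrements with one signed-delta helper, and count_max_extents() turns a byte range into its worst-case extent count. Hedged sketches of both, matching how the hooks above call them (the real helper also emits a tracepoint, omitted here):

/* Worst case: one extent per BTRFS_MAX_EXTENT_SIZE chunk (128M). */
static inline u32 count_max_extents(u64 size)
{
	return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
		       BTRFS_MAX_EXTENT_SIZE);
}

/* Caller must hold the inode's spinlock, as in the hunks above. */
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
						 int mod)
{
	inode->outstanding_extents += mod;
	ASSERT(inode->outstanding_extents >= 0);
}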
@ -1828,13 +1821,9 @@ static void btrfs_clear_bit_hook(void *private_data,
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
bool do_list = !btrfs_is_free_space_inode(inode); bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) { spin_lock(&inode->lock);
*bits &= ~EXTENT_FIRST_DELALLOC; btrfs_mod_outstanding_extents(inode, -num_extents);
} else if (!(*bits & EXTENT_CLEAR_META_RESV)) { spin_unlock(&inode->lock);
spin_lock(&inode->lock);
inode->outstanding_extents -= num_extents;
spin_unlock(&inode->lock);
}
/* /*
* We don't reserve metadata space for space cache inodes so we * We don't reserve metadata space for space cache inodes so we
@ -2105,6 +2094,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
0); 0);
ClearPageChecked(page); ClearPageChecked(page);
set_page_dirty(page); set_page_dirty(page);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
out: out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state, GFP_NOFS); &cached_state, GFP_NOFS);
@ -2229,8 +2219,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
if (ret < 0) if (ret < 0)
goto out; goto out;
qg_released = ret; qg_released = ret;
ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, ret = btrfs_alloc_reserved_file_extent(trans, root,
btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins); btrfs_ino(BTRFS_I(inode)),
file_pos, qg_released, &ins);
out: out:
btrfs_free_path(path); btrfs_free_path(path);
@ -2464,7 +2455,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
ret = iterate_inodes_from_logical(old->bytenr + ret = iterate_inodes_from_logical(old->bytenr +
old->extent_offset, fs_info, old->extent_offset, fs_info,
path, record_one_backref, path, record_one_backref,
old); old, false);
if (ret < 0 && ret != -ENOENT) if (ret < 0 && ret != -ENOENT)
return false; return false;
@ -2682,7 +2673,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
inode_add_bytes(inode, len); inode_add_bytes(inode, len);
btrfs_release_path(path); btrfs_release_path(path);
ret = btrfs_inc_extent_ref(trans, fs_info, new->bytenr, ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
new->disk_len, 0, new->disk_len, 0,
backref->root_id, backref->inum, backref->root_id, backref->inum,
new->file_pos); /* start - extent_offset */ new->file_pos); /* start - extent_offset */
@ -2964,7 +2955,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = NULL; trans = NULL;
goto out; goto out;
} }
trans->block_rsv = &fs_info->delalloc_block_rsv; trans->block_rsv = &BTRFS_I(inode)->block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode); ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */ if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
@ -3000,7 +2991,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out; goto out;
} }
trans->block_rsv = &fs_info->delalloc_block_rsv; trans->block_rsv = &BTRFS_I(inode)->block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compress_type = ordered_extent->compress_type; compress_type = ordered_extent->compress_type;
@ -3058,9 +3049,6 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
0, &cached_state, GFP_NOFS); 0, &cached_state, GFP_NOFS);
} }
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
ordered_extent->len);
if (trans) if (trans)
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
@ -4372,47 +4360,11 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
} }
static int truncate_inline_extent(struct inode *inode, /*
struct btrfs_path *path, * Return this if we need to call truncate_block for the last bit of the
struct btrfs_key *found_key, * truncate.
const u64 item_end, */
const u64 new_size) #define NEED_TRUNCATE_BLOCK 1
{
struct extent_buffer *leaf = path->nodes[0];
int slot = path->slots[0];
struct btrfs_file_extent_item *fi;
u32 size = (u32)(new_size - found_key->offset);
struct btrfs_root *root = BTRFS_I(inode)->root;
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
loff_t offset = new_size;
loff_t page_end = ALIGN(offset, PAGE_SIZE);
/*
* Zero out the remaining of the last page of our inline extent,
* instead of directly truncating our inline extent here - that
* would be much more complex (decompressing all the data, then
* compressing the truncated data, which might be bigger than
* the size of the inline extent, resize the extent, etc).
* We release the path because to get the page we might need to
* read the extent item from disk (data not in the page cache).
*/
btrfs_release_path(path);
return btrfs_truncate_block(inode, offset, page_end - offset,
0);
}
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size = btrfs_file_extent_calc_inline_size(size);
btrfs_truncate_item(root->fs_info, path, size, 1);
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
inode_sub_bytes(inode, item_end + 1 - new_size);
return 0;
}
/* /*
* this can truncate away extent items, csum items and directory items. * this can truncate away extent items, csum items and directory items.
@ -4451,9 +4403,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int err = 0; int err = 0;
u64 ino = btrfs_ino(BTRFS_I(inode)); u64 ino = btrfs_ino(BTRFS_I(inode));
u64 bytes_deleted = 0; u64 bytes_deleted = 0;
bool be_nice = 0; bool be_nice = false;
bool should_throttle = 0; bool should_throttle = false;
bool should_end = 0; bool should_end = false;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
@ -4463,7 +4415,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
*/ */
if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
test_bit(BTRFS_ROOT_REF_COWS, &root->state)) test_bit(BTRFS_ROOT_REF_COWS, &root->state))
be_nice = 1; be_nice = true;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
@ -4573,11 +4525,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (found_type != BTRFS_EXTENT_DATA_KEY) if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete; goto delete;
if (del_item)
last_size = found_key.offset;
else
last_size = new_size;
if (extent_type != BTRFS_FILE_EXTENT_INLINE) { if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec; u64 num_dec;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@ -4619,40 +4566,30 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
*/ */
if (!del_item && if (!del_item &&
btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 &&
btrfs_file_extent_other_encoding(leaf, fi) == 0) { btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
btrfs_file_extent_compression(leaf, fi) == 0) {
u32 size = (u32)(new_size - found_key.offset);
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size = btrfs_file_extent_calc_inline_size(size);
btrfs_truncate_item(root->fs_info, path, size, 1);
} else if (!del_item) {
/* /*
* Need to release path in order to truncate a * We have to bail so the last_size is set to
* compressed extent. So delete any accumulated * just before this extent.
* extent items so far.
*/ */
if (btrfs_file_extent_compression(leaf, fi) != err = NEED_TRUNCATE_BLOCK;
BTRFS_COMPRESS_NONE && pending_del_nr) { break;
err = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
if (err) {
btrfs_abort_transaction(trans,
err);
goto error;
}
pending_del_nr = 0;
}
err = truncate_inline_extent(inode, path,
&found_key,
item_end,
new_size);
if (err) {
btrfs_abort_transaction(trans, err);
goto error;
}
} else if (test_bit(BTRFS_ROOT_REF_COWS,
&root->state)) {
inode_sub_bytes(inode, item_end + 1 - new_size);
} }
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
inode_sub_bytes(inode, item_end + 1 - new_size);
} }
delete: delete:
if (del_item)
last_size = found_key.offset;
else
last_size = new_size;
if (del_item) { if (del_item) {
if (!pending_del_nr) { if (!pending_del_nr) {
/* no pending yet, add ourselves */ /* no pending yet, add ourselves */
@ -4669,14 +4606,14 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
} else { } else {
break; break;
} }
should_throttle = 0; should_throttle = false;
if (found_extent && if (found_extent &&
(test_bit(BTRFS_ROOT_REF_COWS, &root->state) || (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == fs_info->tree_root)) { root == fs_info->tree_root)) {
btrfs_set_path_blocking(path); btrfs_set_path_blocking(path);
bytes_deleted += extent_num_bytes; bytes_deleted += extent_num_bytes;
ret = btrfs_free_extent(trans, fs_info, extent_start, ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0, extent_num_bytes, 0,
btrfs_header_owner(leaf), btrfs_header_owner(leaf),
ino, extent_offset); ino, extent_offset);
@ -4688,11 +4625,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (be_nice) { if (be_nice) {
if (truncate_space_check(trans, root, if (truncate_space_check(trans, root,
extent_num_bytes)) { extent_num_bytes)) {
should_end = 1; should_end = true;
} }
if (btrfs_should_throttle_delayed_refs(trans, if (btrfs_should_throttle_delayed_refs(trans,
fs_info)) fs_info))
should_throttle = 1; should_throttle = true;
} }
} }
@ -4801,8 +4738,11 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
(!len || ((len & (blocksize - 1)) == 0))) (!len || ((len & (blocksize - 1)) == 0)))
goto out; goto out;
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
round_down(from, blocksize), blocksize); block_start, blocksize);
if (ret) if (ret)
goto out; goto out;
@ -4810,15 +4750,12 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
page = find_or_create_page(mapping, index, mask); page = find_or_create_page(mapping, index, mask);
if (!page) { if (!page) {
btrfs_delalloc_release_space(inode, data_reserved, btrfs_delalloc_release_space(inode, data_reserved,
round_down(from, blocksize), block_start, blocksize);
blocksize); btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page); ret = btrfs_readpage(NULL, page);
lock_page(page); lock_page(page);
@ -4883,6 +4820,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
if (ret) if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start, btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize); blocksize);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
out: out:
@ -7797,33 +7735,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
return em; return em;
} }
static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
{
unsigned num_extents = count_max_extents(len);
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
if (dio_data->outstanding_extents >= num_extents) {
dio_data->outstanding_extents -= num_extents;
} else {
/*
* If dio write length has been split due to no large enough
* contiguous space, we need to compensate our inode counter
* appropriately.
*/
u64 num_needed = num_extents - dio_data->outstanding_extents;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents += num_needed;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
@ -7985,7 +7896,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (!dio_data->overwrite && start + len > i_size_read(inode)) if (!dio_data->overwrite && start + len > i_size_read(inode))
i_size_write(inode, start + len); i_size_write(inode, start + len);
adjust_dio_outstanding_extents(inode, dio_data, len);
WARN_ON(dio_data->reserve < len); WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len; dio_data->reserve -= len;
dio_data->unsubmitted_oe_range_end = start + len; dio_data->unsubmitted_oe_range_end = start + len;
@ -8015,14 +7925,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
err: err:
if (dio_data) if (dio_data)
current->journal_info = dio_data; current->journal_info = dio_data;
/*
* Compensate the delalloc release we do in btrfs_direct_IO() when we
* write less data then expected, so that we don't underflow our inode's
* outstanding extents counter.
*/
if (create && dio_data)
adjust_dio_outstanding_extents(inode, dio_data, len);
return ret; return ret;
} }
@ -8495,7 +8397,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (dip->errors) { if (dip->errors) {
bio_io_error(dip->orig_bio); bio_io_error(dip->orig_bio);
} else { } else {
dip->dio_bio->bi_status = 0; dip->dio_bio->bi_status = BLK_STS_OK;
bio_endio(dip->orig_bio); bio_endio(dip->orig_bio);
} }
out: out:
@ -8577,7 +8479,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
goto err; goto err;
} }
map: map:
ret = btrfs_map_bio(fs_info, bio, 0, async_submit); ret = btrfs_map_bio(fs_info, bio, 0, 0);
err: err:
bio_put(bio); bio_put(bio);
return ret; return ret;
@ -8786,7 +8688,6 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
} }
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
struct kiocb *iocb,
const struct iov_iter *iter, loff_t offset) const struct iov_iter *iter, loff_t offset)
{ {
int seg; int seg;
@ -8833,7 +8734,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
bool relock = false; bool relock = false;
ssize_t ret; ssize_t ret;
if (check_direct_IO(fs_info, iocb, iter, offset)) if (check_direct_IO(fs_info, iter, offset))
return 0; return 0;
inode_dio_begin(inode); inode_dio_begin(inode);
@ -8868,7 +8769,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
offset, count); offset, count);
if (ret) if (ret)
goto out; goto out;
dio_data.outstanding_extents = count_max_extents(count);
/* /*
* We need to know how many extents we reserved so that we can * We need to know how many extents we reserved so that we can
@ -8915,6 +8815,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else if (ret >= 0 && (size_t)ret < count) } else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved, btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret); offset, count - (size_t)ret);
btrfs_delalloc_release_extents(BTRFS_I(inode), count);
} }
out: out:
if (wakeup) if (wakeup)
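With adjust_dio_outstanding_extents() removed, direct IO mirrors the buffered path: one reservation covers the whole iov up front and one btrfs_delalloc_release_extents() call drops the count at completion, however the blocks end up being split. A hedged sketch of that lifecycle (submit_dio is a hypothetical stand-in for __blockdev_direct_IO):

static ssize_t btrfs_dio_write_sketch(struct inode *inode, loff_t offset,
				      size_t count)
{
	struct extent_changeset *data_reserved = NULL;
	ssize_t ret;

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
					   offset, count);
	if (ret)
		return ret;

	ret = submit_dio(inode, offset, count);

	if (ret >= 0 && (size_t)ret < count)	/* short write */
		btrfs_delalloc_release_space(inode, data_reserved, offset,
					     count - (size_t)ret);
	/* One release for the whole range, regardless of splitting. */
	btrfs_delalloc_release_extents(BTRFS_I(inode), count);
	extent_changeset_free(data_reserved);
	return ret;
}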
@ -9232,9 +9133,6 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
fs_info->sectorsize); fs_info->sectorsize);
if (reserved_space < PAGE_SIZE) { if (reserved_space < PAGE_SIZE) {
end = page_start + reserved_space - 1; end = page_start + reserved_space - 1;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved, btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE - reserved_space); page_start, PAGE_SIZE - reserved_space);
} }
@ -9286,12 +9184,14 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
out_unlock: out_unlock:
if (!ret) { if (!ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
sb_end_pagefault(inode->i_sb); sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved); extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED; return VM_FAULT_LOCKED;
} }
unlock_page(page); unlock_page(page);
out: out:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved, page_start, btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space); reserved_space);
out_noreserve: out_noreserve:
@ -9387,12 +9287,12 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_truncate_inode_items(trans, root, inode, ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size, inode->i_size,
BTRFS_EXTENT_DATA_KEY); BTRFS_EXTENT_DATA_KEY);
trans->block_rsv = &fs_info->trans_block_rsv;
if (ret != -ENOSPC && ret != -EAGAIN) { if (ret != -ENOSPC && ret != -EAGAIN) {
err = ret; err = ret;
break; break;
} }
trans->block_rsv = &fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode); ret = btrfs_update_inode(trans, root, inode);
if (ret) { if (ret) {
err = ret; err = ret;
@ -9416,6 +9316,27 @@ static int btrfs_truncate(struct inode *inode)
trans->block_rsv = rsv; trans->block_rsv = rsv;
} }
/*
* We can't call btrfs_truncate_block inside a trans handle as we could
* deadlock with freeze, if we got NEED_TRUNCATE_BLOCK then we know
* we've truncated everything except the last little bit, and can do
* btrfs_truncate_block and then update the disk_i_size.
*/
if (ret == NEED_TRUNCATE_BLOCK) {
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
if (ret)
goto out;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
}
if (ret == 0 && inode->i_nlink > 0) { if (ret == 0 && inode->i_nlink > 0) {
trans->block_rsv = root->orphan_block_rsv; trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_orphan_del(trans, BTRFS_I(inode)); ret = btrfs_orphan_del(trans, BTRFS_I(inode));
@ -9480,6 +9401,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct inode *btrfs_alloc_inode(struct super_block *sb) struct inode *btrfs_alloc_inode(struct super_block *sb)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_inode *ei; struct btrfs_inode *ei;
struct inode *inode; struct inode *inode;
@ -9506,8 +9428,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->lock); spin_lock_init(&ei->lock);
ei->outstanding_extents = 0; ei->outstanding_extents = 0;
ei->reserved_extents = 0; if (sb->s_magic != BTRFS_TEST_MAGIC)
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
BTRFS_BLOCK_RSV_DELALLOC);
ei->runtime_flags = 0; ei->runtime_flags = 0;
ei->prop_compress = BTRFS_COMPRESS_NONE; ei->prop_compress = BTRFS_COMPRESS_NONE;
ei->defrag_compress = BTRFS_COMPRESS_NONE; ei->defrag_compress = BTRFS_COMPRESS_NONE;
@ -9557,8 +9480,9 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!hlist_empty(&inode->i_dentry)); WARN_ON(!hlist_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages); WARN_ON(inode->i_data.nrpages);
WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
WARN_ON(BTRFS_I(inode)->block_rsv.size);
WARN_ON(BTRFS_I(inode)->outstanding_extents); WARN_ON(BTRFS_I(inode)->outstanding_extents);
WARN_ON(BTRFS_I(inode)->reserved_extents);
WARN_ON(BTRFS_I(inode)->delalloc_bytes); WARN_ON(BTRFS_I(inode)->delalloc_bytes);
WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
WARN_ON(BTRFS_I(inode)->csum_bytes); WARN_ON(BTRFS_I(inode)->csum_bytes);
@ -10337,19 +10261,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
ret = __start_delalloc_inodes(root, delay_iput, -1); ret = __start_delalloc_inodes(root, delay_iput, -1);
if (ret > 0) if (ret > 0)
ret = 0; ret = 0;
/*
* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
*/
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0 &&
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
return ret; return ret;
} }
@ -10391,14 +10302,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
spin_unlock(&fs_info->delalloc_root_lock); spin_unlock(&fs_info->delalloc_root_lock);
ret = 0; ret = 0;
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0 &&
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
out: out:
if (!list_empty_careful(&splice)) { if (!list_empty_careful(&splice)) {
spin_lock(&fs_info->delalloc_root_lock); spin_lock(&fs_info->delalloc_root_lock);

View File

@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 {
struct btrfs_ioctl_received_subvol_args_32) struct btrfs_ioctl_received_subvol_args_32)
#endif #endif
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
compat_uptr_t clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
} __attribute__ ((__packed__));
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
#endif
static int btrfs_clone(struct inode *src, struct inode *inode, static int btrfs_clone(struct inode *src, struct inode *inode,
u64 off, u64 olen, u64 olen_aligned, u64 destoff, u64 off, u64 olen, u64 olen_aligned, u64 destoff,
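The separate 32-bit struct above exists because clone_sources is a pointer: on a 32-bit ABI it occupies 4 bytes and the struct is packed, so every later field shifts and _IOW() encodes a different ioctl number, which is why BTRFS_IOC_SEND_32 must be matched explicitly. A sketch of the size relationship (the BUILD_BUG_ONs are illustrative, not part of the commit):

#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
static inline void check_send_args_layout(void)
{
	/* 64-bit: nine 8-byte fields. */
	BUILD_BUG_ON(sizeof(struct btrfs_ioctl_send_args) != 72);
	/* compat: the pointer shrinks from 8 to 4 bytes. */
	BUILD_BUG_ON(sizeof(struct btrfs_ioctl_send_args_32) != 68);
}
#endif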
@ -609,23 +622,6 @@ static noinline int create_subvol(struct inode *dir,
return ret; return ret;
} }
static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root)
{
s64 writers;
DEFINE_WAIT(wait);
do {
prepare_to_wait(&root->subv_writers->wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&root->subv_writers->counter);
if (writers)
schedule();
finish_wait(&root->subv_writers->wait, &wait);
} while (writers);
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir, static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, struct dentry *dentry,
u64 *async_transid, bool readonly, u64 *async_transid, bool readonly,
@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
atomic_inc(&root->will_be_snapshotted); atomic_inc(&root->will_be_snapshotted);
smp_mb__after_atomic(); smp_mb__after_atomic();
btrfs_wait_for_no_snapshotting_writes(root); /* wait for no snapshot writes */
wait_event(root->subv_writers->wait,
percpu_counter_sum(&root->subv_writers->counter) == 0);
ret = btrfs_start_delalloc_inodes(root, 0); ret = btrfs_start_delalloc_inodes(root, 0);
if (ret) if (ret)
@ -1219,6 +1217,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
unlock_page(pages[i]); unlock_page(pages[i]);
put_page(pages[i]); put_page(pages[i]);
} }
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved); extent_changeset_free(data_reserved);
return i_done; return i_done;
out: out:
@ -1229,6 +1228,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
btrfs_delalloc_release_space(inode, data_reserved, btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT, start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT); page_cnt << PAGE_SHIFT);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved); extent_changeset_free(data_reserved);
return ret; return ret;
@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
} }
if (do_compress) {
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
*/
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0 &&
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) { if (range->compress_type == BTRFS_COMPRESS_LZO) {
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
ret = btrfs_update_root(trans, fs_info->tree_root, ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item); &root->root_key, &root->root_item);
if (ret < 0) {
btrfs_end_transaction(trans);
goto out_reset;
}
ret = btrfs_commit_transaction(trans);
btrfs_commit_transaction(trans);
out_reset: out_reset:
if (ret) if (ret)
btrfs_set_root_flags(&root->root_item, root_flags); btrfs_set_root_flags(&root->root_item, root_flags);
@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
inode = file_inode(file); inode = file_inode(file);
ret = search_ioctl(inode, &args.key, &buf_size, ret = search_ioctl(inode, &args.key, &buf_size,
(char *)(&uarg->buf[0])); (char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
ret = -EFAULT; ret = -EFAULT;
else if (ret == -EOVERFLOW && else if (ret == -EOVERFLOW &&
@ -3706,7 +3696,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
if (disko) { if (disko) {
inode_add_bytes(inode, datal); inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans, ret = btrfs_inc_extent_ref(trans,
fs_info, root,
disko, diskl, 0, disko, diskl, 0,
root->root_key.objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(inode)), btrfs_ino(BTRFS_I(inode)),
@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_space_info *dest_orig; struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info __user *user_dest; struct btrfs_ioctl_space_info __user *user_dest;
struct btrfs_space_info *info; struct btrfs_space_info *info;
u64 types[] = {BTRFS_BLOCK_GROUP_DATA, static const u64 types[] = {
BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
};
int num_types = 4; int num_types = 4;
int alloc_size; int alloc_size;
int ret = 0; int ret = 0;
@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr; ipath->fspath->val[i] = rel_ptr;
} }
ret = copy_to_user((void *)(unsigned long)ipa->fspath, ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
(void *)(unsigned long)ipath->fspath, size); ipath->fspath, size);
if (ret) { if (ret) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
} }
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
void __user *arg) void __user *arg, int version)
{ {
int ret = 0; int ret = 0;
int size; int size;
struct btrfs_ioctl_logical_ino_args *loi; struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL; struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL; struct btrfs_path *path = NULL;
bool ignore_offset;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
if (IS_ERR(loi)) if (IS_ERR(loi))
return PTR_ERR(loi); return PTR_ERR(loi);
if (version == 1) {
ignore_offset = false;
size = min_t(u32, loi->size, SZ_64K);
} else {
/* All reserved bits must be 0 for now */
if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
ret = -EINVAL;
goto out_loi;
}
/* Only accept flags we have defined so far */
if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
ret = -EINVAL;
goto out_loi;
}
ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
size = min_t(u32, loi->size, SZ_16M);
}
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) { if (!path) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
size = min_t(u32, loi->size, SZ_64K);
inodes = init_data_container(size); inodes = init_data_container(size);
if (IS_ERR(inodes)) { if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes); ret = PTR_ERR(inodes);
@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
} }
ret = iterate_inodes_from_logical(loi->logical, fs_info, path, ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes); build_ino_list, inodes, ignore_offset);
if (ret == -EINVAL) if (ret == -EINVAL)
ret = -ENOENT; ret = -ENOENT;
if (ret < 0) if (ret < 0)
goto out; goto out;
ret = copy_to_user((void *)(unsigned long)loi->inodes, ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
(void *)(unsigned long)inodes, size); size);
if (ret) if (ret)
ret = -EFAULT; ret = -EFAULT;
out: out:
btrfs_free_path(path); btrfs_free_path(path);
kvfree(inodes); kvfree(inodes);
out_loi:
kfree(loi); kfree(loi);
return ret; return ret;
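From userspace, v2 keeps the call shape of v1 and adds a flags word (only IGNORE_OFFSET is defined so far, matching the mask above) plus a result buffer capped at 16M instead of 64K. A hedged usage sketch (error handling elided; the buffer size is the caller's choice):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

static int all_inodes_for_block(int fd, uint64_t logical, void *buf,
				uint64_t buf_size)
{
	struct btrfs_ioctl_logical_ino_args loi;

	memset(&loi, 0, sizeof(loi));	/* reserved bits must be zero */
	loi.logical = logical;
	loi.size = buf_size;		/* v2 accepts up to 16M */
	loi.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
	loi.inodes = (uintptr_t)buf;

	return ioctl(fd, BTRFS_IOC_LOGICAL_INO_V2, &loi);
}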
@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
root->root_key.objectid); root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) { if (ret < 0 && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out; goto out;
} }
} }
ret = btrfs_commit_transaction(trans); ret = btrfs_commit_transaction(trans);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
out: out:
up_write(&fs_info->subvol_sem); up_write(&fs_info->subvol_sem);
mnt_drop_write_file(file); mnt_drop_write_file(file);
@ -5490,6 +5497,41 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
return ret; return ret;
} }
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 args32;
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
return -EFAULT;
arg = kzalloc(sizeof(*arg), GFP_KERNEL);
if (!arg)
return -ENOMEM;
arg->send_fd = args32.send_fd;
arg->clone_sources_count = args32.clone_sources_count;
arg->clone_sources = compat_ptr(args32.clone_sources);
arg->parent_root = args32.parent_root;
arg->flags = args32.flags;
memcpy(arg->reserved, args32.reserved,
sizeof(args32.reserved));
#else
return -ENOTTY;
#endif
} else {
arg = memdup_user(argp, sizeof(*arg));
if (IS_ERR(arg))
return PTR_ERR(arg);
}
ret = btrfs_ioctl_send(file, arg);
kfree(arg);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg) cmd, unsigned long arg)
{ {
@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_INO_PATHS: case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp); return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO: case BTRFS_IOC_LOGICAL_INO:
return btrfs_ioctl_logical_to_ino(fs_info, argp); return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
case BTRFS_IOC_LOGICAL_INO_V2:
return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
case BTRFS_IOC_SPACE_INFO: case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(fs_info, argp); return btrfs_ioctl_space_info(fs_info, argp);
case BTRFS_IOC_SYNC: { case BTRFS_IOC_SYNC: {
@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp); return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif #endif
case BTRFS_IOC_SEND: case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp); return _btrfs_ioctl_send(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
return _btrfs_ioctl_send(file, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS: case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp); return btrfs_ioctl_get_dev_stats(fs_info, argp);
case BTRFS_IOC_QUOTA_CTL: case BTRFS_IOC_QUOTA_CTL:

View File

@ -430,10 +430,15 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
return ret; return ret;
} }
static void lzo_set_level(struct list_head *ws, unsigned int type)
{
}
const struct btrfs_compress_op btrfs_lzo_compress = { const struct btrfs_compress_op btrfs_lzo_compress = {
.alloc_workspace = lzo_alloc_workspace, .alloc_workspace = lzo_alloc_workspace,
.free_workspace = lzo_free_workspace, .free_workspace = lzo_free_workspace,
.compress_pages = lzo_compress_pages, .compress_pages = lzo_compress_pages,
.decompress_bio = lzo_decompress_bio, .decompress_bio = lzo_decompress_bio,
.decompress = lzo_decompress, .decompress = lzo_decompress,
.set_level = lzo_set_level,
}; };
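The new .set_level hook lets each algorithm consume the level packed into the upper nibble of the type word; lzo has no tunable levels, hence the empty body. For contrast, a sketch of what the zlib side of this series does with it (assuming zlib.c's workspace struct carries a level field):

static void zlib_set_level(struct list_head *ws, unsigned int type)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	unsigned int level = (type & 0xF0) >> 4;	/* unpack the nibble */

	if (level > 9)
		level = 9;
	workspace->level = level > 0 ? level : 3;	/* 3 = old default */
}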

View File

@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
} }
spin_unlock(&root->ordered_extent_lock); spin_unlock(&root->ordered_extent_lock);
/*
* We don't need the count_max_extents here, we can assume that all of
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
return 0; return 0;
} }
@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_inode_tree *tree; struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
struct rb_node *node; struct rb_node *node;
bool dec_pending_ordered = false; bool dec_pending_ordered = false;
tree = &BTRFS_I(inode)->ordered_tree; /* This is paired with btrfs_add_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
node = &entry->rb_node; node = &entry->rb_node;
rb_erase(node, &tree->tree); rb_erase(node, &tree->tree);
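These two hunks are a matched pair: every ordered extent pins exactly one outstanding extent from __btrfs_add_ordered_extent() until btrfs_remove_ordered_extent(), and its metadata reservation is now released at removal time instead of in btrfs_finish_ordered_io(). The pairing, condensed into one view (illustrative):

/* creation, in __btrfs_add_ordered_extent() */
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);

/* ... IO runs; the range can split but never drops below one unit ... */

/* removal, in btrfs_remove_ordered_extent(): the -1 pairs with the +1 */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
	btrfs_delalloc_release_metadata(btrfs_inode, entry->len);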

View File

@ -1441,7 +1441,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
u64 bytenr = qrecord->bytenr; u64 bytenr = qrecord->bytenr;
int ret; int ret;
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -2031,7 +2031,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
/* Search commit root to find old_roots */ /* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info, ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0, record->bytenr, 0,
&record->old_roots); &record->old_roots, false);
if (ret < 0) if (ret < 0)
goto cleanup; goto cleanup;
} }
@ -2042,7 +2042,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
* root. It's safe inside commit_transaction(). * root. It's safe inside commit_transaction().
*/ */
ret = btrfs_find_all_roots(trans, fs_info, ret = btrfs_find_all_roots(trans, fs_info,
record->bytenr, SEQ_LAST, &new_roots); record->bytenr, SEQ_LAST, &new_roots, false);
if (ret < 0) if (ret < 0)
goto cleanup; goto cleanup;
if (qgroup_to_skip) { if (qgroup_to_skip) {
@ -2570,7 +2570,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
num_bytes = found.offset; num_bytes = found.offset;
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
&roots); &roots, false);
if (ret < 0) if (ret < 0)
goto out; goto out;
/* For rescan, just pass old_roots as NULL */ /* For rescan, just pass old_roots as NULL */

View File

@ -1326,6 +1326,9 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
cleanup: cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR); rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
} }
/* /*
@ -1582,6 +1585,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
cleanup: cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR); rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return -EIO; return -EIO;
finish: finish:
@ -2107,6 +2114,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
if (rbio->operation == BTRFS_RBIO_READ_REBUILD || if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
rbio_orig_end_io(rbio, BLK_STS_IOERR); rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return -EIO; return -EIO;
} }
@ -2231,12 +2242,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
ASSERT(!bio->bi_iter.bi_size); ASSERT(!bio->bi_iter.bi_size);
rbio->operation = BTRFS_RBIO_PARITY_SCRUB; rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
for (i = 0; i < rbio->real_stripes; i++) { /*
* After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
* to the end position, so this search can start from the first parity
* stripe.
*/
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
if (bbio->stripes[i].dev == scrub_dev) { if (bbio->stripes[i].dev == scrub_dev) {
rbio->scrubp = i; rbio->scrubp = i;
break; break;
} }
} }
ASSERT(i < rbio->real_stripes);
/* Now we just support the sectorsize equals to page size */ /* Now we just support the sectorsize equals to page size */
ASSERT(fs_info->sectorsize == PAGE_SIZE); ASSERT(fs_info->sectorsize == PAGE_SIZE);
@ -2454,6 +2471,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
cleanup: cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR); rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
} }
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
@ -2563,12 +2583,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
int stripe; int stripe;
struct bio *bio; struct bio *bio;
bio_list_init(&bio_list);
ret = alloc_rbio_essential_pages(rbio); ret = alloc_rbio_essential_pages(rbio);
if (ret) if (ret)
goto cleanup; goto cleanup;
bio_list_init(&bio_list);
atomic_set(&rbio->error, 0); atomic_set(&rbio->error, 0);
/* /*
* build a list of bios to read all the missing parts of this * build a list of bios to read all the missing parts of this
@ -2636,6 +2656,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
cleanup: cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR); rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return; return;
finish: finish:
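The same three-line cleanup now sits on every error path in this file: fail the original rbio once, then drop every bio that was built for the stripe but never submitted, so their references are not leaked. If it keeps recurring it could be factored into a helper along these lines (hypothetical; the commit leaves it open-coded):

static void rbio_fail_and_drain(struct btrfs_raid_bio *rbio,
				struct bio_list *bio_list)
{
	struct bio *bio;

	/* Fail the original request once... */
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	/* ...then put every unsubmitted bio we had queued locally. */
	while ((bio = bio_list_pop(bio_list)))
		bio_put(bio);
}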

fs/btrfs/ref-verify.c (new file, 1031 lines)

File diff suppressed because it is too large

fs/btrfs/ref-verify.h (new file, 62 lines)
View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2014 Facebook. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __REF_VERIFY__
#define __REF_VERIFY__
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root, u64 owner, u64 offset,
int action);
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
u64 len);
static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
{
spin_lock_init(&fs_info->ref_verify_lock);
fs_info->block_tree = RB_ROOT;
}
#else
static inline int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
return 0;
}
static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
}
static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root,
u64 owner, u64 offset, int action)
{
return 0;
}
static inline void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info,
u64 start, u64 len)
{
}
static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
{
}
#endif /* CONFIG_BTRFS_FS_REF_VERIFY */
#endif /* __REF_VERIFY__ */
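The stub half of this header is what lets callers use the API unconditionally: with CONFIG_BTRFS_FS_REF_VERIFY=n every call compiles away to nothing. A hedged caller-side sketch (record_ref_sketch is a hypothetical wrapper, not a call site quoted from the commit; action is one of the delayed-ref action codes):

static int record_ref_sketch(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u64 ref_root, u64 owner,
			     u64 offset, int action)
{
	int ret;

	/* No-op returning 0 when the config option is off. */
	ret = btrfs_ref_tree_mod(root, bytenr, num_bytes, 0 /* parent */,
				 ref_root, owner, offset, action);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}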

View File

@ -1742,7 +1742,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
dirty = 1; dirty = 1;
key.offset -= btrfs_file_extent_offset(leaf, fi); key.offset -= btrfs_file_extent_offset(leaf, fi);
ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr, ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent, num_bytes, parent,
btrfs_header_owner(leaf), btrfs_header_owner(leaf),
key.objectid, key.offset); key.objectid, key.offset);
@ -1751,7 +1751,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
break; break;
} }
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes, ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf), parent, btrfs_header_owner(leaf),
key.objectid, key.offset); key.objectid, key.offset);
if (ret) { if (ret) {
@ -1952,21 +1952,21 @@ int replace_path(struct btrfs_trans_handle *trans,
path->slots[level], old_ptr_gen); path->slots[level], old_ptr_gen);
btrfs_mark_buffer_dirty(path->nodes[level]); btrfs_mark_buffer_dirty(path->nodes[level]);
ret = btrfs_inc_extent_ref(trans, fs_info, old_bytenr, ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
blocksize, path->nodes[level]->start, blocksize, path->nodes[level]->start,
src->root_key.objectid, level - 1, 0); src->root_key.objectid, level - 1, 0);
BUG_ON(ret); BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr, ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
blocksize, 0, dest->root_key.objectid, blocksize, 0, dest->root_key.objectid,
level - 1, 0); level - 1, 0);
BUG_ON(ret); BUG_ON(ret);
ret = btrfs_free_extent(trans, fs_info, new_bytenr, blocksize, ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start, path->nodes[level]->start,
src->root_key.objectid, level - 1, 0); src->root_key.objectid, level - 1, 0);
BUG_ON(ret); BUG_ON(ret);
ret = btrfs_free_extent(trans, fs_info, old_bytenr, blocksize, ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1, 0, dest->root_key.objectid, level - 1,
0); 0);
BUG_ON(ret); BUG_ON(ret);
@ -2808,7 +2808,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
trans->transid); trans->transid);
btrfs_mark_buffer_dirty(upper->eb); btrfs_mark_buffer_dirty(upper->eb);
ret = btrfs_inc_extent_ref(trans, root->fs_info, ret = btrfs_inc_extent_ref(trans, root,
node->eb->start, blocksize, node->eb->start, blocksize,
upper->eb->start, upper->eb->start,
btrfs_header_owner(upper->eb), btrfs_header_owner(upper->eb),
@ -3246,6 +3246,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page); put_page(page);
btrfs_delalloc_release_metadata(BTRFS_I(inode), btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE); PAGE_SIZE);
btrfs_delalloc_release_extents(BTRFS_I(inode),
PAGE_SIZE);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -3275,6 +3277,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page); put_page(page);
index++; index++;
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
balance_dirty_pages_ratelimited(inode->i_mapping); balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info); btrfs_throttle(fs_info);
} }
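The relocate_file_extent_cluster() hunks follow from the outstanding-extents rework in this merge: reserving delalloc space now bumps a per-inode outstanding-extent count, and that count must be dropped with btrfs_delalloc_release_extents() whether the page is dirtied successfully or abandoned. A hedged sketch of the resulting pairing (error handling trimmed, signatures as of this release):

        ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), PAGE_SIZE);
        if (ret)
                return ret;
        /* ... lock the page and mark it delalloc ... */
        if (failed) {
                btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE);
                btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
                return -EIO;
        }
        /* success: metadata stays reserved for writeback, but the extent
         * count taken at reserve time is dropped here */
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);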

fs/btrfs/root-tree.c

@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *root; struct btrfs_root *root;
int err = 0; int err = 0;
int ret; int ret;
bool can_recover = true;
if (sb_rdonly(fs_info->sb))
can_recover = false;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)

fs/btrfs/scrub.c

@ -231,7 +231,7 @@ struct scrub_warning {
struct btrfs_path *path; struct btrfs_path *path;
u64 extent_item_size; u64 extent_item_size;
const char *errstr; const char *errstr;
sector_t sector; u64 physical;
u64 logical; u64 logical;
struct btrfs_device *dev; struct btrfs_device *dev;
}; };
@ -797,10 +797,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
*/ */
for (i = 0; i < ipath->fspath->elem_cnt; ++i) for (i = 0; i < ipath->fspath->elem_cnt; ++i)
btrfs_warn_in_rcu(fs_info, btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)", "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
swarn->errstr, swarn->logical, swarn->errstr, swarn->logical,
rcu_str_deref(swarn->dev->name), rcu_str_deref(swarn->dev->name),
(unsigned long long)swarn->sector, swarn->physical,
root, inum, offset, root, inum, offset,
min(isize - offset, (u64)PAGE_SIZE), nlink, min(isize - offset, (u64)PAGE_SIZE), nlink,
(char *)(unsigned long)ipath->fspath->val[i]); (char *)(unsigned long)ipath->fspath->val[i]);
@ -810,10 +810,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
err: err:
btrfs_warn_in_rcu(fs_info, btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical, swarn->errstr, swarn->logical,
rcu_str_deref(swarn->dev->name), rcu_str_deref(swarn->dev->name),
(unsigned long long)swarn->sector, swarn->physical,
root, inum, offset, ret); root, inum, offset, ret);
free_ipath(ipath); free_ipath(ipath);
@ -845,7 +845,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
if (!path) if (!path)
return; return;
swarn.sector = (sblock->pagev[0]->physical) >> 9; swarn.physical = sblock->pagev[0]->physical;
swarn.logical = sblock->pagev[0]->logical; swarn.logical = sblock->pagev[0]->logical;
swarn.errstr = errstr; swarn.errstr = errstr;
swarn.dev = NULL; swarn.dev = NULL;
@ -868,10 +868,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
item_size, &ref_root, item_size, &ref_root,
&ref_level); &ref_level);
btrfs_warn_in_rcu(fs_info, btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu", "%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical, errstr, swarn.logical,
rcu_str_deref(dev->name), rcu_str_deref(dev->name),
(unsigned long long)swarn.sector, swarn.physical,
ref_level ? "node" : "leaf", ref_level ? "node" : "leaf",
ret < 0 ? -1 : ref_level, ret < 0 ? -1 : ref_level,
ret < 0 ? -1 : ref_root); ret < 0 ? -1 : ref_root);
@ -883,7 +883,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
swarn.dev = dev; swarn.dev = dev;
iterate_extent_inodes(fs_info, found_key.objectid, iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, 1, extent_item_pos, 1,
scrub_print_warning_inode, &swarn); scrub_print_warning_inode, &swarn, false);
} }
out: out:
@ -1047,7 +1047,7 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
* can be found. * can be found.
*/ */
ret = iterate_inodes_from_logical(fixup->logical, fs_info, path, ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
scrub_fixup_readpage, fixup); scrub_fixup_readpage, fixup, false);
if (ret < 0) { if (ret < 0) {
uncorrectable = 1; uncorrectable = 1;
goto out; goto out;
@ -4390,7 +4390,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
} }
ret = iterate_inodes_from_logical(logical, fs_info, path, ret = iterate_inodes_from_logical(logical, fs_info, path,
record_inode_for_nocow, nocow_ctx); record_inode_for_nocow, nocow_ctx, false);
if (ret != 0 && ret != -ENOENT) { if (ret != 0 && ret != -ENOENT) {
btrfs_warn(fs_info, btrfs_warn(fs_info,
"iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",

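The trailing false added to iterate_extent_inodes() and iterate_inodes_from_logical() in the scrub hunks above is the new ignore_offset flag from the LOGICAL_INO_V2 work: passing true skips per-offset filtering when resolving backrefs of uncompressed extents, returning more (but less precise) results faster. A sketch of a caller opting into the fast mode; my_callback and my_ctx are placeholders:

        ret = iterate_inodes_from_logical(logical, fs_info, path,
                                          my_callback, my_ctx,
                                          true /* ignore_offset */);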
fs/btrfs/send.c

@ -26,6 +26,7 @@
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/compat.h>
#include "send.h" #include "send.h"
#include "backref.h" #include "backref.h"
@ -992,7 +993,6 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
* path must point to the dir item when called. * path must point to the dir item when called.
*/ */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *found_key,
iterate_dir_item_t iterate, void *ctx) iterate_dir_item_t iterate, void *ctx)
{ {
int ret = 0; int ret = 0;
@ -1271,12 +1271,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
*/ */
if (ino >= bctx->cur_objectid) if (ino >= bctx->cur_objectid)
return 0; return 0;
#if 0
if (ino > bctx->cur_objectid)
return 0;
if (offset + bctx->extent_len > bctx->cur_offset)
return 0;
#endif
} }
bctx->found++; bctx->found++;
@ -1429,7 +1423,7 @@ static int find_extent_clone(struct send_ctx *sctx,
extent_item_pos = 0; extent_item_pos = 0;
ret = iterate_extent_inodes(fs_info, found_key.objectid, ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, 1, __iterate_backrefs, extent_item_pos, 1, __iterate_backrefs,
backref_ctx); backref_ctx, false);
if (ret < 0) if (ret < 0)
goto out; goto out;
@ -4106,8 +4100,8 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
return ret; return ret;
} }
static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
struct fs_path *name, void *ctx, struct list_head *refs) void *ctx, struct list_head *refs)
{ {
int ret = 0; int ret = 0;
struct send_ctx *sctx = ctx; struct send_ctx *sctx = ctx;
@ -4143,8 +4137,7 @@ static int __record_new_ref(int num, u64 dir, int index,
void *ctx) void *ctx)
{ {
struct send_ctx *sctx = ctx; struct send_ctx *sctx = ctx;
return record_ref(sctx->send_root, num, dir, index, name, return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
ctx, &sctx->new_refs);
} }
@ -4153,8 +4146,8 @@ static int __record_deleted_ref(int num, u64 dir, int index,
void *ctx) void *ctx)
{ {
struct send_ctx *sctx = ctx; struct send_ctx *sctx = ctx;
return record_ref(sctx->parent_root, num, dir, index, name, return record_ref(sctx->parent_root, dir, name, ctx,
ctx, &sctx->deleted_refs); &sctx->deleted_refs);
} }
static int record_new_ref(struct send_ctx *sctx) static int record_new_ref(struct send_ctx *sctx)
@ -4498,7 +4491,7 @@ static int process_new_xattr(struct send_ctx *sctx)
int ret = 0; int ret = 0;
ret = iterate_dir_item(sctx->send_root, sctx->left_path, ret = iterate_dir_item(sctx->send_root, sctx->left_path,
sctx->cmp_key, __process_new_xattr, sctx); __process_new_xattr, sctx);
return ret; return ret;
} }
@ -4506,7 +4499,7 @@ static int process_new_xattr(struct send_ctx *sctx)
static int process_deleted_xattr(struct send_ctx *sctx) static int process_deleted_xattr(struct send_ctx *sctx)
{ {
return iterate_dir_item(sctx->parent_root, sctx->right_path, return iterate_dir_item(sctx->parent_root, sctx->right_path,
sctx->cmp_key, __process_deleted_xattr, sctx); __process_deleted_xattr, sctx);
} }
struct find_xattr_ctx { struct find_xattr_ctx {
@ -4551,7 +4544,7 @@ static int find_xattr(struct btrfs_root *root,
ctx.found_data = NULL; ctx.found_data = NULL;
ctx.found_data_len = 0; ctx.found_data_len = 0;
ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); ret = iterate_dir_item(root, path, __find_xattr, &ctx);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -4621,11 +4614,11 @@ static int process_changed_xattr(struct send_ctx *sctx)
int ret = 0; int ret = 0;
ret = iterate_dir_item(sctx->send_root, sctx->left_path, ret = iterate_dir_item(sctx->send_root, sctx->left_path,
sctx->cmp_key, __process_changed_new_xattr, sctx); __process_changed_new_xattr, sctx);
if (ret < 0) if (ret < 0)
goto out; goto out;
ret = iterate_dir_item(sctx->parent_root, sctx->right_path, ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
sctx->cmp_key, __process_changed_deleted_xattr, sctx); __process_changed_deleted_xattr, sctx);
out: out:
return ret; return ret;
@ -4675,8 +4668,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
goto out; goto out;
} }
ret = iterate_dir_item(root, path, &found_key, ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
__process_new_xattr, sctx);
if (ret < 0) if (ret < 0)
goto out; goto out;
@ -4723,16 +4715,27 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
/* initial readahead */ /* initial readahead */
memset(&sctx->ra, 0, sizeof(struct file_ra_state)); memset(&sctx->ra, 0, sizeof(struct file_ra_state));
file_ra_state_init(&sctx->ra, inode->i_mapping); file_ra_state_init(&sctx->ra, inode->i_mapping);
page_cache_sync_readahead(inode->i_mapping, &sctx->ra, NULL, index,
last_index - index + 1);
while (index <= last_index) { while (index <= last_index) {
unsigned cur_len = min_t(unsigned, len, unsigned cur_len = min_t(unsigned, len,
PAGE_SIZE - pg_offset); PAGE_SIZE - pg_offset);
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
page = find_lock_page(inode->i_mapping, index);
if (!page) { if (!page) {
ret = -ENOMEM; page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
break; NULL, index, last_index + 1 - index);
page = find_or_create_page(inode->i_mapping, index,
GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
break;
}
}
if (PageReadahead(page)) {
page_cache_async_readahead(inode->i_mapping, &sctx->ra,
NULL, page, index, last_index + 1 - index);
} }
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
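The hunk above is the send readahead speedup: fill_read_buf() first tries find_lock_page(), kicks synchronous readahead only on a cache miss, and triggers asynchronous readahead whenever it crosses a page flagged PageReadahead. The shape of the loop, as a sketch (the real code also copies the data and checks PageUptodate):

        page = find_lock_page(mapping, index);
        if (!page) {
                /* miss: read the rest of the window ahead, then allocate */
                page_cache_sync_readahead(mapping, ra, NULL, index,
                                          last_index + 1 - index);
                page = find_or_create_page(mapping, index, GFP_KERNEL);
                if (!page)
                        return -ENOMEM;
        }
        if (PageReadahead(page))        /* marker: keep the pipeline full */
                page_cache_async_readahead(mapping, ra, NULL, page, index,
                                           last_index + 1 - index);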
@ -6162,9 +6165,7 @@ static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
* Updates compare related fields in sctx and simply forwards to the actual * Updates compare related fields in sctx and simply forwards to the actual
* changed_xxx functions. * changed_xxx functions.
*/ */
static int changed_cb(struct btrfs_root *left_root, static int changed_cb(struct btrfs_path *left_path,
struct btrfs_root *right_root,
struct btrfs_path *left_path,
struct btrfs_path *right_path, struct btrfs_path *right_path,
struct btrfs_key *key, struct btrfs_key *key,
enum btrfs_compare_tree_result result, enum btrfs_compare_tree_result result,
@ -6246,8 +6247,8 @@ static int full_send_tree(struct send_ctx *sctx)
slot = path->slots[0]; slot = path->slots[0];
btrfs_item_key_to_cpu(eb, &found_key, slot); btrfs_item_key_to_cpu(eb, &found_key, slot);
ret = changed_cb(send_root, NULL, path, NULL, ret = changed_cb(path, NULL, &found_key,
&found_key, BTRFS_COMPARE_TREE_NEW, sctx); BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0) if (ret < 0)
goto out; goto out;
@ -6365,13 +6366,12 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
spin_unlock(&root->root_item_lock); spin_unlock(&root->root_item_lock);
} }
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
{ {
int ret = 0; int ret = 0;
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root; struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
struct btrfs_fs_info *fs_info = send_root->fs_info; struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root; struct btrfs_root *clone_root;
struct btrfs_ioctl_send_args *arg = NULL;
struct btrfs_key key; struct btrfs_key key;
struct send_ctx *sctx = NULL; struct send_ctx *sctx = NULL;
u32 i; u32 i;
@ -6407,13 +6407,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out; goto out;
} }
arg = memdup_user(arg_, sizeof(*arg));
if (IS_ERR(arg)) {
ret = PTR_ERR(arg);
arg = NULL;
goto out;
}
/* /*
* Check that we don't overflow at later allocations, we request * Check that we don't overflow at later allocations, we request
* clone_sources_count + 1 items, and compare to unsigned long inside * clone_sources_count + 1 items, and compare to unsigned long inside
@ -6654,7 +6647,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
btrfs_root_dec_send_in_progress(sctx->parent_root); btrfs_root_dec_send_in_progress(sctx->parent_root);
kfree(arg);
kvfree(clone_sources_tmp); kvfree(clone_sources_tmp);
if (sctx) { if (sctx) {

fs/btrfs/send.h

@ -130,5 +130,5 @@ enum {
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__ #ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
#endif #endif
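Dropping the memdup_user() from btrfs_ioctl_send(), together with the prototype change here, is what enables the 32-bit compat fix called out in the pull request: the ioctl entry point can now copy in either the native or the compat layout and pass the kernel-side struct down. Roughly, for the native path (the compat translation itself lives in ioctl.c and is not shown in this diff):

        struct btrfs_ioctl_send_args *arg;

        arg = memdup_user(argp, sizeof(*arg));
        if (IS_ERR(arg))
                return PTR_ERR(arg);
        ret = btrfs_ioctl_send(file, arg);      /* now takes a kernel pointer */
        kfree(arg);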

fs/btrfs/super.c

@ -202,7 +202,6 @@ static struct ratelimit_state printk_limits[] = {
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{ {
struct super_block *sb = fs_info->sb;
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0"; char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
struct va_format vaf; struct va_format vaf;
va_list args; va_list args;
@ -228,7 +227,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
vaf.va = &args; vaf.va = &args;
if (__ratelimit(ratelimit)) if (__ratelimit(ratelimit))
printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf); printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
va_end(args); va_end(args);
} }
@ -292,7 +292,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
vaf.va = &args; vaf.va = &args;
errstr = btrfs_decode_error(errno); errstr = btrfs_decode_error(errno);
if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)) if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
s_id, function, line, &vaf, errno, errstr); s_id, function, line, &vaf, errno, errstr);
@ -325,6 +325,9 @@ enum {
Opt_nologreplay, Opt_norecovery, Opt_nologreplay, Opt_norecovery,
#ifdef CONFIG_BTRFS_DEBUG #ifdef CONFIG_BTRFS_DEBUG
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
Opt_ref_verify,
#endif #endif
Opt_err, Opt_err,
}; };
@ -386,6 +389,9 @@ static const match_table_t tokens = {
{Opt_fragment_data, "fragment=data"}, {Opt_fragment_data, "fragment=data"},
{Opt_fragment_metadata, "fragment=metadata"}, {Opt_fragment_metadata, "fragment=metadata"},
{Opt_fragment_all, "fragment=all"}, {Opt_fragment_all, "fragment=all"},
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
{Opt_ref_verify, "ref_verify"},
#endif #endif
{Opt_err, NULL}, {Opt_err, NULL},
}; };
@ -502,6 +508,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
strncmp(args[0].from, "zlib", 4) == 0) { strncmp(args[0].from, "zlib", 4) == 0) {
compress_type = "zlib"; compress_type = "zlib";
info->compress_type = BTRFS_COMPRESS_ZLIB; info->compress_type = BTRFS_COMPRESS_ZLIB;
info->compress_level =
btrfs_compress_str2level(args[0].from);
btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_clear_opt(info->mount_opt, NODATASUM);
@ -549,9 +557,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
compress_force != saved_compress_force)) || compress_force != saved_compress_force)) ||
(!btrfs_test_opt(info, COMPRESS) && (!btrfs_test_opt(info, COMPRESS) &&
no_compress == 1)) { no_compress == 1)) {
btrfs_info(info, "%s %s compression", btrfs_info(info, "%s %s compression, level %d",
(compress_force) ? "force" : "use", (compress_force) ? "force" : "use",
compress_type); compress_type, info->compress_level);
} }
compress_force = false; compress_force = false;
break; break;
@ -824,6 +832,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_info(info, "fragmenting data"); btrfs_info(info, "fragmenting data");
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
break; break;
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
case Opt_ref_verify:
btrfs_info(info, "doing ref verification");
btrfs_set_opt(info->mount_opt, REF_VERIFY);
break;
#endif #endif
case Opt_err: case Opt_err:
btrfs_info(info, "unrecognized mount option '%s'", p); btrfs_info(info, "unrecognized mount option '%s'", p);
@ -1205,8 +1219,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
* happens. The pending operations are delayed to the * happens. The pending operations are delayed to the
* next commit after thawing. * next commit after thawing.
*/ */
if (__sb_start_write(sb, SB_FREEZE_WRITE, false)) if (sb_start_write_trylock(sb))
__sb_end_write(sb, SB_FREEZE_WRITE); sb_end_write(sb);
else else
return 0; return 0;
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
@ -1246,6 +1260,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_printf(seq, ",compress-force=%s", compress_type); seq_printf(seq, ",compress-force=%s", compress_type);
else else
seq_printf(seq, ",compress=%s", compress_type); seq_printf(seq, ",compress=%s", compress_type);
if (info->compress_level)
seq_printf(seq, ":%d", info->compress_level);
} }
if (btrfs_test_opt(info, NOSSD)) if (btrfs_test_opt(info, NOSSD))
seq_puts(seq, ",nossd"); seq_puts(seq, ",nossd");
@ -1305,6 +1321,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (btrfs_test_opt(info, FRAGMENT_METADATA)) if (btrfs_test_opt(info, FRAGMENT_METADATA))
seq_puts(seq, ",fragment=metadata"); seq_puts(seq, ",fragment=metadata");
#endif #endif
if (btrfs_test_opt(info, REF_VERIFY))
seq_puts(seq, ",ref_verify");
seq_printf(seq, ",subvolid=%llu", seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid); BTRFS_I(d_inode(dentry))->root->root_key.objectid);
seq_puts(seq, ",subvol="); seq_puts(seq, ",subvol=");
@ -2112,7 +2130,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* succeed even if the Avail is zero. But this is better than the other * succeed even if the Avail is zero. But this is better than the other
* way around. * way around.
*/ */
thresh = 4 * 1024 * 1024; thresh = SZ_4M;
if (!mixed && total_free_meta - thresh < block_rsv->size) if (!mixed && total_free_meta - thresh < block_rsv->size)
buf->f_bavail = 0; buf->f_bavail = 0;
@ -2318,6 +2336,9 @@ static void btrfs_print_mod_info(void)
#endif #endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on" ", integrity-checker=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
", ref-verify=on"
#endif #endif
"\n", "\n",
btrfs_crc32c_impl()); btrfs_crc32c_impl());
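The super.c hunks wire up the new zlib level syntax: btrfs_parse_options() splits the level off the option string via btrfs_compress_str2level(), stores it in fs_info->compress_level, reports it in the mount message, and btrfs_show_options() round-trips it as compress=zlib:N. A hedged sketch of what the level parser needs to do; the helper itself lives in compression.c, and the default level shown is an assumption:

        static int str2level(const char *str)   /* illustrative only */
        {
                if (strncmp(str, "zlib", 4) != 0)
                        return 0;               /* only zlib takes a level */
                if (str[4] == ':' && str[5] >= '1' && str[5] <= '9')
                        return str[5] - '0';    /* "zlib:9" -> 9 */
                return 3;                       /* assumed zlib default */
        }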

fs/btrfs/sysfs.c

@ -247,7 +247,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj,
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
} }
BTRFS_ATTR(global_rsv_size, global_rsv_size_show); BTRFS_ATTR(allocation, global_rsv_size, global_rsv_size_show);
static ssize_t global_rsv_reserved_show(struct kobject *kobj, static ssize_t global_rsv_reserved_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf) struct kobj_attribute *a, char *buf)
@ -256,15 +256,15 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj,
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
} }
BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show); BTRFS_ATTR(allocation, global_rsv_reserved, global_rsv_reserved_show);
#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
static ssize_t raid_bytes_show(struct kobject *kobj, static ssize_t raid_bytes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf); struct kobj_attribute *attr, char *buf);
BTRFS_RAID_ATTR(total_bytes, raid_bytes_show); BTRFS_ATTR(raid, total_bytes, raid_bytes_show);
BTRFS_RAID_ATTR(used_bytes, raid_bytes_show); BTRFS_ATTR(raid, used_bytes, raid_bytes_show);
static ssize_t raid_bytes_show(struct kobject *kobj, static ssize_t raid_bytes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
@ -277,7 +277,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
down_read(&sinfo->groups_sem); down_read(&sinfo->groups_sem);
list_for_each_entry(block_group, &sinfo->block_groups[index], list) { list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes)) if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
val += block_group->key.offset; val += block_group->key.offset;
else else
val += btrfs_block_group_used(&block_group->item); val += btrfs_block_group_used(&block_group->item);
@ -287,8 +287,8 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
} }
static struct attribute *raid_attributes[] = { static struct attribute *raid_attributes[] = {
BTRFS_RAID_ATTR_PTR(total_bytes), BTRFS_ATTR_PTR(raid, total_bytes),
BTRFS_RAID_ATTR_PTR(used_bytes), BTRFS_ATTR_PTR(raid, used_bytes),
NULL NULL
}; };
@ -311,7 +311,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \
struct btrfs_space_info *sinfo = to_space_info(kobj); \ struct btrfs_space_info *sinfo = to_space_info(kobj); \
return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \
} \ } \
BTRFS_ATTR(field, btrfs_space_info_show_##field) BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
struct kobj_attribute *a, struct kobj_attribute *a,
@ -331,19 +331,20 @@ SPACE_INFO_ATTR(bytes_may_use);
SPACE_INFO_ATTR(bytes_readonly); SPACE_INFO_ATTR(bytes_readonly);
SPACE_INFO_ATTR(disk_used); SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total); SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned); BTRFS_ATTR(space_info, total_bytes_pinned,
btrfs_space_info_show_total_bytes_pinned);
static struct attribute *space_info_attrs[] = { static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(flags), BTRFS_ATTR_PTR(space_info, flags),
BTRFS_ATTR_PTR(total_bytes), BTRFS_ATTR_PTR(space_info, total_bytes),
BTRFS_ATTR_PTR(bytes_used), BTRFS_ATTR_PTR(space_info, bytes_used),
BTRFS_ATTR_PTR(bytes_pinned), BTRFS_ATTR_PTR(space_info, bytes_pinned),
BTRFS_ATTR_PTR(bytes_reserved), BTRFS_ATTR_PTR(space_info, bytes_reserved),
BTRFS_ATTR_PTR(bytes_may_use), BTRFS_ATTR_PTR(space_info, bytes_may_use),
BTRFS_ATTR_PTR(bytes_readonly), BTRFS_ATTR_PTR(space_info, bytes_readonly),
BTRFS_ATTR_PTR(disk_used), BTRFS_ATTR_PTR(space_info, disk_used),
BTRFS_ATTR_PTR(disk_total), BTRFS_ATTR_PTR(space_info, disk_total),
BTRFS_ATTR_PTR(total_bytes_pinned), BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
NULL, NULL,
}; };
@ -361,8 +362,8 @@ struct kobj_type space_info_ktype = {
}; };
static const struct attribute *allocation_attrs[] = { static const struct attribute *allocation_attrs[] = {
BTRFS_ATTR_PTR(global_rsv_reserved), BTRFS_ATTR_PTR(allocation, global_rsv_reserved),
BTRFS_ATTR_PTR(global_rsv_size), BTRFS_ATTR_PTR(allocation, global_rsv_size),
NULL, NULL,
}; };
@ -415,7 +416,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
return len; return len;
} }
BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); BTRFS_ATTR_RW(, label, btrfs_label_show, btrfs_label_store);
static ssize_t btrfs_nodesize_show(struct kobject *kobj, static ssize_t btrfs_nodesize_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf) struct kobj_attribute *a, char *buf)
@ -425,7 +426,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
} }
BTRFS_ATTR(nodesize, btrfs_nodesize_show); BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
static ssize_t btrfs_sectorsize_show(struct kobject *kobj, static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf) struct kobj_attribute *a, char *buf)
@ -436,7 +437,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
fs_info->super_copy->sectorsize); fs_info->super_copy->sectorsize);
} }
BTRFS_ATTR(sectorsize, btrfs_sectorsize_show); BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf) struct kobj_attribute *a, char *buf)
@ -447,7 +448,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
fs_info->super_copy->sectorsize); fs_info->super_copy->sectorsize);
} }
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
static ssize_t quota_override_show(struct kobject *kobj, static ssize_t quota_override_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf) struct kobj_attribute *a, char *buf)
@ -487,14 +488,14 @@ static ssize_t quota_override_store(struct kobject *kobj,
return len; return len;
} }
BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store); BTRFS_ATTR_RW(, quota_override, quota_override_show, quota_override_store);
static const struct attribute *btrfs_attrs[] = { static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(label), BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(nodesize), BTRFS_ATTR_PTR(, nodesize),
BTRFS_ATTR_PTR(sectorsize), BTRFS_ATTR_PTR(, sectorsize),
BTRFS_ATTR_PTR(clone_alignment), BTRFS_ATTR_PTR(, clone_alignment),
BTRFS_ATTR_PTR(quota_override), BTRFS_ATTR_PTR(, quota_override),
NULL, NULL,
}; };

fs/btrfs/sysfs.h

@ -21,21 +21,16 @@ enum btrfs_feature_set {
.store = _store, \ .store = _store, \
} }
#define BTRFS_ATTR_RW(_name, _show, _store) \ #define BTRFS_ATTR_RW(_prefix, _name, _show, _store) \
static struct kobj_attribute btrfs_attr_##_name = \ static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \
__INIT_KOBJ_ATTR(_name, 0644, _show, _store) __INIT_KOBJ_ATTR(_name, 0644, _show, _store)
#define BTRFS_ATTR(_name, _show) \ #define BTRFS_ATTR(_prefix, _name, _show) \
static struct kobj_attribute btrfs_attr_##_name = \ static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL) __INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
#define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) #define BTRFS_ATTR_PTR(_prefix, _name) \
(&btrfs_attr_##_prefix##_##_name.attr)
#define BTRFS_RAID_ATTR(_name, _show) \
static struct kobj_attribute btrfs_raid_attr_##_name = \
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
#define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr)
struct btrfs_feature_attr { struct btrfs_feature_attr {
@ -44,15 +39,16 @@ struct btrfs_feature_attr {
u64 feature_bit; u64 feature_bit;
}; };
#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \ #define BTRFS_FEAT_ATTR(_name, _feature_set, _feature_prefix, _feature_bit) \
static struct btrfs_feature_attr btrfs_attr_##_name = { \ static struct btrfs_feature_attr btrfs_attr_features_##_name = { \
.kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \ .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \
btrfs_feature_attr_show, \ btrfs_feature_attr_show, \
btrfs_feature_attr_store), \ btrfs_feature_attr_store), \
.feature_set = _feature_set, \ .feature_set = _feature_set, \
.feature_bit = _prefix ##_## _feature_bit, \ .feature_bit = _feature_prefix ##_## _feature_bit, \
} }
#define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr) #define BTRFS_FEAT_ATTR_PTR(_name) \
(&btrfs_attr_features_##_name.kobj_attr.attr)
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
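These macro changes thread a namespace prefix through every attribute declaration, so attributes with the same name in different groups no longer collide on the variable name. For example, the declaration in sysfs.c now expands roughly as:

        BTRFS_ATTR(allocation, global_rsv_size, global_rsv_size_show);
        /* expands to: */
        static struct kobj_attribute btrfs_attr_allocation_global_rsv_size =
                __INIT_KOBJ_ATTR(global_rsv_size, 0444,
                                 global_rsv_size_show, NULL);
        /* and BTRFS_ATTR_PTR(allocation, global_rsv_size) yields
         * &btrfs_attr_allocation_global_rsv_size.attr */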

fs/btrfs/tests/free-space-tree-tests.c

@ -500,7 +500,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) { if (!path) {
test_msg("Couldn't allocate path\n"); test_msg("Couldn't allocate path\n");
return -ENOMEM; ret = -ENOMEM;
goto out;
} }
ret = add_block_group_free_space(&trans, root->fs_info, cache); ret = add_block_group_free_space(&trans, root->fs_info, cache);

fs/btrfs/tests/inode-tests.c

@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len; offset = em->start + em->len;
free_extent_map(em); free_extent_map(em);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0); em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
if (IS_ERR(em)) { if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n"); test_msg("Got an error when we shouldn't have\n");
goto out; goto out;
@ -968,7 +968,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
btrfs_test_inode_set_ops(inode); btrfs_test_inode_set_ops(inode);
/* [BTRFS_MAX_EXTENT_SIZE] */ /* [BTRFS_MAX_EXTENT_SIZE] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
NULL, 0); NULL, 0);
if (ret) { if (ret) {
@ -983,7 +982,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
} }
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
NULL, 0); NULL, 0);
@ -1003,7 +1001,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE >> 1, BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
@ -1017,7 +1015,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
} }
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) (BTRFS_MAX_EXTENT_SIZE >> 1)
+ sectorsize - 1, + sectorsize - 1,
@ -1035,12 +1032,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* /*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*
* I'm artificially adding 2 to outstanding_extents because in the
* buffered IO case we'd add things up as we go, but I don't feel like
* doing that here, this isn't the interesting case we want to test.
*/ */
BTRFS_I(inode)->outstanding_extents += 2;
ret = btrfs_set_extent_delalloc(inode, ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
@ -1059,7 +1051,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* /*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*/ */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
@ -1079,7 +1070,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
@ -1096,7 +1087,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
* Refill the hole again just for good measure, because I thought it * Refill the hole again just for good measure, because I thought it
* might fail and I'd rather satisfy my paranoia at this point. * might fail and I'd rather satisfy my paranoia at this point.
*/ */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
@ -1114,7 +1104,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */ /* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
@ -1131,7 +1121,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
if (ret) if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL); NULL, GFP_KERNEL);
iput(inode); iput(inode);
btrfs_free_dummy_root(root); btrfs_free_dummy_root(root);

fs/btrfs/tests/qgroup-tests.c

@ -240,7 +240,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
* we can only call btrfs_qgroup_account_extent() directly to test * we can only call btrfs_qgroup_account_extent() directly to test
* quota. * quota.
*/ */
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret); test_msg("Couldn't find old roots: %d\n", ret);
@ -252,7 +253,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) if (ret)
return ret; return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
ulist_free(new_roots); ulist_free(new_roots);
@ -275,7 +277,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
old_roots = NULL; old_roots = NULL;
new_roots = NULL; new_roots = NULL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret); test_msg("Couldn't find old roots: %d\n", ret);
@ -286,7 +289,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) if (ret)
return -EINVAL; return -EINVAL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
ulist_free(new_roots); ulist_free(new_roots);
@ -337,7 +341,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret; return ret;
} }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret); test_msg("Couldn't find old roots: %d\n", ret);
@ -349,7 +354,8 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) if (ret)
return ret; return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
ulist_free(new_roots); ulist_free(new_roots);
@ -370,7 +376,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL; return -EINVAL;
} }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret); test_msg("Couldn't find old roots: %d\n", ret);
@ -382,7 +389,8 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) if (ret)
return ret; return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
ulist_free(new_roots); ulist_free(new_roots);
@ -409,7 +417,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL; return -EINVAL;
} }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret); test_msg("Couldn't find old roots: %d\n", ret);
@ -421,7 +430,8 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) if (ret)
return ret; return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) { if (ret) {
ulist_free(old_roots); ulist_free(old_roots);
ulist_free(new_roots); ulist_free(new_roots);

fs/btrfs/transaction.c

@ -797,8 +797,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
if (fs_info->global_block_rsv.space_info->full && if (btrfs_check_space_for_delayed_refs(trans, fs_info))
btrfs_check_space_for_delayed_refs(trans, fs_info))
return 1; return 1;
return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
@ -950,6 +949,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0; u64 start = 0;
u64 end; u64 end;
atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers);
while (!find_first_extent_bit(dirty_pages, start, &start, &end, while (!find_first_extent_bit(dirty_pages, start, &start, &end,
mark, &cached_state)) { mark, &cached_state)) {
bool wait_writeback = false; bool wait_writeback = false;
@ -985,6 +985,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
cond_resched(); cond_resched();
start = end + 1; start = end + 1;
} }
atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers);
return werr; return werr;
} }
@ -1915,8 +1916,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{ {
/*
* We use writeback_inodes_sb here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
* We are currently holding the fs freeze lock; if we do an async flush
* we'll do btrfs_join_transaction() and deadlock because we need to
* wait for the fs freeze lock. With direct flushing we benefit
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
*/
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
return btrfs_start_delalloc_roots(fs_info, 1, -1); writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
return 0; return 0;
} }

fs/btrfs/tree-checker.c (new file, 425 lines)

@ -0,0 +1,425 @@
/*
* Copyright (C) Qu Wenruo 2017. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program.
*/
/*
* The module is used to catch unexpected/corrupted tree block data.
* Such behavior can be caused either by a fuzzed image or bugs.
*
* The objective is to do leaf/node validation checks when a tree block is read
* from disk, and to check *every* possible member, so other code won't
* need to check them again.
*
* Due to the potential for unwanted damage, every checker needs to be
* reviewed carefully so that it does not prevent valid images from being mounted.
*/
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
/*
* Error messages should follow this format:
* corrupt <type>: <identifier>, <reason>[, <bad_value>]
*
* @type: leaf or node
* @identifier: the necessary info to locate the leaf/node.
* It's recommended to decode key.objectid/offset if it's
* meaningful.
* @reason: describe the error
* @bad_value: optional, it's recommended to output the bad value and its
* expected value (range).
*
* Since comma is used to separate the components, only space is allowed
* inside each component.
*/
/*
* Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
* Allows callers to customize the output.
*/
__printf(4, 5)
static void generic_err(const struct btrfs_root *root,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
va_end(args);
}
/*
* Customized reporter for extent data items, since their key objectid and
* offset have their own meaning.
*/
__printf(4, 5)
static void file_extent_err(const struct btrfs_root *root,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct btrfs_key key;
struct va_format vaf;
va_list args;
btrfs_item_key_to_cpu(eb, &key, slot);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf);
va_end(args);
}
/*
* Return 0 if the btrfs_file_extent_##name is aligned to @alignment
* Else return 1
*/
#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment) \
({ \
if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
file_extent_err((root), (leaf), (slot), \
"invalid %s for file extent, have %llu, should be aligned to %u", \
(#name), btrfs_file_extent_##name((leaf), (fi)), \
(alignment)); \
(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \
})
static int check_extent_data_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_file_extent_item *fi;
u32 sectorsize = root->fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
if (!IS_ALIGNED(key->offset, sectorsize)) {
file_extent_err(root, leaf, slot,
"unaligned file_offset for file extent, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
}
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
file_extent_err(root, leaf, slot,
"invalid type for file extent, have %u expect range [0, %u]",
btrfs_file_extent_type(leaf, fi),
BTRFS_FILE_EXTENT_TYPES);
return -EUCLEAN;
}
/*
* Support for new compression/encryption must introduce an incompat flag,
* and must be caught in open_ctree().
*/
if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
file_extent_err(root, leaf, slot,
"invalid compression for file extent, have %u expect range [0, %u]",
btrfs_file_extent_compression(leaf, fi),
BTRFS_COMPRESS_TYPES);
return -EUCLEAN;
}
if (btrfs_file_extent_encryption(leaf, fi)) {
file_extent_err(root, leaf, slot,
"invalid encryption for file extent, have %u expect 0",
btrfs_file_extent_encryption(leaf, fi));
return -EUCLEAN;
}
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
/* Inline extent must have 0 as key offset */
if (key->offset) {
file_extent_err(root, leaf, slot,
"invalid file_offset for inline file extent, have %llu expect 0",
key->offset);
return -EUCLEAN;
}
/* Compressed inline extent has no on-disk size, skip it */
if (btrfs_file_extent_compression(leaf, fi) !=
BTRFS_COMPRESS_NONE)
return 0;
/* Uncompressed inline extent size must match item size */
if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi)) {
file_extent_err(root, leaf, slot,
"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi));
return -EUCLEAN;
}
return 0;
}
/* Regular or preallocated extent has fixed item size */
if (item_size != sizeof(*fi)) {
file_extent_err(root, leaf, slot,
"invalid item size for reg/prealloc file extent, have %u expect %zu",
item_size, sizeof(*fi));
return -EUCLEAN;
}
if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize))
return -EUCLEAN;
return 0;
}
static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
u32 sectorsize = root->fs_info->sectorsize;
u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
generic_err(root, leaf, slot,
"invalid key objectid for csum item, have %llu expect %llu",
key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
return -EUCLEAN;
}
if (!IS_ALIGNED(key->offset, sectorsize)) {
generic_err(root, leaf, slot,
"unaligned key offset for csum item, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
}
if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
generic_err(root, leaf, slot,
"unaligned item size for csum item, have %u should be aligned to %u",
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
}
return 0;
}
/*
* Common point to switch the item-specific validation.
*/
static int check_leaf_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
int ret = 0;
switch (key->type) {
case BTRFS_EXTENT_DATA_KEY:
ret = check_extent_data_item(root, leaf, key, slot);
break;
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot);
break;
}
return ret;
}
int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = root->fs_info;
/* No valid key type is 0, so all keys should be larger than this key */
struct btrfs_key prev_key = {0, 0, 0};
struct btrfs_key key;
u32 nritems = btrfs_header_nritems(leaf);
int slot;
/*
* Extent buffers from a relocation tree have an owner field that
* corresponds to the subvolume tree they are based on. So just from an
* extent buffer alone we cannot find out the id of the
* corresponding subvolume tree, so we cannot figure out if the extent
* buffer corresponds to the root of the relocation tree or not. So
* skip this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
struct btrfs_root *check_root;
key.objectid = btrfs_header_owner(leaf);
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
check_root = btrfs_get_fs_root(fs_info, &key, false);
/*
* The only reason we also check NULL here is that during
* open_ctree() some roots have not yet been set up.
*/
if (!IS_ERR_OR_NULL(check_root)) {
struct extent_buffer *eb;
eb = btrfs_root_node(check_root);
/* if leaf is the root, then it's fine */
if (leaf != eb) {
generic_err(check_root, leaf, 0,
"invalid nritems, have %u should not be 0 for non-root leaf",
nritems);
free_extent_buffer(eb);
return -EUCLEAN;
}
free_extent_buffer(eb);
}
return 0;
}
if (nritems == 0)
return 0;
/*
* Check the following things to make sure this is a good leaf, and
* leaf users won't need to bother with similar sanity checks:
*
* 1) key ordering
* 2) item offset and size
* No overlap, no hole, all inside the leaf.
* 3) item content
* If possible, do comprehensive sanity check.
* NOTE: All checks must only rely on the item data itself.
*/
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
generic_err(root, leaf, slot,
"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
key.offset);
return -EUCLEAN;
}
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
* front.
*/
if (slot == 0)
item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
else
item_end_expected = btrfs_item_offset_nr(leaf,
slot - 1);
if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
generic_err(root, leaf, slot,
"unexpected item end, have %u expect %u",
btrfs_item_end_nr(leaf, slot),
item_end_expected);
return -EUCLEAN;
}
/*
* Check to make sure that we don't point outside of the leaf,
* just in case all the items are consistent with each other, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
generic_err(root, leaf, slot,
"slot end outside of leaf, have %u expect range [0, %u]",
btrfs_item_end_nr(leaf, slot),
BTRFS_LEAF_DATA_SIZE(fs_info));
return -EUCLEAN;
}
/* Also check if the item pointer overlaps with the btrfs_item itself. */
if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
btrfs_item_ptr_offset(leaf, slot)) {
generic_err(root, leaf, slot,
"slot overlaps with its data, item end %lu data start %lu",
btrfs_item_nr_offset(slot) +
sizeof(struct btrfs_item),
btrfs_item_ptr_offset(leaf, slot));
return -EUCLEAN;
}
/* Check if the item size and content meet other criteria */
ret = check_leaf_item(root, leaf, &key, slot);
if (ret < 0)
return ret;
prev_key.objectid = key.objectid;
prev_key.type = key.type;
prev_key.offset = key.offset;
}
return 0;
}
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
int slot;
u64 bytenr;
int ret = 0;
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
btrfs_crit(root->fs_info,
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
root->objectid, node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
return -EUCLEAN;
}
for (slot = 0; slot < nr - 1; slot++) {
bytenr = btrfs_node_blockptr(node, slot);
btrfs_node_key_to_cpu(node, &key, slot);
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
generic_err(root, node, slot,
"invalid NULL node pointer");
ret = -EUCLEAN;
goto out;
}
if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
generic_err(root, node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, root->fs_info->sectorsize);
ret = -EUCLEAN;
goto out;
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
generic_err(root, node, slot,
"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
next_key.offset);
ret = -EUCLEAN;
goto out;
}
}
out:
return ret;
}
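
[Editor's note] The offset/size pass in btrfs_check_leaf() leans on the leaf layout: btrfs_item headers grow from the front of the leaf while item data grows backwards from the end, so slot 0's data must end exactly at BTRFS_LEAF_DATA_SIZE() and every later slot must end where the previous slot's data starts. A minimal userspace sketch of that invariant (hypothetical item table, not the kernel structures):

#include <stdbool.h>
#include <stdint.h>

struct item {
	uint32_t offset;	/* start of item data within the leaf data area */
	uint32_t size;		/* length of item data */
};

/*
 * Mirror of the check_leaf() walk: slot 0 ends at the leaf data size and
 * each following slot ends at the previous slot's offset, leaving neither
 * holes nor overlaps.
 */
static bool leaf_items_contiguous(const struct item *items, int nritems,
				  uint32_t leaf_data_size)
{
	uint32_t expected_end = leaf_data_size;
	int slot;

	for (slot = 0; slot < nritems; slot++) {
		uint32_t end = items[slot].offset + items[slot].size;

		if (end != expected_end || end > leaf_data_size)
			return false;	/* hole, overlap or out of bounds */
		expected_end = items[slot].offset;
	}
	return true;
}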

fs/btrfs/tree-checker.h (new file)

@@ -0,0 +1,26 @@
/*
* Copyright (C) Qu Wenruo 2017. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program.
*/
#ifndef __BTRFS_TREE_CHECKER__
#define __BTRFS_TREE_CHECKER__

#include "ctree.h"
#include "extent_io.h"

int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);

#endif
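
[Editor's note] Both entry points rely only on the extent buffer contents, so they can run straight from the metadata read path before anything else looks at the block. A sketch of how a read-completion hook might dispatch on the header level (assumed caller, modeled on the disk-io.c read-time validation; not part of this header):

/* Reject a freshly read metadata block before it is exposed to leaf users. */
static int verify_read_eb(struct btrfs_root *root, struct extent_buffer *eb)
{
	if (btrfs_header_level(eb) == 0)
		return btrfs_check_leaf(root, eb);	/* -EUCLEAN on corruption */
	return btrfs_check_node(root, eb);
}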

fs/btrfs/tree-log.c

@@ -717,7 +717,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
 					       ins.offset);
 		if (ret == 0) {
-			ret = btrfs_inc_extent_ref(trans, fs_info,
+			ret = btrfs_inc_extent_ref(trans, root,
 					ins.objectid, ins.offset,
 					0, root->root_key.objectid,
 					key->objectid, offset);
@@ -2699,34 +2699,36 @@ static void wait_log_commit(struct btrfs_root *root, int transid)
 	 * so we know that if ours is more than 2 older than the
 	 * current transaction, we're done
 	 */
-	do {
+	for (;;) {
 		prepare_to_wait(&root->log_commit_wait[index],
 				&wait, TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&root->log_mutex);
-		if (root->log_transid_committed < transid &&
-		    atomic_read(&root->log_commit[index]))
-			schedule();
-		finish_wait(&root->log_commit_wait[index], &wait);
+		if (!(root->log_transid_committed < transid &&
+		      atomic_read(&root->log_commit[index])))
+			break;
+
+		mutex_unlock(&root->log_mutex);
+		schedule();
 		mutex_lock(&root->log_mutex);
-	} while (root->log_transid_committed < transid &&
-		 atomic_read(&root->log_commit[index]));
+	}
+	finish_wait(&root->log_commit_wait[index], &wait);
 }
 
 static void wait_for_writer(struct btrfs_root *root)
 {
 	DEFINE_WAIT(wait);
 
-	while (atomic_read(&root->log_writers)) {
-		prepare_to_wait(&root->log_writer_wait,
-				&wait, TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&root->log_mutex);
-		if (atomic_read(&root->log_writers))
-			schedule();
-		finish_wait(&root->log_writer_wait, &wait);
+	for (;;) {
+		prepare_to_wait(&root->log_writer_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (!atomic_read(&root->log_writers))
+			break;
+
+		mutex_unlock(&root->log_mutex);
+		schedule();
 		mutex_lock(&root->log_mutex);
 	}
+	finish_wait(&root->log_writer_wait, &wait);
 }
 
 static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
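
[Editor's note] Both loops now follow the canonical prepare_to_wait() pattern: set the task state, test the wait condition, and only then drop the lock and schedule(). Because prepare_to_wait() puts the task on the waitqueue before the condition test, a wake_up() that fires between the test and schedule() still makes the task runnable again, so no wakeup is lost; the old do/while versions also called finish_wait() on every iteration, which the rework hoists out of the loop. The skeleton, with a hypothetical done() condition:

	for (;;) {
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		if (done())
			break;
		mutex_unlock(&lock);	/* hold the lock except while sleeping */
		schedule();
		mutex_lock(&lock);
	}
	finish_wait(&wq, &wait);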
@@ -4645,7 +4647,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key min_key;
 	struct btrfs_key max_key;
 	struct btrfs_root *log = root->log_root;
-	struct extent_buffer *src = NULL;
 	LIST_HEAD(logged_list);
 	u64 last_extent = 0;
 	int err = 0;

@@ -4888,7 +4889,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			goto next_slot;
 		}
 
-		src = path->nodes[0];
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
 			goto next_slot;

fs/btrfs/volumes.c

@@ -360,7 +360,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 	int again = 0;
 	unsigned long num_run;
 	unsigned long batch_run = 0;
-	unsigned long limit;
 	unsigned long last_waited = 0;
 	int force_reg = 0;
 	int sync_pending = 0;

@@ -375,8 +374,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 	blk_start_plug(&plug);
 
 	bdi = device->bdev->bd_bdi;
-	limit = btrfs_async_submit_limit(fs_info);
-	limit = limit * 2 / 3;
 
 loop:
 	spin_lock(&device->io_lock);

@@ -443,13 +440,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 		pending = pending->bi_next;
 		cur->bi_next = NULL;
 
-		/*
-		 * atomic_dec_return implies a barrier for waitqueue_active
-		 */
-		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
-		    waitqueue_active(&fs_info->async_submit_wait))
-			wake_up(&fs_info->async_submit_wait);
-
 		BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
 
 		/*

@@ -517,12 +507,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 					 &device->work);
 			goto done;
 		}
-		/* unplug every 64 requests just for good measure */
-		if (batch_run % 64 == 0) {
-			blk_finish_plug(&plug);
-			blk_start_plug(&plug);
-			sync_pending = 0;
-		}
 	}
 
 	cond_resched();

@@ -547,7 +531,7 @@ static void pending_bios_fn(struct btrfs_work *work)
 }
 
-void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
 {
 	struct btrfs_fs_devices *fs_devs;
 	struct btrfs_device *dev;

@@ -1068,14 +1052,15 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 	return ret;
 }
 
-void btrfs_release_disk_super(struct page *page)
+static void btrfs_release_disk_super(struct page *page)
 {
 	kunmap(page);
 	put_page(page);
 }
 
-int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
-		struct page **page, struct btrfs_super_block **disk_super)
+static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
+				 struct page **page,
+				 struct btrfs_super_block **disk_super)
 {
 	void *p;
 	pgoff_t index;

@@ -1817,8 +1802,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
-					struct btrfs_device *device)
+static struct btrfs_device * btrfs_find_next_active_device(
+		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
 {
 	struct btrfs_device *next_device;

@@ -2031,19 +2016,20 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 	}
 
 	btrfs_close_bdev(srcdev);
 	call_rcu(&srcdev->rcu, free_device);
 
-	/*
-	 * unless fs_devices is seed fs, num_devices shouldn't go
-	 * zero
-	 */
-	BUG_ON(!fs_devices->num_devices && !fs_devices->seeding);
-
 	/* if this is no devs we rather delete the fs_devices */
 	if (!fs_devices->num_devices) {
 		struct btrfs_fs_devices *tmp_fs_devices;
 
+		/*
+		 * On a mounted FS, num_devices can't be zero unless it's a
+		 * seed. In case of a seed device being replaced, the replace
+		 * target added to the sprout FS, so there will be no more
+		 * device left under the seed FS.
+		 */
+		ASSERT(fs_devices->seeding);
+
 		tmp_fs_devices = fs_info->fs_devices;
 		while (tmp_fs_devices) {
 			if (tmp_fs_devices->seed == fs_devices) {

@@ -2323,6 +2309,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	u64 tmp;
 	int seeding_dev = 0;
 	int ret = 0;
+	bool unlocked = false;
 
 	if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
 		return -EROFS;

@@ -2399,7 +2386,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	if (seeding_dev) {
 		sb->s_flags &= ~MS_RDONLY;
 		ret = btrfs_prepare_sprout(fs_info);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto error_trans;
+		}
 	}
 
 	device->fs_devices = fs_info->fs_devices;

@@ -2445,14 +2435,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		mutex_unlock(&fs_info->chunk_mutex);
 		if (ret) {
 			btrfs_abort_transaction(trans, ret);
-			goto error_trans;
+			goto error_sysfs;
 		}
 	}
 
 	ret = btrfs_add_device(trans, fs_info, device);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
-		goto error_trans;
+		goto error_sysfs;
 	}
 
 	if (seeding_dev) {

@@ -2461,7 +2451,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		ret = btrfs_finish_sprout(trans, fs_info);
 		if (ret) {
 			btrfs_abort_transaction(trans, ret);
-			goto error_trans;
+			goto error_sysfs;
 		}
 
 		/* Sprouting would change fsid of the mounted root,

@@ -2479,6 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	if (seeding_dev) {
 		mutex_unlock(&uuid_mutex);
 		up_write(&sb->s_umount);
+		unlocked = true;
 
 		if (ret) /* transaction commit */
 			return ret;

@@ -2491,7 +2482,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		if (IS_ERR(trans)) {
 			if (PTR_ERR(trans) == -ENOENT)
 				return 0;
-			return PTR_ERR(trans);
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			goto error_sysfs;
 		}
 		ret = btrfs_commit_transaction(trans);
 	}

@@ -2500,14 +2493,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	update_dev_time(device_path);
 	return ret;
 
-error_trans:
-	btrfs_end_transaction(trans);
-	rcu_string_free(device->name);
+error_sysfs:
 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+error_trans:
+	if (seeding_dev)
+		sb->s_flags |= MS_RDONLY;
+	if (trans)
+		btrfs_end_transaction(trans);
+	rcu_string_free(device->name);
 	kfree(device);
 error:
 	blkdev_put(bdev, FMODE_EXCL);
-	if (seeding_dev) {
+	if (seeding_dev && !unlocked) {
 		mutex_unlock(&uuid_mutex);
 		up_write(&sb->s_umount);
 	}
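
[Editor's note] The reworked error path is ordered by how far setup got: error_sysfs additionally removes the sysfs device link, then falls through to error_trans. The new unlocked flag records that the seeding path already dropped uuid_mutex and s_umount after the commit, so the error path does not unlock them a second time, and trans is reset to NULL before the goto so btrfs_end_transaction() only runs on a live handle.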
@@ -4813,16 +4810,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	em_tree = &info->mapping_tree.map_tree;
 	write_lock(&em_tree->lock);
 	ret = add_extent_mapping(em_tree, em, 0);
-	if (!ret) {
-		list_add_tail(&em->list, &trans->transaction->pending_chunks);
-		refcount_inc(&em->refs);
-	}
-	write_unlock(&em_tree->lock);
 	if (ret) {
+		write_unlock(&em_tree->lock);
 		free_extent_map(em);
 		goto error;
 	}
 
+	list_add_tail(&em->list, &trans->transaction->pending_chunks);
+	refcount_inc(&em->refs);
+	write_unlock(&em_tree->lock);
+
 	ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
 	if (ret)
 		goto error_del_extent;
@@ -5695,10 +5692,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
 				&stripe_index);
-		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
+		if (!need_full_stripe(op))
 			mirror_num = 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+		if (need_full_stripe(op))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;

@@ -5711,7 +5708,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
+		if (need_full_stripe(op)) {
 			num_stripes = map->num_stripes;
 		} else if (mirror_num) {
 			stripe_index = mirror_num - 1;

@@ -5725,7 +5722,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
 		stripe_index *= map->sub_stripes;
 
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+		if (need_full_stripe(op))
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;

@@ -5740,9 +5737,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-		if (need_raid_map &&
-		    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
-		     mirror_num > 1)) {
+		if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
 			/* push stripe_nr back to the start of the full stripe */
 			stripe_nr = div64_u64(raid56_full_stripe_start,
 					stripe_len * nr_data_stripes(map));

@@ -5769,9 +5764,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			/* We distribute the parity blocks across stripes */
 			div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
 					&stripe_index);
-			if ((op != BTRFS_MAP_WRITE &&
-			     op != BTRFS_MAP_GET_READ_MIRRORS) &&
-			    mirror_num <= 1)
+			if (!need_full_stripe(op) && mirror_num <= 1)
 				mirror_num = 1;
 		}
 	} else {
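
[Editor's note] All five branches now funnel the repeated two-sided test through need_full_stripe(). The helper is defined earlier in volumes.c; from memory it is equivalent to:

static inline int need_full_stripe(enum btrfs_map_op op)
{
	return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
}

i.e. only writes and explicit read-all-mirrors requests need every stripe mapped; plain reads pick a single mirror.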
@@ -6033,7 +6026,7 @@ static void btrfs_end_bio(struct bio *bio)
 			 * this bio is actually up to date, we didn't
 			 * go over the max number of errors
 			 */
-			bio->bi_status = 0;
+			bio->bi_status = BLK_STS_OK;
 		}
 
 		btrfs_end_bbio(bbio, bio);

@@ -6069,13 +6062,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device,
 		return;
 	}
 
-	/*
-	 * nr_async_bios allows us to reliably return congestion to the
-	 * higher layers. Otherwise, the async bio makes it appear we have
-	 * made progress against dirty pages when we've really just put it
-	 * on a queue for later
-	 */
-	atomic_inc(&fs_info->nr_async_bios);
 	WARN_ON(bio->bi_next);
 	bio->bi_next = NULL;

@@ -6144,7 +6130,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
 		bio->bi_iter.bi_sector = logical >> 9;
-		bio->bi_status = BLK_STS_IOERR;
+		if (atomic_read(&bbio->error) > bbio->max_errors)
+			bio->bi_status = BLK_STS_IOERR;
+		else
+			bio->bi_status = BLK_STS_OK;
 		btrfs_end_bbio(bbio, bio);
 	}
 }

@@ -6249,7 +6238,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
 
 	device = btrfs_alloc_device(NULL, &devid, dev_uuid);
 	if (IS_ERR(device))
-		return NULL;
+		return device;
 
 	list_add(&device->dev_list, &fs_devices->devices);
 	device->fs_devices = fs_devices;

@@ -6377,6 +6366,17 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
+					u64 devid, u8 *uuid, bool error)
+{
+	if (error)
+		btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
+			     devid, uuid);
+	else
+		btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
+			      devid, uuid);
+}
+
 static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 			  struct extent_buffer *leaf,
 			  struct btrfs_chunk *chunk)

@@ -6447,18 +6447,21 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 		if (!map->stripes[i].dev &&
 		    !btrfs_test_opt(fs_info, DEGRADED)) {
 			free_extent_map(em);
-			btrfs_report_missing_device(fs_info, devid, uuid);
-			return -EIO;
+			btrfs_report_missing_device(fs_info, devid, uuid, true);
+			return -ENOENT;
 		}
 		if (!map->stripes[i].dev) {
 			map->stripes[i].dev =
 				add_missing_dev(fs_info->fs_devices, devid,
 						uuid);
-			if (!map->stripes[i].dev) {
+			if (IS_ERR(map->stripes[i].dev)) {
 				free_extent_map(em);
-				return -EIO;
+				btrfs_err(fs_info,
+					"failed to init missing dev %llu: %ld",
+					devid, PTR_ERR(map->stripes[i].dev));
+				return PTR_ERR(map->stripes[i].dev);
 			}
-			btrfs_report_missing_device(fs_info, devid, uuid);
+			btrfs_report_missing_device(fs_info, devid, uuid, false);
 		}
 		map->stripes[i].dev->in_fs_metadata = 1;
 	}

@@ -6577,19 +6580,28 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
 	device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
 	if (!device) {
 		if (!btrfs_test_opt(fs_info, DEGRADED)) {
-			btrfs_report_missing_device(fs_info, devid, dev_uuid);
-			return -EIO;
+			btrfs_report_missing_device(fs_info, devid,
+							dev_uuid, true);
+			return -ENOENT;
 		}
 
 		device = add_missing_dev(fs_devices, devid, dev_uuid);
-		if (!device)
-			return -ENOMEM;
-		btrfs_report_missing_device(fs_info, devid, dev_uuid);
+		if (IS_ERR(device)) {
+			btrfs_err(fs_info,
+				"failed to add missing dev %llu: %ld",
+				devid, PTR_ERR(device));
+			return PTR_ERR(device);
+		}
+		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
 	} else {
 		if (!device->bdev) {
-			btrfs_report_missing_device(fs_info, devid, dev_uuid);
-			if (!btrfs_test_opt(fs_info, DEGRADED))
-				return -EIO;
+			if (!btrfs_test_opt(fs_info, DEGRADED)) {
+				btrfs_report_missing_device(fs_info,
+						devid, dev_uuid, true);
+				return -ENOENT;
+			}
+			btrfs_report_missing_device(fs_info, devid,
+							dev_uuid, false);
 		}
 
 		if(!device->bdev && !device->missing) {

@@ -6756,12 +6768,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
 	return -EIO;
 }
 
-void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
-				 u8 *uuid)
-{
-	btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid);
-}
-
 /*
  * Check if all chunks in the fs are OK for read-write degraded mount
 *

fs/btrfs/volumes.h

@@ -542,7 +542,5 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 
 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info);
-void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
-				 u8 *uuid);
 
 #endif

fs/btrfs/zlib.c

@@ -37,6 +37,7 @@ struct workspace {
 	z_stream strm;
 	char *buf;
 	struct list_head list;
+	int level;
 };
 
 static void zlib_free_workspace(struct list_head *ws)

@@ -96,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
 	*total_out = 0;
 	*total_in = 0;
 
-	if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
+	if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
 		pr_warn("BTRFS: deflateInit failed\n");
 		ret = -EIO;
 		goto out;

@@ -402,10 +403,22 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 	return ret;
 }
 
+static void zlib_set_level(struct list_head *ws, unsigned int type)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	unsigned level = (type & 0xF0) >> 4;
+
+	if (level > 9)
+		level = 9;
+
+	workspace->level = level > 0 ? level : 3;
+}
+
 const struct btrfs_compress_op btrfs_zlib_compress = {
 	.alloc_workspace	= zlib_alloc_workspace,
 	.free_workspace		= zlib_free_workspace,
 	.compress_pages		= zlib_compress_pages,
 	.decompress_bio		= zlib_decompress_bio,
 	.decompress		= zlib_decompress,
+	.set_level		= zlib_set_level,
 };
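
[Editor's note] zlib_set_level() decodes the level that the mount-option parser packs into the upper four bits of the type value, so -o compress=zlib:9 reaches zlib_deflateInit() as level 9, and an unspecified level (0) keeps the historical default of 3. A small userspace sketch of the same pack/unpack convention (the packing helper and enum value are assumptions for illustration):

#include <assert.h>

#define BTRFS_COMPRESS_ZLIB 1	/* low nibble: compression type */

static unsigned int pack_type_level(unsigned int type, unsigned int level)
{
	return type | (level << 4);	/* bits 4-7: requested level */
}

static unsigned int unpack_level(unsigned int type)
{
	unsigned int level = (type & 0xF0) >> 4;

	if (level > 9)
		level = 9;		/* clamp to zlib's maximum */
	return level > 0 ? level : 3;	/* 0 means unspecified: default 3 */
}

int main(void)
{
	assert(unpack_level(pack_type_level(BTRFS_COMPRESS_ZLIB, 9)) == 9);
	assert(unpack_level(pack_type_level(BTRFS_COMPRESS_ZLIB, 0)) == 3);
	return 0;
}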

fs/btrfs/zstd.c

@@ -423,10 +423,15 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
 	return ret;
 }
 
+static void zstd_set_level(struct list_head *ws, unsigned int type)
+{
+}
+
 const struct btrfs_compress_op btrfs_zstd_compress = {
 	.alloc_workspace	= zstd_alloc_workspace,
 	.free_workspace		= zstd_free_workspace,
 	.compress_pages		= zstd_compress_pages,
 	.decompress_bio		= zstd_decompress_bio,
 	.decompress		= zstd_decompress,
+	.set_level		= zstd_set_level,
 };

include/trace/events/btrfs.h

@@ -29,6 +29,13 @@ struct btrfs_qgroup_extent_record;
 struct btrfs_qgroup;
 struct prelim_ref;
 
+TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
+TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
+TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
+TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
+TRACE_DEFINE_ENUM(ALLOC_CHUNK);
+TRACE_DEFINE_ENUM(COMMIT_TRANS);
+
 #define show_ref_type(type)						\
 	__print_symbolic(type,						\
 		{ BTRFS_TREE_BLOCK_REF_KEY,	"TREE_BLOCK_REF" },	\

@@ -792,11 +799,10 @@ DEFINE_EVENT(btrfs_delayed_data_ref,  run_delayed_data_ref,
 DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_delayed_ref_node *ref,
 		 const struct btrfs_delayed_ref_head *head_ref,
 		 int action),
 
-	TP_ARGS(fs_info, ref, head_ref, action),
+	TP_ARGS(fs_info, head_ref, action),
 
 	TP_STRUCT__entry_btrfs(
 		__field(  u64,  bytenr		)

@@ -806,8 +812,8 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 	),
 
 	TP_fast_assign_btrfs(fs_info,
-		__entry->bytenr		= ref->bytenr;
-		__entry->num_bytes	= ref->num_bytes;
+		__entry->bytenr		= head_ref->bytenr;
+		__entry->num_bytes	= head_ref->num_bytes;
 		__entry->action		= action;
 		__entry->is_data	= head_ref->is_data;
 	),

@@ -822,21 +828,19 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 DEFINE_EVENT(btrfs_delayed_ref_head,  add_delayed_ref_head,
 
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_delayed_ref_node *ref,
 		 const struct btrfs_delayed_ref_head *head_ref,
 		 int action),
 
-	TP_ARGS(fs_info, ref, head_ref, action)
+	TP_ARGS(fs_info, head_ref, action)
 );
 
 DEFINE_EVENT(btrfs_delayed_ref_head,  run_delayed_ref_head,
 
	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_delayed_ref_node *ref,
 		 const struct btrfs_delayed_ref_head *head_ref,
 		 int action),
 
-	TP_ARGS(fs_info, ref, head_ref, action)
+	TP_ARGS(fs_info, head_ref, action)
 );
 
 #define show_chunk_type(type)					\

@@ -1692,6 +1696,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
 	TP_ARGS(fs_info, oldref, newref, tree_size)
 );
 
+TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
+	TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
+
+	TP_ARGS(root, ino, mod),
+
+	TP_STRUCT__entry_btrfs(
+		__field( u64, root_objectid )
+		__field( u64, ino )
+		__field( int, mod )
+	),
+
+	TP_fast_assign_btrfs(root->fs_info,
+		__entry->root_objectid = root->objectid;
+		__entry->ino = ino;
+		__entry->mod = mod;
+	),
+
+	TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
+			show_root_type(__entry->root_objectid),
+			(unsigned long long)__entry->ino, __entry->mod)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
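
[Editor's note] With the ref argument dropped, the delayed-ref-head events read bytenr and num_bytes from the head itself rather than a separate node, matching the rb_tree rework of delayed refs elsewhere in this series. The new btrfs_inode_mod_outstanding_extents event pairs with the per-inode outstanding-extents accounting changes; like the other btrfs events it should be switchable at runtime under the tracefs events/btrfs/ directory.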

include/uapi/linux/btrfs.h

@@ -609,10 +609,14 @@ struct btrfs_ioctl_ino_path_args {
 struct btrfs_ioctl_logical_ino_args {
 	__u64				logical;	/* in */
 	__u64				size;		/* in */
-	__u64				reserved[4];
+	__u64				reserved[3];	/* must be 0 for now */
+	__u64				flags;		/* in, v2 only */
 	/* struct btrfs_data_container	*inodes;	out   */
 	__u64				inodes;
 };
 
+/* Return every ref to the extent, not just those containing logical block.
+ * Requires logical == extent bytenr. */
+#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET	(1ULL << 0)
+
 enum btrfs_dev_stat_values {
 	/* disk I/O failure stats */

@@ -836,5 +840,7 @@ enum btrfs_err_code {
 					struct btrfs_ioctl_feature_flags[3])
 #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
 				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
+					struct btrfs_ioctl_logical_ino_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
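
[Editor's note] A userspace sketch of calling the v2 ioctl (error handling trimmed; requires CAP_SYS_ADMIN, a file descriptor anywhere on the filesystem, and a linux/btrfs.h new enough to carry the v2 definitions). With IGNORE_OFFSET set, logical must be the extent's bytenr rather than an arbitrary byte inside it:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Print every (inode, offset, root) triple that references the extent. */
static int logical_to_inodes(int fs_fd, __u64 logical)
{
	struct btrfs_ioctl_logical_ino_args args;
	struct btrfs_data_container *inodes;
	const __u64 bufsz = 64 * 1024;	/* v2 also accepts buffers > 64KiB */
	__u32 i;
	int ret;

	inodes = calloc(1, bufsz);
	if (!inodes)
		return -1;

	memset(&args, 0, sizeof(args));
	args.logical = logical;
	args.size = bufsz;
	args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
	args.inodes = (__u64)(uintptr_t)inodes;

	ret = ioctl(fs_fd, BTRFS_IOC_LOGICAL_INO_V2, &args);
	if (ret == 0) {
		/* val[] holds elem_cnt u64s, three per reference */
		for (i = 0; i < inodes->elem_cnt; i += 3)
			printf("inum=%llu offset=%llu root=%llu\n",
			       (unsigned long long)inodes->val[i],
			       (unsigned long long)inodes->val[i + 1],
			       (unsigned long long)inodes->val[i + 2]);
	}
	free(inodes);
	return ret;
}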

fs/btrfs/ctree.h

@@ -733,6 +733,7 @@ struct btrfs_balance_item {
 #define BTRFS_FILE_EXTENT_INLINE 0
 #define BTRFS_FILE_EXTENT_REG 1
 #define BTRFS_FILE_EXTENT_PREALLOC 2
+#define BTRFS_FILE_EXTENT_TYPES 2
 
 struct btrfs_file_extent_item {
 	/*
/* /*