mirror of https://gitee.com/openkylin/linux.git
Btrfs: make xattr replace operations atomic
Replacing a xattr consists of doing a lookup for its existing value, delete the current value from the respective leaf, release the search path and then finally insert the new value. This leaves a time window where readers (getxattr, listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs, so this has security implications. This change also fixes 2 other existing issues which were: *) Deleting the old xattr value without verifying first if the new xattr will fit in the existing leaf item (in case multiple xattrs are packed in the same item due to name hash collision); *) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't exist but we have have an existing item that packs muliple xattrs with the same name hash as the input xattr. In this case we should return ENOSPC. A test case for xfstests follows soon. Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace implementation. Reported-by: Alexandre Oliva <oliva@gnu.org> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
c7bc6319c5
commit
5f5bc6b1e2
|
@ -2939,7 +2939,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
*/
|
*/
|
||||||
if (!p->leave_spinning)
|
if (!p->leave_spinning)
|
||||||
btrfs_set_path_blocking(p);
|
btrfs_set_path_blocking(p);
|
||||||
if (ret < 0)
|
if (ret < 0 && !p->skip_release_on_error)
|
||||||
btrfs_release_path(p);
|
btrfs_release_path(p);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -607,6 +607,7 @@ struct btrfs_path {
|
||||||
unsigned int leave_spinning:1;
|
unsigned int leave_spinning:1;
|
||||||
unsigned int search_commit_root:1;
|
unsigned int search_commit_root:1;
|
||||||
unsigned int need_commit_sem:1;
|
unsigned int need_commit_sem:1;
|
||||||
|
unsigned int skip_release_on_error:1;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3690,6 +3691,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||||
int verify_dir_item(struct btrfs_root *root,
|
int verify_dir_item(struct btrfs_root *root,
|
||||||
struct extent_buffer *leaf,
|
struct extent_buffer *leaf,
|
||||||
struct btrfs_dir_item *dir_item);
|
struct btrfs_dir_item *dir_item);
|
||||||
|
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
||||||
|
struct btrfs_path *path,
|
||||||
|
const char *name,
|
||||||
|
int name_len);
|
||||||
|
|
||||||
/* orphan.c */
|
/* orphan.c */
|
||||||
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
|
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
|
||||||
|
|
|
@ -21,10 +21,6 @@
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
#include "transaction.h"
|
#include "transaction.h"
|
||||||
|
|
||||||
static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
|
||||||
struct btrfs_path *path,
|
|
||||||
const char *name, int name_len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* insert a name into a directory, doing overflow properly if there is a hash
|
* insert a name into a directory, doing overflow properly if there is a hash
|
||||||
* collision. data_size indicates how big the item inserted should be. On
|
* collision. data_size indicates how big the item inserted should be. On
|
||||||
|
@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||||
* this walks through all the entries in a dir item and finds one
|
* this walks through all the entries in a dir item and finds one
|
||||||
* for a specific name.
|
* for a specific name.
|
||||||
*/
|
*/
|
||||||
static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
||||||
struct btrfs_path *path,
|
struct btrfs_path *path,
|
||||||
const char *name, int name_len)
|
const char *name, int name_len)
|
||||||
{
|
{
|
||||||
struct btrfs_dir_item *dir_item;
|
struct btrfs_dir_item *dir_item;
|
||||||
unsigned long name_ptr;
|
unsigned long name_ptr;
|
||||||
|
|
160
fs/btrfs/xattr.c
160
fs/btrfs/xattr.c
|
@ -29,6 +29,7 @@
|
||||||
#include "xattr.h"
|
#include "xattr.h"
|
||||||
#include "disk-io.h"
|
#include "disk-io.h"
|
||||||
#include "props.h"
|
#include "props.h"
|
||||||
|
#include "locking.h"
|
||||||
|
|
||||||
|
|
||||||
ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
|
ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
|
||||||
|
@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
|
||||||
struct inode *inode, const char *name,
|
struct inode *inode, const char *name,
|
||||||
const void *value, size_t size, int flags)
|
const void *value, size_t size, int flags)
|
||||||
{
|
{
|
||||||
struct btrfs_dir_item *di;
|
struct btrfs_dir_item *di = NULL;
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
struct btrfs_path *path;
|
struct btrfs_path *path;
|
||||||
size_t name_len = strlen(name);
|
size_t name_len = strlen(name);
|
||||||
|
@ -103,84 +104,119 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
|
||||||
path = btrfs_alloc_path();
|
path = btrfs_alloc_path();
|
||||||
if (!path)
|
if (!path)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
path->skip_release_on_error = 1;
|
||||||
|
|
||||||
|
if (!value) {
|
||||||
|
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
|
||||||
|
name, name_len, -1);
|
||||||
|
if (!di && (flags & XATTR_REPLACE))
|
||||||
|
ret = -ENODATA;
|
||||||
|
else if (di)
|
||||||
|
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For a replace we can't just do the insert blindly.
|
||||||
|
* Do a lookup first (read-only btrfs_search_slot), and return if xattr
|
||||||
|
* doesn't exist. If it exists, fall down below to the insert/replace
|
||||||
|
* path - we can't race with a concurrent xattr delete, because the VFS
|
||||||
|
* locks the inode's i_mutex before calling setxattr or removexattr.
|
||||||
|
*/
|
||||||
if (flags & XATTR_REPLACE) {
|
if (flags & XATTR_REPLACE) {
|
||||||
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
|
ASSERT(mutex_is_locked(&inode->i_mutex));
|
||||||
name_len, -1);
|
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
|
||||||
if (IS_ERR(di)) {
|
name, name_len, 0);
|
||||||
ret = PTR_ERR(di);
|
if (!di) {
|
||||||
goto out;
|
|
||||||
} else if (!di) {
|
|
||||||
ret = -ENODATA;
|
ret = -ENODATA;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
btrfs_release_path(path);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* remove the attribute
|
|
||||||
*/
|
|
||||||
if (!value)
|
|
||||||
goto out;
|
|
||||||
} else {
|
|
||||||
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
|
|
||||||
name, name_len, 0);
|
|
||||||
if (IS_ERR(di)) {
|
|
||||||
ret = PTR_ERR(di);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
if (!di && !value)
|
|
||||||
goto out;
|
|
||||||
btrfs_release_path(path);
|
btrfs_release_path(path);
|
||||||
|
di = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
again:
|
|
||||||
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
|
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
|
||||||
name, name_len, value, size);
|
name, name_len, value, size);
|
||||||
/*
|
if (ret == -EOVERFLOW) {
|
||||||
* If we're setting an xattr to a new value but the new value is say
|
/*
|
||||||
* exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
|
* We have an existing item in a leaf, split_leaf couldn't
|
||||||
* back from split_leaf. This is because it thinks we'll be extending
|
* expand it. That item might have or not a dir_item that
|
||||||
* the existing item size, but we're asking for enough space to add the
|
* matches our target xattr, so lets check.
|
||||||
* item itself. So if we get EOVERFLOW just set ret to EEXIST and let
|
*/
|
||||||
* the rest of the function figure it out.
|
ret = 0;
|
||||||
*/
|
btrfs_assert_tree_locked(path->nodes[0]);
|
||||||
if (ret == -EOVERFLOW)
|
di = btrfs_match_dir_item_name(root, path, name, name_len);
|
||||||
|
if (!di && !(flags & XATTR_REPLACE)) {
|
||||||
|
ret = -ENOSPC;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else if (ret == -EEXIST) {
|
||||||
|
ret = 0;
|
||||||
|
di = btrfs_match_dir_item_name(root, path, name, name_len);
|
||||||
|
ASSERT(di); /* logic error */
|
||||||
|
} else if (ret) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (di && (flags & XATTR_CREATE)) {
|
||||||
ret = -EEXIST;
|
ret = -EEXIST;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
if (ret == -EEXIST) {
|
if (di) {
|
||||||
if (flags & XATTR_CREATE)
|
|
||||||
goto out;
|
|
||||||
/*
|
/*
|
||||||
* We can't use the path we already have since we won't have the
|
* We're doing a replace, and it must be atomic, that is, at
|
||||||
* proper locking for a delete, so release the path and
|
* any point in time we have either the old or the new xattr
|
||||||
* re-lookup to delete the thing.
|
* value in the tree. We don't want readers (getxattr and
|
||||||
|
* listxattrs) to miss a value, this is specially important
|
||||||
|
* for ACLs.
|
||||||
*/
|
*/
|
||||||
btrfs_release_path(path);
|
const int slot = path->slots[0];
|
||||||
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
|
struct extent_buffer *leaf = path->nodes[0];
|
||||||
name, name_len, -1);
|
const u16 old_data_len = btrfs_dir_data_len(leaf, di);
|
||||||
if (IS_ERR(di)) {
|
const u32 item_size = btrfs_item_size_nr(leaf, slot);
|
||||||
ret = PTR_ERR(di);
|
const u32 data_size = sizeof(*di) + name_len + size;
|
||||||
goto out;
|
struct btrfs_item *item;
|
||||||
} else if (!di) {
|
unsigned long data_ptr;
|
||||||
/* Shouldn't happen but just in case... */
|
char *ptr;
|
||||||
btrfs_release_path(path);
|
|
||||||
goto again;
|
if (size > old_data_len) {
|
||||||
|
if (btrfs_leaf_free_space(root, leaf) <
|
||||||
|
(size - old_data_len)) {
|
||||||
|
ret = -ENOSPC;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
if (old_data_len + name_len + sizeof(*di) == item_size) {
|
||||||
if (ret)
|
/* No other xattrs packed in the same leaf item. */
|
||||||
goto out;
|
if (size > old_data_len)
|
||||||
|
btrfs_extend_item(root, path,
|
||||||
/*
|
size - old_data_len);
|
||||||
* We have a value to set, so go back and try to insert it now.
|
else if (size < old_data_len)
|
||||||
*/
|
btrfs_truncate_item(root, path, data_size, 1);
|
||||||
if (value) {
|
} else {
|
||||||
btrfs_release_path(path);
|
/* There are other xattrs packed in the same item. */
|
||||||
goto again;
|
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
btrfs_extend_item(root, path, data_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
item = btrfs_item_nr(slot);
|
||||||
|
ptr = btrfs_item_ptr(leaf, slot, char);
|
||||||
|
ptr += btrfs_item_size(leaf, item) - data_size;
|
||||||
|
di = (struct btrfs_dir_item *)ptr;
|
||||||
|
btrfs_set_dir_data_len(leaf, di, size);
|
||||||
|
data_ptr = ((unsigned long)(di + 1)) + name_len;
|
||||||
|
write_extent_buffer(leaf, value, data_ptr, size);
|
||||||
|
btrfs_mark_buffer_dirty(leaf);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Insert, and we had space for the xattr, so path->slots[0] is
|
||||||
|
* where our xattr dir_item is and btrfs_insert_xattr_item()
|
||||||
|
* filled it.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
|
|
Loading…
Reference in New Issue