mirror of https://gitee.com/openkylin/linux.git
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
This commit is contained in:
commit
860a7a0c32
19
fs/Kconfig
19
fs/Kconfig
|
@ -268,6 +268,25 @@ config OCFS2_COMPAT_JBD
|
|||
is backwards compatible with JBD. It is safe to say N here.
|
||||
However, if you really want to use the original JBD, say Y here.
|
||||
|
||||
config BTRFS_FS
|
||||
tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
|
||||
depends on EXPERIMENTAL
|
||||
select LIBCRC32C
|
||||
select ZLIB_INFLATE
|
||||
select ZLIB_DEFLATE
|
||||
help
|
||||
Btrfs is a new filesystem with extents, writable snapshotting,
|
||||
support for multiple devices and many more features.
|
||||
|
||||
Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
|
||||
FINALIZED. You should say N here unless you are interested in
|
||||
testing Btrfs with non-critical data.
|
||||
|
||||
To compile this file system support as a module, choose M here. The
|
||||
module will be called btrfs.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endif # BLOCK
|
||||
|
||||
config DNOTIFY
|
||||
|
|
|
@ -121,4 +121,5 @@ obj-$(CONFIG_HOSTFS) += hostfs/
|
|||
obj-$(CONFIG_HPPFS) += hppfs/
|
||||
obj-$(CONFIG_DEBUG_FS) += debugfs/
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2/
|
||||
obj-$(CONFIG_BTRFS_FS) += btrfs/
|
||||
obj-$(CONFIG_GFS2_FS) += gfs2/
|
||||
|
|
|
@ -0,0 +1,356 @@
|
|||
|
||||
NOTE! This copyright does *not* cover user programs that use kernel
|
||||
services by normal system calls - this is merely considered normal use
|
||||
of the kernel, and does *not* fall under the heading of "derived work".
|
||||
Also note that the GPL below is copyrighted by the Free Software
|
||||
Foundation, but the instance of code that it refers to (the Linux
|
||||
kernel) is copyrighted by me and others who actually wrote it.
|
||||
|
||||
Also note that the only valid version of the GPL as far as the kernel
|
||||
is concerned is _this_ particular version of the license (ie v2, not
|
||||
v2.2 or v3.x or whatever), unless explicitly otherwise stated.
|
||||
|
||||
Linus Torvalds
|
||||
|
||||
----------------------------------------
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
|
@ -0,0 +1,48 @@
|
|||
Install Instructions
|
||||
|
||||
Btrfs puts snapshots and subvolumes into the root directory of the FS. This
|
||||
directory can only be changed by btrfsctl right now, and normal filesystem
|
||||
operations do not work on it. The default subvolume is called 'default',
|
||||
and you can create files and directories in mount_point/default
|
||||
|
||||
Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need
|
||||
to compile the kernel with:
|
||||
|
||||
CONFIG_LIBCRC32C=m
|
||||
|
||||
libcrc32c can be static as well. Once your kernel is set up, typing make in the
|
||||
btrfs module sources will build against the running kernel. When the build is
|
||||
complete:
|
||||
|
||||
modprobe libcrc32c
|
||||
insmod btrfs.ko
|
||||
|
||||
The Btrfs utility programs require libuuid to build. This can be found
|
||||
in the e2fsprogs sources, and is usually available as libuuid or
|
||||
e2fsprogs-devel from various distros.
|
||||
|
||||
Building the utilities is just make ; make install. The programs go
|
||||
into /usr/local/bin. The commands available are:
|
||||
|
||||
mkfs.btrfs: create a filesystem
|
||||
|
||||
btrfsctl: control program to create snapshots and subvolumes:
|
||||
|
||||
mount /dev/sda2 /mnt
|
||||
btrfsctl -s new_subvol_name /mnt
|
||||
btrfsctl -s snapshot_of_default /mnt/default
|
||||
btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
|
||||
btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
|
||||
ls /mnt
|
||||
default snapshot_of_a_snapshot snapshot_of_new_subvol
|
||||
new_subvol_name snapshot_of_default
|
||||
|
||||
Snapshots and subvolumes cannot be deleted right now, but you can
|
||||
rm -rf all the files and directories inside them.
|
||||
|
||||
btrfsck: do a limited check of the FS extent trees.
|
||||
|
||||
debug-tree: print all of the FS metadata in text form. Example:
|
||||
|
||||
debug-tree /dev/sda2 >& big_output_file
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
ifneq ($(KERNELRELEASE),)
|
||||
# kbuild part of makefile
|
||||
|
||||
obj-$(CONFIG_BTRFS_FS) := btrfs.o
|
||||
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
file-item.o inode-item.o inode-map.o disk-io.o \
|
||||
transaction.o inode.o file.o tree-defrag.o \
|
||||
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
|
||||
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
|
||||
ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
|
||||
compression.o
|
||||
else
|
||||
|
||||
# Normal Makefile
|
||||
|
||||
KERNELDIR := /lib/modules/`uname -r`/build
|
||||
all:
|
||||
$(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
|
||||
|
||||
modules_install:
|
||||
$(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
|
||||
clean:
|
||||
$(MAKE) -C $(KERNELDIR) M=`pwd` clean
|
||||
|
||||
endif
|
|
@ -0,0 +1,351 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Red Hat. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/posix_acl.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include "ctree.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "xattr.h"
|
||||
|
||||
#ifdef CONFIG_FS_POSIX_ACL
|
||||
|
||||
static void btrfs_update_cached_acl(struct inode *inode,
|
||||
struct posix_acl **p_acl,
|
||||
struct posix_acl *acl)
|
||||
{
|
||||
spin_lock(&inode->i_lock);
|
||||
if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
|
||||
posix_acl_release(*p_acl);
|
||||
*p_acl = posix_acl_dup(acl);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
|
||||
{
|
||||
int size;
|
||||
const char *name;
|
||||
char *value = NULL;
|
||||
struct posix_acl *acl = NULL, **p_acl;
|
||||
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
name = POSIX_ACL_XATTR_ACCESS;
|
||||
p_acl = &BTRFS_I(inode)->i_acl;
|
||||
break;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
name = POSIX_ACL_XATTR_DEFAULT;
|
||||
p_acl = &BTRFS_I(inode)->i_default_acl;
|
||||
break;
|
||||
default:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
if (*p_acl != BTRFS_ACL_NOT_CACHED)
|
||||
acl = posix_acl_dup(*p_acl);
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
if (acl)
|
||||
return acl;
|
||||
|
||||
|
||||
size = __btrfs_getxattr(inode, name, "", 0);
|
||||
if (size > 0) {
|
||||
value = kzalloc(size, GFP_NOFS);
|
||||
if (!value)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
size = __btrfs_getxattr(inode, name, value, size);
|
||||
if (size > 0) {
|
||||
acl = posix_acl_from_xattr(value, size);
|
||||
btrfs_update_cached_acl(inode, p_acl, acl);
|
||||
}
|
||||
kfree(value);
|
||||
} else if (size == -ENOENT) {
|
||||
acl = NULL;
|
||||
btrfs_update_cached_acl(inode, p_acl, acl);
|
||||
}
|
||||
|
||||
return acl;
|
||||
}
|
||||
|
||||
/*
 * Fetch the ACL of the given type and serialise it into an xattr buffer.
 *
 * @inode: inode to query
 * @type:  ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
 * @value: destination buffer handed to posix_acl_to_xattr()
 * @size:  size of @value in bytes
 *
 * Returns the result of posix_acl_to_xattr(), -ENODATA when the inode has
 * no such ACL, or the error carried by btrfs_get_acl().
 */
static int btrfs_xattr_get_acl(struct inode *inode, int type,
			       void *value, size_t size)
{
	struct posix_acl *acl = btrfs_get_acl(inode, type);
	int len;

	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (!acl)
		return -ENODATA;

	len = posix_acl_to_xattr(acl, value, size);
	posix_acl_release(acl);

	return len;
}
|
||||
|
||||
/*
|
||||
* Needs to be called with fs_mutex held
|
||||
*/
|
||||
static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
|
||||
{
|
||||
int ret, size = 0;
|
||||
const char *name;
|
||||
struct posix_acl **p_acl;
|
||||
char *value = NULL;
|
||||
mode_t mode;
|
||||
|
||||
if (acl) {
|
||||
ret = posix_acl_valid(acl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
mode = inode->i_mode;
|
||||
ret = posix_acl_equiv_mode(acl, &mode);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = 0;
|
||||
inode->i_mode = mode;
|
||||
name = POSIX_ACL_XATTR_ACCESS;
|
||||
p_acl = &BTRFS_I(inode)->i_acl;
|
||||
break;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
if (!S_ISDIR(inode->i_mode))
|
||||
return acl ? -EINVAL : 0;
|
||||
name = POSIX_ACL_XATTR_DEFAULT;
|
||||
p_acl = &BTRFS_I(inode)->i_default_acl;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (acl) {
|
||||
size = posix_acl_xattr_size(acl->a_count);
|
||||
value = kmalloc(size, GFP_NOFS);
|
||||
if (!value) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = posix_acl_to_xattr(acl, value, size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __btrfs_setxattr(inode, name, value, size, 0);
|
||||
|
||||
out:
|
||||
kfree(value);
|
||||
|
||||
if (!ret)
|
||||
btrfs_update_cached_acl(inode, p_acl, acl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Decode an xattr value into a POSIX ACL and apply it to the inode.
 *
 * @inode: inode to modify
 * @type:  ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
 * @value: raw xattr payload, or NULL to call btrfs_set_acl() without an ACL
 * @size:  length of @value in bytes
 *
 * Returns the result of btrfs_set_acl(), or the error reported by
 * posix_acl_from_xattr() when the payload cannot be decoded.
 */
static int btrfs_xattr_set_acl(struct inode *inode, int type,
			       const void *value, size_t size)
{
	struct posix_acl *acl = NULL;
	int err;

	if (value != NULL) {
		acl = posix_acl_from_xattr(value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}

	/* acl may legitimately be NULL here */
	err = btrfs_set_acl(inode, acl, type);
	posix_acl_release(acl);

	return err;
}
|
||||
|
||||
|
||||
/*
 * xattr "get" callback for the access ACL; @name is ignored.
 * Forwards to btrfs_xattr_get_acl() with ACL_TYPE_ACCESS.
 */
static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
				      void *value, size_t size)
{
	int ret = btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);

	return ret;
}
|
||||
|
||||
/*
 * xattr "set" callback for the access ACL; @name and @flags are ignored.
 * Forwards to btrfs_xattr_set_acl() with ACL_TYPE_ACCESS.
 */
static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
				      const void *value, size_t size, int flags)
{
	int ret = btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);

	return ret;
}
|
||||
|
||||
/*
 * xattr "get" callback for the default ACL; @name is ignored.
 * Forwards to btrfs_xattr_get_acl() with ACL_TYPE_DEFAULT.
 */
static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
				       void *value, size_t size)
{
	int ret = btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);

	return ret;
}
|
||||
|
||||
/*
 * xattr "set" callback for the default ACL; @name and @flags are ignored.
 * Forwards to btrfs_xattr_set_acl() with ACL_TYPE_DEFAULT.
 */
static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
				       const void *value, size_t size, int flags)
{
	int ret = btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);

	return ret;
}
|
||||
|
||||
int btrfs_check_acl(struct inode *inode, int mask)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
int error = -EAGAIN;
|
||||
|
||||
acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
if (acl) {
|
||||
error = posix_acl_permission(inode, acl, mask);
|
||||
posix_acl_release(acl);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_init_acl is already generally called under fs_mutex, so the locking
|
||||
* stuff has been fixed to work with that. If the locking stuff changes, we
|
||||
* need to re-evaluate the acl locking stuff.
|
||||
*/
|
||||
int btrfs_init_acl(struct inode *inode, struct inode *dir)
|
||||
{
|
||||
struct posix_acl *acl = NULL;
|
||||
int ret = 0;
|
||||
|
||||
/* this happens with subvols */
|
||||
if (!dir)
|
||||
return 0;
|
||||
|
||||
if (!S_ISLNK(inode->i_mode)) {
|
||||
if (IS_POSIXACL(dir)) {
|
||||
acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
}
|
||||
|
||||
if (!acl)
|
||||
inode->i_mode &= ~current->fs->umask;
|
||||
}
|
||||
|
||||
if (IS_POSIXACL(dir) && acl) {
|
||||
struct posix_acl *clone;
|
||||
mode_t mode;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
|
||||
if (ret)
|
||||
goto failed;
|
||||
}
|
||||
clone = posix_acl_clone(acl, GFP_NOFS);
|
||||
ret = -ENOMEM;
|
||||
if (!clone)
|
||||
goto failed;
|
||||
|
||||
mode = inode->i_mode;
|
||||
ret = posix_acl_create_masq(clone, &mode);
|
||||
if (ret >= 0) {
|
||||
inode->i_mode = mode;
|
||||
if (ret > 0) {
|
||||
/* we need an acl */
|
||||
ret = btrfs_set_acl(inode, clone,
|
||||
ACL_TYPE_ACCESS);
|
||||
}
|
||||
}
|
||||
}
|
||||
failed:
|
||||
posix_acl_release(acl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_acl_chmod(struct inode *inode)
|
||||
{
|
||||
struct posix_acl *acl, *clone;
|
||||
int ret = 0;
|
||||
|
||||
if (S_ISLNK(inode->i_mode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!IS_POSIXACL(inode))
|
||||
return 0;
|
||||
|
||||
acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
if (IS_ERR(acl) || !acl)
|
||||
return PTR_ERR(acl);
|
||||
|
||||
clone = posix_acl_clone(acl, GFP_KERNEL);
|
||||
posix_acl_release(acl);
|
||||
if (!clone)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = posix_acl_chmod_masq(clone, inode->i_mode);
|
||||
if (!ret)
|
||||
ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
|
||||
|
||||
posix_acl_release(clone);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct xattr_handler btrfs_xattr_acl_default_handler = {
|
||||
.prefix = POSIX_ACL_XATTR_DEFAULT,
|
||||
.get = btrfs_xattr_acl_default_get,
|
||||
.set = btrfs_xattr_acl_default_set,
|
||||
};
|
||||
|
||||
struct xattr_handler btrfs_xattr_acl_access_handler = {
|
||||
.prefix = POSIX_ACL_XATTR_ACCESS,
|
||||
.get = btrfs_xattr_acl_access_get,
|
||||
.set = btrfs_xattr_acl_access_set,
|
||||
};
|
||||
|
||||
#else /* CONFIG_FS_POSIX_ACL */
|
||||
|
||||
/* Built without CONFIG_FS_POSIX_ACL: no ACL to adjust, always returns 0. */
int btrfs_acl_chmod(struct inode *inode)
{
	return 0;
}
|
||||
|
||||
/* Built without CONFIG_FS_POSIX_ACL: nothing to inherit, always returns 0. */
int btrfs_init_acl(struct inode *inode, struct inode *dir)
{
	return 0;
}
|
||||
|
||||
/* Built without CONFIG_FS_POSIX_ACL: no ACL is consulted, always returns 0. */
int btrfs_check_acl(struct inode *inode, int mask)
{
	return 0;
}
|
||||
|
||||
#endif /* CONFIG_FS_POSIX_ACL */
|
|
@ -0,0 +1,419 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/version.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
# include <linux/freezer.h>
|
||||
#include "async-thread.h"
|
||||
|
||||
#define WORK_QUEUED_BIT 0
|
||||
#define WORK_DONE_BIT 1
|
||||
#define WORK_ORDER_DONE_BIT 2
|
||||
|
||||
/*
|
||||
* container for the kthread task pointer and the list of pending work
|
||||
* One of these is allocated per thread.
|
||||
*/
|
||||
struct btrfs_worker_thread {
|
||||
/* pool we belong to */
|
||||
struct btrfs_workers *workers;
|
||||
|
||||
/* list of struct btrfs_work that are waiting for service */
|
||||
struct list_head pending;
|
||||
|
||||
/* list of worker threads from struct btrfs_workers */
|
||||
struct list_head worker_list;
|
||||
|
||||
/* kthread */
|
||||
struct task_struct *task;
|
||||
|
||||
/* number of things on the pending list */
|
||||
atomic_t num_pending;
|
||||
|
||||
unsigned long sequence;
|
||||
|
||||
/* protects the pending list. */
|
||||
spinlock_t lock;
|
||||
|
||||
/* set to non-zero when this thread is already awake and kicking */
|
||||
int working;
|
||||
|
||||
/* are we currently idle */
|
||||
int idle;
|
||||
};
|
||||
|
||||
/*
|
||||
* helper function to move a thread onto the idle list after it
|
||||
* has finished some requests.
|
||||
*/
|
||||
static void check_idle_worker(struct btrfs_worker_thread *worker)
|
||||
{
|
||||
if (!worker->idle && atomic_read(&worker->num_pending) <
|
||||
worker->workers->idle_thresh / 2) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&worker->workers->lock, flags);
|
||||
worker->idle = 1;
|
||||
list_move(&worker->worker_list, &worker->workers->idle_list);
|
||||
spin_unlock_irqrestore(&worker->workers->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* helper function to move a thread off the idle list after new
|
||||
* pending work is added.
|
||||
*/
|
||||
static void check_busy_worker(struct btrfs_worker_thread *worker)
|
||||
{
|
||||
if (worker->idle && atomic_read(&worker->num_pending) >=
|
||||
worker->workers->idle_thresh) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&worker->workers->lock, flags);
|
||||
worker->idle = 0;
|
||||
list_move_tail(&worker->worker_list,
|
||||
&worker->workers->worker_list);
|
||||
spin_unlock_irqrestore(&worker->workers->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static noinline int run_ordered_completions(struct btrfs_workers *workers,
|
||||
struct btrfs_work *work)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!workers->ordered)
|
||||
return 0;
|
||||
|
||||
set_bit(WORK_DONE_BIT, &work->flags);
|
||||
|
||||
spin_lock_irqsave(&workers->lock, flags);
|
||||
|
||||
while (!list_empty(&workers->order_list)) {
|
||||
work = list_entry(workers->order_list.next,
|
||||
struct btrfs_work, order_list);
|
||||
|
||||
if (!test_bit(WORK_DONE_BIT, &work->flags))
|
||||
break;
|
||||
|
||||
/* we are going to call the ordered done function, but
|
||||
* we leave the work item on the list as a barrier so
|
||||
* that later work items that are done don't have their
|
||||
* functions called before this one returns
|
||||
*/
|
||||
if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
|
||||
break;
|
||||
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
|
||||
work->ordered_func(work);
|
||||
|
||||
/* now take the lock again and call the freeing code */
|
||||
spin_lock_irqsave(&workers->lock, flags);
|
||||
list_del(&work->order_list);
|
||||
work->ordered_free(work);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* main loop for servicing work items
|
||||
*/
|
||||
static int worker_loop(void *arg)
|
||||
{
|
||||
struct btrfs_worker_thread *worker = arg;
|
||||
struct list_head *cur;
|
||||
struct btrfs_work *work;
|
||||
do {
|
||||
spin_lock_irq(&worker->lock);
|
||||
while (!list_empty(&worker->pending)) {
|
||||
cur = worker->pending.next;
|
||||
work = list_entry(cur, struct btrfs_work, list);
|
||||
list_del(&work->list);
|
||||
clear_bit(WORK_QUEUED_BIT, &work->flags);
|
||||
|
||||
work->worker = worker;
|
||||
spin_unlock_irq(&worker->lock);
|
||||
|
||||
work->func(work);
|
||||
|
||||
atomic_dec(&worker->num_pending);
|
||||
/*
|
||||
* unless this is an ordered work queue,
|
||||
* 'work' was probably freed by func above.
|
||||
*/
|
||||
run_ordered_completions(worker->workers, work);
|
||||
|
||||
spin_lock_irq(&worker->lock);
|
||||
check_idle_worker(worker);
|
||||
|
||||
}
|
||||
worker->working = 0;
|
||||
if (freezing(current)) {
|
||||
refrigerator();
|
||||
} else {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
spin_unlock_irq(&worker->lock);
|
||||
if (!kthread_should_stop())
|
||||
schedule();
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
} while (!kthread_should_stop());
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* this will wait for all the worker threads to shutdown
|
||||
*/
|
||||
int btrfs_stop_workers(struct btrfs_workers *workers)
|
||||
{
|
||||
struct list_head *cur;
|
||||
struct btrfs_worker_thread *worker;
|
||||
|
||||
list_splice_init(&workers->idle_list, &workers->worker_list);
|
||||
while (!list_empty(&workers->worker_list)) {
|
||||
cur = workers->worker_list.next;
|
||||
worker = list_entry(cur, struct btrfs_worker_thread,
|
||||
worker_list);
|
||||
kthread_stop(worker->task);
|
||||
list_del(&worker->worker_list);
|
||||
kfree(worker);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* simple init on struct btrfs_workers
|
||||
*/
|
||||
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
|
||||
{
|
||||
workers->num_workers = 0;
|
||||
INIT_LIST_HEAD(&workers->worker_list);
|
||||
INIT_LIST_HEAD(&workers->idle_list);
|
||||
INIT_LIST_HEAD(&workers->order_list);
|
||||
spin_lock_init(&workers->lock);
|
||||
workers->max_workers = max;
|
||||
workers->idle_thresh = 32;
|
||||
workers->name = name;
|
||||
workers->ordered = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* starts new worker threads. This does not enforce the max worker
|
||||
* count in case you need to temporarily go past it.
|
||||
*/
|
||||
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
|
||||
{
|
||||
struct btrfs_worker_thread *worker;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_workers; i++) {
|
||||
worker = kzalloc(sizeof(*worker), GFP_NOFS);
|
||||
if (!worker) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&worker->pending);
|
||||
INIT_LIST_HEAD(&worker->worker_list);
|
||||
spin_lock_init(&worker->lock);
|
||||
atomic_set(&worker->num_pending, 0);
|
||||
worker->task = kthread_run(worker_loop, worker,
|
||||
"btrfs-%s-%d", workers->name,
|
||||
workers->num_workers + i);
|
||||
worker->workers = workers;
|
||||
if (IS_ERR(worker->task)) {
|
||||
kfree(worker);
|
||||
ret = PTR_ERR(worker->task);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
spin_lock_irq(&workers->lock);
|
||||
list_add_tail(&worker->worker_list, &workers->idle_list);
|
||||
worker->idle = 1;
|
||||
workers->num_workers++;
|
||||
spin_unlock_irq(&workers->lock);
|
||||
}
|
||||
return 0;
|
||||
fail:
|
||||
btrfs_stop_workers(workers);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* run through the list and find a worker thread that doesn't have a lot
|
||||
* to do right now. This can return null if we aren't yet at the thread
|
||||
* count limit and all of the threads are busy.
|
||||
*/
|
||||
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
|
||||
{
|
||||
struct btrfs_worker_thread *worker;
|
||||
struct list_head *next;
|
||||
int enforce_min = workers->num_workers < workers->max_workers;
|
||||
|
||||
/*
|
||||
* if we find an idle thread, don't move it to the end of the
|
||||
* idle list. This improves the chance that the next submission
|
||||
* will reuse the same thread, and maybe catch it while it is still
|
||||
* working
|
||||
*/
|
||||
if (!list_empty(&workers->idle_list)) {
|
||||
next = workers->idle_list.next;
|
||||
worker = list_entry(next, struct btrfs_worker_thread,
|
||||
worker_list);
|
||||
return worker;
|
||||
}
|
||||
if (enforce_min || list_empty(&workers->worker_list))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* if we pick a busy task, move the task to the end of the list.
|
||||
* hopefully this will keep things somewhat evenly balanced.
|
||||
* Do the move in batches based on the sequence number. This groups
|
||||
* requests submitted at roughly the same time onto the same worker.
|
||||
*/
|
||||
next = workers->worker_list.next;
|
||||
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
|
||||
atomic_inc(&worker->num_pending);
|
||||
worker->sequence++;
|
||||
|
||||
if (worker->sequence % workers->idle_thresh == 0)
|
||||
list_move_tail(next, &workers->worker_list);
|
||||
return worker;
|
||||
}
|
||||
|
||||
/*
|
||||
* selects a worker thread to take the next job. This will either find
|
||||
* an idle worker, start a new worker up to the max count, or just return
|
||||
* one of the existing busy workers.
|
||||
*/
|
||||
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
|
||||
{
|
||||
struct btrfs_worker_thread *worker;
|
||||
unsigned long flags;
|
||||
|
||||
again:
|
||||
spin_lock_irqsave(&workers->lock, flags);
|
||||
worker = next_worker(workers);
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
|
||||
if (!worker) {
|
||||
spin_lock_irqsave(&workers->lock, flags);
|
||||
if (workers->num_workers >= workers->max_workers) {
|
||||
struct list_head *fallback = NULL;
|
||||
/*
|
||||
* we have failed to find any workers, just
|
||||
* return the force one
|
||||
*/
|
||||
if (!list_empty(&workers->worker_list))
|
||||
fallback = workers->worker_list.next;
|
||||
if (!list_empty(&workers->idle_list))
|
||||
fallback = workers->idle_list.next;
|
||||
BUG_ON(!fallback);
|
||||
worker = list_entry(fallback,
|
||||
struct btrfs_worker_thread, worker_list);
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
/* we're below the limit, start another worker */
|
||||
btrfs_start_workers(workers, 1);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return worker;
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_requeue_work just puts the work item back on the tail of the list
|
||||
* it was taken from. It is intended for use with long running work functions
|
||||
* that make some progress and want to give the cpu up for others.
|
||||
*/
|
||||
int btrfs_requeue_work(struct btrfs_work *work)
|
||||
{
|
||||
struct btrfs_worker_thread *worker = work->worker;
|
||||
unsigned long flags;
|
||||
|
||||
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
|
||||
goto out;
|
||||
|
||||
spin_lock_irqsave(&worker->lock, flags);
|
||||
atomic_inc(&worker->num_pending);
|
||||
list_add_tail(&work->list, &worker->pending);
|
||||
|
||||
/* by definition we're busy, take ourselves off the idle
|
||||
* list
|
||||
*/
|
||||
if (worker->idle) {
|
||||
spin_lock_irqsave(&worker->workers->lock, flags);
|
||||
worker->idle = 0;
|
||||
list_move_tail(&worker->worker_list,
|
||||
&worker->workers->worker_list);
|
||||
spin_unlock_irqrestore(&worker->workers->lock, flags);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&worker->lock, flags);
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* places a struct btrfs_work into the pending queue of one of the kthreads
|
||||
*/
|
||||
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
|
||||
{
|
||||
struct btrfs_worker_thread *worker;
|
||||
unsigned long flags;
|
||||
int wake = 0;
|
||||
|
||||
/* don't requeue something already on a list */
|
||||
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
|
||||
goto out;
|
||||
|
||||
worker = find_worker(workers);
|
||||
if (workers->ordered) {
|
||||
spin_lock_irqsave(&workers->lock, flags);
|
||||
list_add_tail(&work->order_list, &workers->order_list);
|
||||
spin_unlock_irqrestore(&workers->lock, flags);
|
||||
} else {
|
||||
INIT_LIST_HEAD(&work->order_list);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&worker->lock, flags);
|
||||
atomic_inc(&worker->num_pending);
|
||||
check_busy_worker(worker);
|
||||
list_add_tail(&work->list, &worker->pending);
|
||||
|
||||
/*
|
||||
* avoid calling into wake_up_process if this thread has already
|
||||
* been kicked
|
||||
*/
|
||||
if (!worker->working)
|
||||
wake = 1;
|
||||
worker->working = 1;
|
||||
|
||||
spin_unlock_irqrestore(&worker->lock, flags);
|
||||
|
||||
if (wake)
|
||||
wake_up_process(worker->task);
|
||||
out:
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_ASYNC_THREAD_
|
||||
#define __BTRFS_ASYNC_THREAD_
|
||||
|
||||
struct btrfs_worker_thread;
|
||||
|
||||
/*
|
||||
* This is similar to a workqueue, but it is meant to spread the operations
|
||||
* across all available cpus instead of just the CPU that was used to
|
||||
* queue the work. There is also some batching introduced to try and
|
||||
* cut down on context switches.
|
||||
*
|
||||
* By default threads are added on demand up to 2 * the number of cpus.
|
||||
* Changing struct btrfs_workers->max_workers is one way to prevent
|
||||
* demand creation of kthreads.
|
||||
*
|
||||
* the basic model of these worker threads is to embed a btrfs_work
|
||||
* structure in your own data struct, and use container_of in a
|
||||
* work function to get back to your data struct.
|
||||
*/
|
||||
struct btrfs_work {
|
||||
/*
|
||||
* func should be set to the function you want called
|
||||
* your work struct is passed as the only arg
|
||||
*
|
||||
* ordered_func must be set for work sent to an ordered work queue,
|
||||
* and it is called to complete a given work item in the same
|
||||
* order they were sent to the queue.
|
||||
*/
|
||||
void (*func)(struct btrfs_work *work);
|
||||
void (*ordered_func)(struct btrfs_work *work);
|
||||
void (*ordered_free)(struct btrfs_work *work);
|
||||
|
||||
/*
|
||||
* flags should be set to zero. It is used to make sure the
|
||||
* struct is only inserted once into the list.
|
||||
*/
|
||||
unsigned long flags;
|
||||
|
||||
/* don't touch these */
|
||||
struct btrfs_worker_thread *worker;
|
||||
struct list_head list;
|
||||
struct list_head order_list;
|
||||
};
|
||||
|
||||
struct btrfs_workers {
|
||||
/* current number of running workers */
|
||||
int num_workers;
|
||||
|
||||
/* max number of workers allowed. changed by btrfs_start_workers */
|
||||
int max_workers;
|
||||
|
||||
/* once a worker has this many requests or fewer, it is idle */
|
||||
int idle_thresh;
|
||||
|
||||
/* force completions in the order they were queued */
|
||||
int ordered;
|
||||
|
||||
/* list with all the work threads. The workers on the idle thread
|
||||
* may be actively servicing jobs, but they haven't yet hit the
|
||||
* idle thresh limit above.
|
||||
*/
|
||||
struct list_head worker_list;
|
||||
struct list_head idle_list;
|
||||
|
||||
/*
|
||||
* when operating in ordered mode, this maintains the list
|
||||
* of work items waiting for completion
|
||||
*/
|
||||
struct list_head order_list;
|
||||
|
||||
/* lock for finding the next worker thread to queue on */
|
||||
spinlock_t lock;
|
||||
|
||||
/* extra name for this worker, used for current->name */
|
||||
char *name;
|
||||
};
|
||||
|
||||
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
|
||||
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
|
||||
int btrfs_stop_workers(struct btrfs_workers *workers);
|
||||
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
|
||||
int btrfs_requeue_work(struct btrfs_work *work);
|
||||
#endif
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_I__
|
||||
#define __BTRFS_I__
|
||||
|
||||
#include "extent_map.h"
|
||||
#include "extent_io.h"
|
||||
#include "ordered-data.h"
|
||||
|
||||
/* in memory btrfs inode */
|
||||
struct btrfs_inode {
|
||||
/* which subvolume this inode belongs to */
|
||||
struct btrfs_root *root;
|
||||
|
||||
/* key used to find this inode on disk. This is used by the code
|
||||
* to read in roots of subvolumes
|
||||
*/
|
||||
struct btrfs_key location;
|
||||
|
||||
/* the extent_tree has caches of all the extent mappings to disk */
|
||||
struct extent_map_tree extent_tree;
|
||||
|
||||
/* the io_tree does range state (DIRTY, LOCKED etc) */
|
||||
struct extent_io_tree io_tree;
|
||||
|
||||
/* special utility tree used to record which mirrors have already been
|
||||
* tried when checksums fail for a given block
|
||||
*/
|
||||
struct extent_io_tree io_failure_tree;
|
||||
|
||||
/* held while inesrting or deleting extents from files */
|
||||
struct mutex extent_mutex;
|
||||
|
||||
/* held while logging the inode in tree-log.c */
|
||||
struct mutex log_mutex;
|
||||
|
||||
/* used to order data wrt metadata */
|
||||
struct btrfs_ordered_inode_tree ordered_tree;
|
||||
|
||||
/* standard acl pointers */
|
||||
struct posix_acl *i_acl;
|
||||
struct posix_acl *i_default_acl;
|
||||
|
||||
/* for keeping track of orphaned inodes */
|
||||
struct list_head i_orphan;
|
||||
|
||||
/* list of all the delalloc inodes in the FS. There are times we need
|
||||
* to write all the delalloc pages to disk, and this list is used
|
||||
* to walk them all.
|
||||
*/
|
||||
struct list_head delalloc_inodes;
|
||||
|
||||
/* full 64 bit generation number, struct vfs_inode doesn't have a big
|
||||
* enough field for this.
|
||||
*/
|
||||
u64 generation;
|
||||
|
||||
/* sequence number for NFS changes */
|
||||
u64 sequence;
|
||||
|
||||
/*
|
||||
* transid of the trans_handle that last modified this inode
|
||||
*/
|
||||
u64 last_trans;
|
||||
/*
|
||||
* transid that last logged this inode
|
||||
*/
|
||||
u64 logged_trans;
|
||||
|
||||
/*
|
||||
* trans that last made a change that should be fully fsync'd. This
|
||||
* gets reset to zero each time the inode is logged
|
||||
*/
|
||||
u64 log_dirty_trans;
|
||||
|
||||
/* total number of bytes pending delalloc, used by stat to calc the
|
||||
* real block usage of the file
|
||||
*/
|
||||
u64 delalloc_bytes;
|
||||
|
||||
/*
|
||||
* the size of the file stored in the metadata on disk. data=ordered
|
||||
* means the in-memory i_size might be larger than the size on disk
|
||||
* because not all the blocks are written yet.
|
||||
*/
|
||||
u64 disk_i_size;
|
||||
|
||||
/* flags field from the on disk inode */
|
||||
u32 flags;
|
||||
|
||||
/*
|
||||
* if this is a directory then index_cnt is the counter for the index
|
||||
* number for new files that are created
|
||||
*/
|
||||
u64 index_cnt;
|
||||
|
||||
/* the start of block group preferred for allocations. */
|
||||
u64 block_group;
|
||||
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
||||
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
|
||||
{
|
||||
return container_of(inode, struct btrfs_inode, vfs_inode);
|
||||
}
|
||||
|
||||
/* Set both the in-memory i_size and the recorded on-disk size to @size. */
static inline void btrfs_i_size_write(struct inode *inode, u64 size)
{
	struct btrfs_inode *binode = BTRFS_I(inode);

	inode->i_size = size;
	binode->disk_i_size = size;
}
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,32 @@
|
|||
#ifndef _COMPAT_H_
|
||||
#define _COMPAT_H_
|
||||
|
||||
#define btrfs_drop_nlink(inode) drop_nlink(inode)
|
||||
#define btrfs_inc_nlink(inode) inc_nlink(inode)
|
||||
|
||||
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27)
|
||||
static inline struct dentry *d_obtain_alias(struct inode *inode)
|
||||
{
|
||||
struct dentry *d;
|
||||
|
||||
if (!inode)
|
||||
return NULL;
|
||||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
|
||||
d = d_alloc_anon(inode);
|
||||
if (!d)
|
||||
iput(inode);
|
||||
return d;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
|
||||
# define __pagevec_lru_add_file __pagevec_lru_add
|
||||
# define open_bdev_exclusive open_bdev_excl
|
||||
# define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev)
|
||||
typedef unsigned __bitwise__ fmode_t;
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _COMPAT_H_ */
|
|
@ -0,0 +1,709 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/bit_spinlock.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include "compat.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "volumes.h"
|
||||
#include "ordered-data.h"
|
||||
#include "compression.h"
|
||||
#include "extent_io.h"
|
||||
#include "extent_map.h"
|
||||
|
||||
struct compressed_bio {
|
||||
/* number of bios pending for this compressed extent */
|
||||
atomic_t pending_bios;
|
||||
|
||||
/* the pages with the compressed data on them */
|
||||
struct page **compressed_pages;
|
||||
|
||||
/* inode that owns this data */
|
||||
struct inode *inode;
|
||||
|
||||
/* starting offset in the inode for our pages */
|
||||
u64 start;
|
||||
|
||||
/* number of bytes in the inode we're working on */
|
||||
unsigned long len;
|
||||
|
||||
/* number of bytes on disk */
|
||||
unsigned long compressed_len;
|
||||
|
||||
/* number of compressed pages in the array */
|
||||
unsigned long nr_pages;
|
||||
|
||||
/* IO errors */
|
||||
int errors;
|
||||
int mirror_num;
|
||||
|
||||
/* for reads, this is the bio we are copying the data into */
|
||||
struct bio *orig_bio;
|
||||
|
||||
/*
|
||||
* the start of a variable length array of checksums only
|
||||
* used by reads
|
||||
*/
|
||||
u32 sums;
|
||||
};
|
||||
|
||||
static inline int compressed_bio_size(struct btrfs_root *root,
|
||||
unsigned long disk_size)
|
||||
{
|
||||
u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
return sizeof(struct compressed_bio) +
|
||||
((disk_size + root->sectorsize - 1) / root->sectorsize) *
|
||||
csum_size;
|
||||
}
|
||||
|
||||
static struct bio *compressed_bio_alloc(struct block_device *bdev,
|
||||
u64 first_byte, gfp_t gfp_flags)
|
||||
{
|
||||
struct bio *bio;
|
||||
int nr_vecs;
|
||||
|
||||
nr_vecs = bio_get_nr_vecs(bdev);
|
||||
bio = bio_alloc(gfp_flags, nr_vecs);
|
||||
|
||||
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
|
||||
while (!bio && (nr_vecs /= 2))
|
||||
bio = bio_alloc(gfp_flags, nr_vecs);
|
||||
}
|
||||
|
||||
if (bio) {
|
||||
bio->bi_size = 0;
|
||||
bio->bi_bdev = bdev;
|
||||
bio->bi_sector = first_byte >> 9;
|
||||
}
|
||||
return bio;
|
||||
}
|
||||
|
||||
static int check_compressed_csum(struct inode *inode,
|
||||
struct compressed_bio *cb,
|
||||
u64 disk_start)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct page *page;
|
||||
unsigned long i;
|
||||
char *kaddr;
|
||||
u32 csum;
|
||||
u32 *cb_sum = &cb->sums;
|
||||
|
||||
if (btrfs_test_flag(inode, NODATASUM))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < cb->nr_pages; i++) {
|
||||
page = cb->compressed_pages[i];
|
||||
csum = ~(u32)0;
|
||||
|
||||
kaddr = kmap_atomic(page, KM_USER0);
|
||||
csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
|
||||
btrfs_csum_final(csum, (char *)&csum);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
|
||||
if (csum != *cb_sum) {
|
||||
printk(KERN_INFO "btrfs csum failed ino %lu "
|
||||
"extent %llu csum %u "
|
||||
"wanted %u mirror %d\n", inode->i_ino,
|
||||
(unsigned long long)disk_start,
|
||||
csum, *cb_sum, cb->mirror_num);
|
||||
ret = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
cb_sum++;
|
||||
|
||||
}
|
||||
ret = 0;
|
||||
fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* when we finish reading compressed pages from the disk, we
|
||||
* decompress them and then run the bio end_io routines on the
|
||||
* decompressed pages (in the inode address space).
|
||||
*
|
||||
* This allows the checksumming and other IO error handling routines
|
||||
* to work normally
|
||||
*
|
||||
* The compressed pages are freed here, and it must be run
|
||||
* in process context
|
||||
*/
|
||||
static void end_compressed_bio_read(struct bio *bio, int err)
|
||||
{
|
||||
struct extent_io_tree *tree;
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct inode *inode;
|
||||
struct page *page;
|
||||
unsigned long index;
|
||||
int ret;
|
||||
|
||||
if (err)
|
||||
cb->errors = 1;
|
||||
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
*/
|
||||
if (!atomic_dec_and_test(&cb->pending_bios))
|
||||
goto out;
|
||||
|
||||
inode = cb->inode;
|
||||
ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
|
||||
if (ret)
|
||||
goto csum_failed;
|
||||
|
||||
/* ok, we're the last bio for this extent, lets start
|
||||
* the decompression.
|
||||
*/
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
|
||||
cb->start,
|
||||
cb->orig_bio->bi_io_vec,
|
||||
cb->orig_bio->bi_vcnt,
|
||||
cb->compressed_len);
|
||||
csum_failed:
|
||||
if (ret)
|
||||
cb->errors = 1;
|
||||
|
||||
/* release the compressed pages */
|
||||
index = 0;
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
page->mapping = NULL;
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
/* do io completion on the original bio */
|
||||
if (cb->errors) {
|
||||
bio_io_error(cb->orig_bio);
|
||||
} else {
|
||||
int bio_index = 0;
|
||||
struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
|
||||
|
||||
/*
|
||||
* we have verified the checksum already, set page
|
||||
* checked so the end_io handlers know about it
|
||||
*/
|
||||
while (bio_index < cb->orig_bio->bi_vcnt) {
|
||||
SetPageChecked(bvec->bv_page);
|
||||
bvec++;
|
||||
bio_index++;
|
||||
}
|
||||
bio_endio(cb->orig_bio, 0);
|
||||
}
|
||||
|
||||
/* finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
out:
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the writeback bits on all of the file
|
||||
* pages for a compressed write
|
||||
*/
|
||||
static noinline int end_compressed_writeback(struct inode *inode, u64 start,
|
||||
unsigned long ram_size)
|
||||
{
|
||||
unsigned long index = start >> PAGE_CACHE_SHIFT;
|
||||
unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
|
||||
struct page *pages[16];
|
||||
unsigned long nr_pages = end_index - index + 1;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
while (nr_pages > 0) {
|
||||
ret = find_get_pages_contig(inode->i_mapping, index,
|
||||
min_t(unsigned long,
|
||||
nr_pages, ARRAY_SIZE(pages)), pages);
|
||||
if (ret == 0) {
|
||||
nr_pages -= 1;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < ret; i++) {
|
||||
end_page_writeback(pages[i]);
|
||||
page_cache_release(pages[i]);
|
||||
}
|
||||
nr_pages -= ret;
|
||||
index += ret;
|
||||
}
|
||||
/* the inode may be gone now */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* do the cleanup once all the compressed pages hit the disk.
|
||||
* This will clear writeback on the file pages and free the compressed
|
||||
* pages.
|
||||
*
|
||||
* This also calls the writeback end hooks for the file pages so that
|
||||
* metadata and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct bio *bio, int err)
|
||||
{
|
||||
struct extent_io_tree *tree;
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct inode *inode;
|
||||
struct page *page;
|
||||
unsigned long index;
|
||||
|
||||
if (err)
|
||||
cb->errors = 1;
|
||||
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
*/
|
||||
if (!atomic_dec_and_test(&cb->pending_bios))
|
||||
goto out;
|
||||
|
||||
/* ok, we're the last bio for this extent, step one is to
|
||||
* call back into the FS and do all the end_io operations
|
||||
*/
|
||||
inode = cb->inode;
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
|
||||
tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
|
||||
cb->start,
|
||||
cb->start + cb->len - 1,
|
||||
NULL, 1);
|
||||
cb->compressed_pages[0]->mapping = NULL;
|
||||
|
||||
end_compressed_writeback(inode, cb->start, cb->len);
|
||||
/* note, our inode could be gone now */
|
||||
|
||||
/*
|
||||
* release the compressed pages, these came from alloc_page and
|
||||
* are not attached to the inode at all
|
||||
*/
|
||||
index = 0;
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
page->mapping = NULL;
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
/* finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
out:
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* worker function to build and submit bios for previously compressed pages.
|
||||
* The corresponding pages in the inode should be marked for writeback
|
||||
* and the compressed pages should have a reference on them for dropping
|
||||
* when the IO is complete.
|
||||
*
|
||||
* This also checksums the file bytes and gets things ready for
|
||||
* the end io hooks.
|
||||
*/
|
||||
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
unsigned long len, u64 disk_start,
|
||||
unsigned long compressed_len,
|
||||
struct page **compressed_pages,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
struct bio *bio = NULL;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct compressed_bio *cb;
|
||||
unsigned long bytes_left;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
int page_index = 0;
|
||||
struct page *page;
|
||||
u64 first_byte = disk_start;
|
||||
struct block_device *bdev;
|
||||
int ret;
|
||||
|
||||
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
|
||||
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
|
||||
atomic_set(&cb->pending_bios, 0);
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->start = start;
|
||||
cb->len = len;
|
||||
cb->mirror_num = 0;
|
||||
cb->compressed_pages = compressed_pages;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->orig_bio = NULL;
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
|
||||
|
||||
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
atomic_inc(&cb->pending_bios);
|
||||
|
||||
/* create and submit bios for the compressed pages */
|
||||
bytes_left = compressed_len;
|
||||
for (page_index = 0; page_index < cb->nr_pages; page_index++) {
|
||||
page = compressed_pages[page_index];
|
||||
page->mapping = inode->i_mapping;
|
||||
if (bio->bi_size)
|
||||
ret = io_tree->ops->merge_bio_hook(page, 0,
|
||||
PAGE_CACHE_SIZE,
|
||||
bio, 0);
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
page->mapping = NULL;
|
||||
if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
|
||||
PAGE_CACHE_SIZE) {
|
||||
bio_get(bio);
|
||||
|
||||
/*
|
||||
* inc the count before we submit the bio so
|
||||
* we know the end IO handler won't happen before
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
atomic_inc(&cb->pending_bios);
|
||||
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
|
||||
BUG_ON(ret);
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
|
||||
}
|
||||
if (bytes_left < PAGE_CACHE_SIZE) {
|
||||
printk("bytes left %lu compress len %lu nr %lu\n",
|
||||
bytes_left, cb->compressed_len, cb->nr_pages);
|
||||
}
|
||||
bytes_left -= PAGE_CACHE_SIZE;
|
||||
first_byte += PAGE_CACHE_SIZE;
|
||||
cond_resched();
|
||||
}
|
||||
bio_get(bio);
|
||||
|
||||
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
|
||||
BUG_ON(ret);
|
||||
|
||||
bio_put(bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
u64 compressed_end,
|
||||
struct compressed_bio *cb)
|
||||
{
|
||||
unsigned long end_index;
|
||||
unsigned long page_index;
|
||||
u64 last_offset;
|
||||
u64 isize = i_size_read(inode);
|
||||
int ret;
|
||||
struct page *page;
|
||||
unsigned long nr_pages = 0;
|
||||
struct extent_map *em;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct pagevec pvec;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_io_tree *tree;
|
||||
u64 end;
|
||||
int misses = 0;
|
||||
|
||||
page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
|
||||
last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
if (isize == 0)
|
||||
return 0;
|
||||
|
||||
end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
pagevec_init(&pvec, 0);
|
||||
while (last_offset < compressed_end) {
|
||||
page_index = last_offset >> PAGE_CACHE_SHIFT;
|
||||
|
||||
if (page_index > end_index)
|
||||
break;
|
||||
|
||||
rcu_read_lock();
|
||||
page = radix_tree_lookup(&mapping->page_tree, page_index);
|
||||
rcu_read_unlock();
|
||||
if (page) {
|
||||
misses++;
|
||||
if (misses > 4)
|
||||
break;
|
||||
goto next;
|
||||
}
|
||||
|
||||
page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
|
||||
if (!page)
|
||||
break;
|
||||
|
||||
page->index = page_index;
|
||||
/*
|
||||
* what we want to do here is call add_to_page_cache_lru,
|
||||
* but that isn't exported, so we reproduce it here
|
||||
*/
|
||||
if (add_to_page_cache(page, mapping,
|
||||
page->index, GFP_NOFS)) {
|
||||
page_cache_release(page);
|
||||
goto next;
|
||||
}
|
||||
|
||||
/* open coding of lru_cache_add, also not exported */
|
||||
page_cache_get(page);
|
||||
if (!pagevec_add(&pvec, page))
|
||||
__pagevec_lru_add_file(&pvec);
|
||||
|
||||
end = last_offset + PAGE_CACHE_SIZE - 1;
|
||||
/*
|
||||
* at this point, we have a locked page in the page cache
|
||||
* for these bytes in the file. But, we have to make
|
||||
* sure they map to this compressed extent on disk.
|
||||
*/
|
||||
set_page_extent_mapped(page);
|
||||
lock_extent(tree, last_offset, end, GFP_NOFS);
|
||||
spin_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, last_offset,
|
||||
PAGE_CACHE_SIZE);
|
||||
spin_unlock(&em_tree->lock);
|
||||
|
||||
if (!em || last_offset < em->start ||
|
||||
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
|
||||
(em->block_start >> 9) != cb->orig_bio->bi_sector) {
|
||||
free_extent_map(em);
|
||||
unlock_extent(tree, last_offset, end, GFP_NOFS);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
break;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
if (page->index == end_index) {
|
||||
char *userpage;
|
||||
size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
|
||||
|
||||
if (zero_offset) {
|
||||
int zeros;
|
||||
zeros = PAGE_CACHE_SIZE - zero_offset;
|
||||
userpage = kmap_atomic(page, KM_USER0);
|
||||
memset(userpage + zero_offset, 0, zeros);
|
||||
flush_dcache_page(page);
|
||||
kunmap_atomic(userpage, KM_USER0);
|
||||
}
|
||||
}
|
||||
|
||||
ret = bio_add_page(cb->orig_bio, page,
|
||||
PAGE_CACHE_SIZE, 0);
|
||||
|
||||
if (ret == PAGE_CACHE_SIZE) {
|
||||
nr_pages++;
|
||||
page_cache_release(page);
|
||||
} else {
|
||||
unlock_extent(tree, last_offset, end, GFP_NOFS);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
break;
|
||||
}
|
||||
next:
|
||||
last_offset += PAGE_CACHE_SIZE;
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
__pagevec_lru_add_file(&pvec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* for a compressed read, the bio we get passed has all the inode pages
|
||||
* in it. We don't actually do IO on those pages but allocate new ones
|
||||
* to hold the compressed pages on disk.
|
||||
*
|
||||
* bio->bi_sector points to the compressed extent on disk
|
||||
* bio->bi_io_vec points to all of the inode pages
|
||||
* bio->bi_vcnt is a count of pages
|
||||
*
|
||||
* After the compressed pages are read, we copy the bytes into the
|
||||
* bio we were passed and then call the bio end_io calls
|
||||
*/
|
||||
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags)
|
||||
{
|
||||
struct extent_io_tree *tree;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct compressed_bio *cb;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
|
||||
unsigned long compressed_len;
|
||||
unsigned long nr_pages;
|
||||
unsigned long page_index;
|
||||
struct page *page;
|
||||
struct block_device *bdev;
|
||||
struct bio *comp_bio;
|
||||
u64 cur_disk_byte = (u64)bio->bi_sector << 9;
|
||||
u64 em_len;
|
||||
u64 em_start;
|
||||
struct extent_map *em;
|
||||
int ret;
|
||||
u32 *sums;
|
||||
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
||||
/* we need the actual starting offset of this extent in the file */
|
||||
spin_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree,
|
||||
page_offset(bio->bi_io_vec->bv_page),
|
||||
PAGE_CACHE_SIZE);
|
||||
spin_unlock(&em_tree->lock);
|
||||
|
||||
compressed_len = em->block_len;
|
||||
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
|
||||
atomic_set(&cb->pending_bios, 0);
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
sums = &cb->sums;
|
||||
|
||||
cb->start = em->orig_start;
|
||||
em_len = em->len;
|
||||
em_start = em->start;
|
||||
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
|
||||
cb->len = uncompressed_len;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->orig_bio = bio;
|
||||
|
||||
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
|
||||
PAGE_CACHE_SIZE;
|
||||
cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
|
||||
GFP_NOFS);
|
||||
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
|
||||
|
||||
for (page_index = 0; page_index < nr_pages; page_index++) {
|
||||
cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
|
||||
__GFP_HIGHMEM);
|
||||
}
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
add_ra_bio_pages(inode, em_start + em_len, cb);
|
||||
|
||||
/* include any pages we added in add_ra-bio_pages */
|
||||
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
|
||||
cb->len = uncompressed_len;
|
||||
|
||||
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
atomic_inc(&cb->pending_bios);
|
||||
|
||||
for (page_index = 0; page_index < nr_pages; page_index++) {
|
||||
page = cb->compressed_pages[page_index];
|
||||
page->mapping = inode->i_mapping;
|
||||
page->index = em_start >> PAGE_CACHE_SHIFT;
|
||||
|
||||
if (comp_bio->bi_size)
|
||||
ret = tree->ops->merge_bio_hook(page, 0,
|
||||
PAGE_CACHE_SIZE,
|
||||
comp_bio, 0);
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
page->mapping = NULL;
|
||||
if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
|
||||
PAGE_CACHE_SIZE) {
|
||||
bio_get(comp_bio);
|
||||
|
||||
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
/*
|
||||
* inc the count before we submit the bio so
|
||||
* we know the end IO handler won't happen before
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
atomic_inc(&cb->pending_bios);
|
||||
|
||||
if (!btrfs_test_flag(inode, NODATASUM)) {
|
||||
btrfs_lookup_bio_sums(root, inode, comp_bio,
|
||||
sums);
|
||||
}
|
||||
sums += (comp_bio->bi_size + root->sectorsize - 1) /
|
||||
root->sectorsize;
|
||||
|
||||
ret = btrfs_map_bio(root, READ, comp_bio,
|
||||
mirror_num, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
bio_put(comp_bio);
|
||||
|
||||
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
|
||||
GFP_NOFS);
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
|
||||
bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
|
||||
}
|
||||
cur_disk_byte += PAGE_CACHE_SIZE;
|
||||
}
|
||||
bio_get(comp_bio);
|
||||
|
||||
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
if (!btrfs_test_flag(inode, NODATASUM))
|
||||
btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
|
||||
|
||||
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
bio_put(comp_bio);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_COMPRESSION_
|
||||
#define __BTRFS_COMPRESSION_
|
||||
|
||||
int btrfs_zlib_decompress(unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen);
|
||||
int btrfs_zlib_compress_pages(struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out);
|
||||
int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen);
|
||||
void btrfs_zlib_exit(void);
|
||||
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
unsigned long len, u64 disk_start,
|
||||
unsigned long compressed_len,
|
||||
struct page **compressed_pages,
|
||||
unsigned long nr_pages);
|
||||
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
#endif
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_CRC32C__
|
||||
#define __BTRFS_CRC32C__
|
||||
#include <asm/byteorder.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
/* #define CONFIG_BTRFS_HW_SUM 1 */
|
||||
|
||||
#ifdef CONFIG_BTRFS_HW_SUM
|
||||
#ifdef CONFIG_X86
|
||||
/*
|
||||
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
|
||||
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
|
||||
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2A: Instruction Set Reference, A-M
|
||||
*/
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */
|
||||
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define REX_PRE "0x48, "
|
||||
#define SCALE_F 8
|
||||
#else
|
||||
#define REX_PRE
|
||||
#define SCALE_F 4
|
||||
#endif
|
||||
|
||||
static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data,
|
||||
size_t length)
|
||||
{
|
||||
while (length--) {
|
||||
__asm__ __volatile__(
|
||||
".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
|
||||
:"=S"(crc)
|
||||
:"0"(crc), "c"(*data)
|
||||
);
|
||||
data++;
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/*
 * Fold 'len' bytes at 'p' into 'crc'.  The buffer is consumed in words of
 * SCALE_F bytes (u64 when CONFIG_X86_64 is set, u32 otherwise) and the
 * remaining len % SCALE_F bytes are passed to btrfs_crc32c_le_hw_byte().
 * Returns the updated crc.
 */
static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p,
					    size_t len)
{
	unsigned int nr_words = len / SCALE_F;
	unsigned int nr_tail = len % SCALE_F;
#ifdef CONFIG_X86_64
	u64 *word = (u64 *)p;
#else
	u32 *word = (u32 *)p;
#endif

	for (; nr_words; nr_words--, word++) {
		__asm__ __volatile__(
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(*word)
		);
	}

	if (!nr_tail)
		return crc;

	/* fewer than SCALE_F bytes are left, finish them bytewise */
	return btrfs_crc32c_le_hw_byte(crc, (unsigned char *)word, nr_tail);
}
|
||||
#endif /* CONFIG_X86 */
|
||||
|
||||
static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address,
|
||||
size_t len)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_HW_SUM
|
||||
if (cpu_has_xmm4_2)
|
||||
return btrfs_crc32c_le_hw(crc, address, len);
|
||||
#endif
|
||||
return crc32c_le(crc, address, len);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
|
||||
|
||||
#endif /* CONFIG_BTRFS_HW_SUM */
|
||||
|
||||
/**
|
||||
* implementation of crc32c_le() changed in linux-2.6.23,
|
||||
* as of v0.13 btrfs-progs is using the latest version.
|
||||
* We must workaround older implementations of crc32c_le()
|
||||
* found on older kernel versions.
|
||||
*/
|
||||
#define btrfs_crc32c(seed, data, length) \
|
||||
__btrfs_crc32c(seed, (unsigned char const *)data, length)
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,386 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "hash.h"
|
||||
#include "transaction.h"
|
||||
|
||||
/*
|
||||
* insert a name into a directory, doing overflow properly if there is a hash
|
||||
* collision. data_size indicates how big the item inserted should be. On
|
||||
* success a struct btrfs_dir_item pointer is returned, otherwise it is
|
||||
* an ERR_PTR.
|
||||
*
|
||||
* The name is not copied into the dir item, you have to do that yourself.
|
||||
*/
|
||||
static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||
*trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *cpu_key,
|
||||
u32 data_size,
|
||||
const char *name,
|
||||
int name_len)
|
||||
{
|
||||
int ret;
|
||||
char *ptr;
|
||||
struct btrfs_item *item;
|
||||
struct extent_buffer *leaf;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
|
||||
if (ret == -EEXIST) {
|
||||
struct btrfs_dir_item *di;
|
||||
di = btrfs_match_dir_item_name(root, path, name, name_len);
|
||||
if (di)
|
||||
return ERR_PTR(-EEXIST);
|
||||
ret = btrfs_extend_item(trans, root, path, data_size);
|
||||
WARN_ON(ret > 0);
|
||||
}
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
WARN_ON(ret > 0);
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_nr(leaf, path->slots[0]);
|
||||
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
|
||||
BUG_ON(data_size > btrfs_item_size(leaf, item));
|
||||
ptr += btrfs_item_size(leaf, item) - data_size;
|
||||
return (struct btrfs_dir_item *)ptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* xattrs work a lot like directories, this inserts an xattr item
|
||||
* into the tree
|
||||
*/
|
||||
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, const char *name,
|
||||
u16 name_len, const void *data, u16 data_len,
|
||||
u64 dir)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_dir_item *dir_item;
|
||||
unsigned long name_ptr, data_ptr;
|
||||
struct btrfs_key key, location;
|
||||
struct btrfs_disk_key disk_key;
|
||||
struct extent_buffer *leaf;
|
||||
u32 data_size;
|
||||
|
||||
key.objectid = dir;
|
||||
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
if (name_len + data_len + sizeof(struct btrfs_dir_item) >
|
||||
BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
|
||||
return -ENOSPC;
|
||||
|
||||
data_size = sizeof(*dir_item) + name_len + data_len;
|
||||
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
|
||||
name, name_len);
|
||||
/*
|
||||
* FIXME: at some point we should handle xattr's that are larger than
|
||||
* what we can fit in our leaf. We set location to NULL b/c we arent
|
||||
* pointing at anything else, that will change if we store the xattr
|
||||
* data in a separate inode.
|
||||
*/
|
||||
BUG_ON(IS_ERR(dir_item));
|
||||
memset(&location, 0, sizeof(location));
|
||||
|
||||
leaf = path->nodes[0];
|
||||
btrfs_cpu_key_to_disk(&disk_key, &location);
|
||||
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
|
||||
btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name_len);
|
||||
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
|
||||
btrfs_set_dir_data_len(leaf, dir_item, data_len);
|
||||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
data_ptr = (unsigned long)((char *)name_ptr + name_len);
|
||||
|
||||
write_extent_buffer(leaf, name, name_ptr, name_len);
|
||||
write_extent_buffer(leaf, data, data_ptr, data_len);
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* insert a directory item in the tree, doing all the magic for
|
||||
* both indexes. 'dir' indicates which objectid to insert it into,
|
||||
* 'location' is the key to stuff into the directory item, 'type' is the
|
||||
* type of the inode we're pointing to, and 'index' is the sequence number
|
||||
* to use for the second index (if one is created).
|
||||
*/
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, const char *name, int name_len, u64 dir,
|
||||
struct btrfs_key *location, u8 type, u64 index)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret2 = 0;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_dir_item *dir_item;
|
||||
struct extent_buffer *leaf;
|
||||
unsigned long name_ptr;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_disk_key disk_key;
|
||||
u32 data_size;
|
||||
|
||||
key.objectid = dir;
|
||||
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
path = btrfs_alloc_path();
|
||||
data_size = sizeof(*dir_item) + name_len;
|
||||
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
|
||||
name, name_len);
|
||||
if (IS_ERR(dir_item)) {
|
||||
ret = PTR_ERR(dir_item);
|
||||
if (ret == -EEXIST)
|
||||
goto second_insert;
|
||||
goto out;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
btrfs_cpu_key_to_disk(&disk_key, location);
|
||||
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
|
||||
btrfs_set_dir_type(leaf, dir_item, type);
|
||||
btrfs_set_dir_data_len(leaf, dir_item, 0);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name_len);
|
||||
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
|
||||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
|
||||
write_extent_buffer(leaf, name, name_ptr, name_len);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
second_insert:
|
||||
/* FIXME, use some real flag for selecting the extra index */
|
||||
if (root == root->fs_info->tree_root) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
btrfs_release_path(root, path);
|
||||
|
||||
btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
|
||||
key.offset = index;
|
||||
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
|
||||
name, name_len);
|
||||
if (IS_ERR(dir_item)) {
|
||||
ret2 = PTR_ERR(dir_item);
|
||||
goto out;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
btrfs_cpu_key_to_disk(&disk_key, location);
|
||||
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
|
||||
btrfs_set_dir_type(leaf, dir_item, type);
|
||||
btrfs_set_dir_data_len(leaf, dir_item, 0);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name_len);
|
||||
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
|
||||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
write_extent_buffer(leaf, name, name_ptr, name_len);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret2)
|
||||
return ret2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup a directory item based on name. 'dir' is the objectid
|
||||
* we're searching in, and 'mod' tells us if you plan on deleting the
|
||||
* item (use mod < 0) or changing the options (use mod > 0)
|
||||
*/
|
||||
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
const char *name, int name_len,
|
||||
int mod)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
int ins_len = mod < 0 ? -1 : 0;
|
||||
int cow = mod != 0;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *leaf;
|
||||
|
||||
key.objectid = dir;
|
||||
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
|
||||
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
if (ret > 0) {
|
||||
if (path->slots[0] == 0)
|
||||
return NULL;
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
|
||||
if (found_key.objectid != dir ||
|
||||
btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
|
||||
found_key.offset != key.offset)
|
||||
return NULL;
|
||||
|
||||
return btrfs_match_dir_item_name(root, path, name, name_len);
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup a directory item based on index. 'dir' is the objectid
|
||||
* we're searching in, and 'mod' tells us if you plan on deleting the
|
||||
* item (use mod < 0) or changing the options (use mod > 0)
|
||||
*
|
||||
* The name is used to make sure the index really points to the name you were
|
||||
* looking for.
|
||||
*/
|
||||
struct btrfs_dir_item *
|
||||
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
u64 objectid, const char *name, int name_len,
|
||||
int mod)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
int ins_len = mod < 0 ? -1 : 0;
|
||||
int cow = mod != 0;
|
||||
|
||||
key.objectid = dir;
|
||||
btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
|
||||
key.offset = objectid;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
if (ret > 0)
|
||||
return ERR_PTR(-ENOENT);
|
||||
return btrfs_match_dir_item_name(root, path, name, name_len);
|
||||
}
|
||||
|
||||
/*
 * Look up an xattr item by name.  'dir' is the objectid the xattr is
 * keyed under.  'mod' announces what the caller is going to do: mod < 0
 * for a deletion, mod > 0 for a modification, 0 for read only.
 *
 * Returns the matching item, NULL when there is none, or an ERR_PTR when
 * the tree search itself fails.
 */
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 dir,
					  const char *name, u16 name_len,
					  int mod)
{
	struct btrfs_key key;
	struct btrfs_key found;
	int ins_len = 0;
	int cow = 0;
	int ret;

	if (mod < 0)
		ins_len = -1;
	if (mod != 0)
		cow = 1;

	key.objectid = dir;
	btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
	key.offset = btrfs_name_hash(name, name_len);

	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		/* no exact match, step back to the preceding slot */
		if (path->slots[0] == 0)
			return NULL;
		path->slots[0]--;
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found, path->slots[0]);

	/* the slot must hold exactly the key we asked for */
	if (found.objectid == dir &&
	    btrfs_key_type(&found) == BTRFS_XATTR_ITEM_KEY &&
	    found.offset == key.offset)
		return btrfs_match_dir_item_name(root, path, name, name_len);

	return NULL;
}
|
||||
|
||||
/*
|
||||
* helper function to look at the directory item pointed to by 'path'
|
||||
* this walks through all the entries in a dir item and finds one
|
||||
* for a specific name.
|
||||
*/
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const char *name, int name_len)
|
||||
{
|
||||
struct btrfs_dir_item *dir_item;
|
||||
unsigned long name_ptr;
|
||||
u32 total_len;
|
||||
u32 cur = 0;
|
||||
u32 this_len;
|
||||
struct extent_buffer *leaf;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
|
||||
total_len = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
while (cur < total_len) {
|
||||
this_len = sizeof(*dir_item) +
|
||||
btrfs_dir_name_len(leaf, dir_item) +
|
||||
btrfs_dir_data_len(leaf, dir_item);
|
||||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
|
||||
if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
|
||||
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
|
||||
return dir_item;
|
||||
|
||||
cur += this_len;
|
||||
dir_item = (struct btrfs_dir_item *)((char *)dir_item +
|
||||
this_len);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* given a pointer into a directory item, delete it. This
|
||||
* handles items that have more than one entry in them.
|
||||
*/
|
||||
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_dir_item *di)
|
||||
{
|
||||
|
||||
struct extent_buffer *leaf;
|
||||
u32 sub_item_len;
|
||||
u32 item_len;
|
||||
int ret = 0;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
|
||||
btrfs_dir_data_len(leaf, di);
|
||||
item_len = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
if (sub_item_len == item_len) {
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
} else {
|
||||
/* MARKER */
|
||||
unsigned long ptr = (unsigned long)di;
|
||||
unsigned long start;
|
||||
|
||||
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
|
||||
item_len - (ptr + sub_item_len - start));
|
||||
ret = btrfs_truncate_item(trans, root, path,
|
||||
item_len - sub_item_len, 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DISKIO__
|
||||
#define __DISKIO__
|
||||
|
||||
#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
|
||||
#define BTRFS_SUPER_INFO_SIZE 4096
|
||||
|
||||
#define BTRFS_SUPER_MIRROR_MAX 3
|
||||
#define BTRFS_SUPER_MIRROR_SHIFT 12
|
||||
|
||||
static inline u64 btrfs_sb_offset(int mirror)
|
||||
{
|
||||
u64 start = 16 * 1024;
|
||||
if (mirror)
|
||||
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
|
||||
return BTRFS_SUPER_INFO_OFFSET;
|
||||
}
|
||||
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
|
||||
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
|
||||
u32 blocksize, u64 parent_transid);
|
||||
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
|
||||
u64 parent_transid);
|
||||
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
|
||||
u64 bytenr, u32 blocksize);
|
||||
int clean_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *buf);
|
||||
struct btrfs_root *open_ctree(struct super_block *sb,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
char *options);
|
||||
int close_ctree(struct btrfs_root *root);
|
||||
int write_ctree_super(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, int max_mirrors);
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
|
||||
int btrfs_commit_super(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
|
||||
u64 bytenr, u32 blocksize);
|
||||
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 root_objectid);
|
||||
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key *location,
|
||||
const char *name, int namelen);
|
||||
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
|
||||
struct btrfs_key *location);
|
||||
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key *location);
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_insert_dev_radix(struct btrfs_root *root,
|
||||
struct block_device *bdev,
|
||||
u64 device_id,
|
||||
u64 block_start,
|
||||
u64 num_blocks);
|
||||
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
|
||||
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
|
||||
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
||||
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
|
||||
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
|
||||
int wait_on_tree_block_writeback(struct btrfs_root *root,
|
||||
struct extent_buffer *buf);
|
||||
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
|
||||
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
|
||||
void btrfs_csum_final(u32 crc, char *result);
|
||||
int btrfs_open_device(struct btrfs_device *dev);
|
||||
int btrfs_verify_block_csum(struct btrfs_root *root,
|
||||
struct extent_buffer *buf);
|
||||
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
int metadata);
|
||||
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
|
||||
int rw, struct bio *bio, int mirror_num,
|
||||
unsigned long bio_flags,
|
||||
extent_submit_bio_hook_t *submit_bio_start,
|
||||
extent_submit_bio_hook_t *submit_bio_done);
|
||||
|
||||
int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
|
||||
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
|
||||
int btrfs_write_tree_block(struct extent_buffer *buf);
|
||||
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
|
||||
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
int btree_lock_page_hook(struct page *page);
|
||||
#endif
|
|
@ -0,0 +1,203 @@
|
|||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "print-tree.h"
|
||||
#include "export.h"
|
||||
#include "compat.h"
|
||||
|
||||
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
|
||||
parent_objectid) / 4)
|
||||
#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \
|
||||
parent_root_objectid) / 4)
|
||||
#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4)
|
||||
|
||||
/*
 * Encode a file handle for 'dentry' into the buffer 'fh'.
 *
 * *max_len is the room available in 'fh', counted in 4-byte words (the
 * BTRFS_FID_SIZE_* limits are byte offsets divided by 4).  On success
 * *max_len is updated to the number of words used and one of the
 * FILEID_BTRFS_* handle types is returned.  255 is returned when the
 * buffer is too small for the handle that would have to be written.
 *
 * For a connectable handle on a non-directory the parent's objectid and
 * generation are stored too, plus the parent's root objectid when the
 * parent belongs to a different root than the inode itself.
 */
static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
			   int connectable)
{
	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
	struct inode *inode = dentry->d_inode;
	int len = *max_len;
	int type;

	if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
	    (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
		return 255;

	len = BTRFS_FID_SIZE_NON_CONNECTABLE;
	type = FILEID_BTRFS_WITHOUT_PARENT;

	fid->objectid = BTRFS_I(inode)->location.objectid;
	fid->root_objectid = BTRFS_I(inode)->root->objectid;
	fid->gen = inode->i_generation;

	if (connectable && !S_ISDIR(inode->i_mode)) {
		struct inode *parent;
		u64 parent_root_id;

		spin_lock(&dentry->d_lock);

		parent = dentry->d_parent->d_inode;
		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
		fid->parent_gen = parent->i_generation;
		parent_root_id = BTRFS_I(parent)->root->objectid;

		spin_unlock(&dentry->d_lock);

		if (parent_root_id != fid->root_objectid) {
			/*
			 * The check at the top only guaranteed room up to
			 * parent_gen; the cross-root form needs the whole
			 * fid, so verify that before writing the last field.
			 */
			if (*max_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)
				return 255;

			fid->parent_root_objectid = parent_root_id;
			len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
			type = FILEID_BTRFS_WITH_PARENT_ROOT;
		} else {
			len = BTRFS_FID_SIZE_CONNECTABLE;
			type = FILEID_BTRFS_WITH_PARENT;
		}
	}

	*max_len = len;
	return type;
}
|
||||
|
||||
/*
 * Turn an (objectid, root_objectid, generation) triple taken from a file
 * handle into a dentry.
 *
 * The root is read via btrfs_read_fs_root_no_name(), the inode via
 * btrfs_iget(); errors from either are passed back as ERR_PTR values.
 * If the inode's generation differs from 'generation' the inode is
 * released and ERR_PTR(-ESTALE) is returned.
 */
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
				       u64 root_objectid, u32 generation)
{
	struct btrfs_key key;
	struct btrfs_root *root;
	struct inode *inode;

	/* first locate the root the handle refers to */
	key.objectid = root_objectid;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	key.offset = (u64)-1;

	root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
	if (IS_ERR(root))
		return ERR_CAST(root);

	/* then the inode inside that root */
	key.objectid = objectid;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;

	inode = btrfs_iget(sb, &key, root, NULL);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	if (generation == inode->i_generation)
		return d_obtain_alias(inode);

	iput(inode);
	return ERR_PTR(-ESTALE);
}
|
||||
|
||||
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
|
||||
int fh_len, int fh_type)
|
||||
{
|
||||
struct btrfs_fid *fid = (struct btrfs_fid *) fh;
|
||||
u64 objectid, root_objectid;
|
||||
u32 generation;
|
||||
|
||||
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
|
||||
if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
|
||||
return NULL;
|
||||
root_objectid = fid->root_objectid;
|
||||
} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
|
||||
if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
|
||||
return NULL;
|
||||
root_objectid = fid->parent_root_objectid;
|
||||
} else
|
||||
return NULL;
|
||||
|
||||
objectid = fid->parent_objectid;
|
||||
generation = fid->parent_gen;
|
||||
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||
int fh_len, int fh_type)
|
||||
{
|
||||
struct btrfs_fid *fid = (struct btrfs_fid *) fh;
|
||||
u64 objectid, root_objectid;
|
||||
u32 generation;
|
||||
|
||||
if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
|
||||
fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
|
||||
(fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
|
||||
fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
|
||||
(fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
|
||||
fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
|
||||
return NULL;
|
||||
|
||||
objectid = fid->objectid;
|
||||
root_objectid = fid->root_objectid;
|
||||
generation = fid->gen;
|
||||
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_get_parent(struct dentry *child)
|
||||
{
|
||||
struct inode *dir = child->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(dir)->root;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
u64 objectid;
|
||||
int ret;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
||||
key.objectid = dir->i_ino;
|
||||
btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
|
||||
key.offset = (u64)-1;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0) {
|
||||
/* Error */
|
||||
btrfs_free_path(path);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (ret) {
|
||||
/* btrfs_search_slot() returns the slot where we'd want to
|
||||
insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
|
||||
The _real_ backref, telling us what the parent inode
|
||||
_actually_ is, will be in the slot _before_ the one
|
||||
that btrfs_search_slot() returns. */
|
||||
if (!slot) {
|
||||
/* Unless there is _no_ key in the tree before... */
|
||||
btrfs_free_path(path);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
slot--;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
btrfs_free_path(path);
|
||||
|
||||
if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
objectid = key.offset;
|
||||
|
||||
/* If we are already at the root of a subvol, return the real root */
|
||||
if (objectid == dir->i_ino)
|
||||
return dget(dir->i_sb->s_root);
|
||||
|
||||
/* Build a new key for the inode item */
|
||||
key.objectid = objectid;
|
||||
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
|
||||
key.offset = 0;
|
||||
|
||||
return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
|
||||
}
|
||||
|
||||
const struct export_operations btrfs_export_ops = {
|
||||
.encode_fh = btrfs_encode_fh,
|
||||
.fh_to_dentry = btrfs_fh_to_dentry,
|
||||
.fh_to_parent = btrfs_fh_to_parent,
|
||||
.get_parent = btrfs_get_parent,
|
||||
};
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef BTRFS_EXPORT_H
|
||||
#define BTRFS_EXPORT_H
|
||||
|
||||
#include <linux/exportfs.h>
|
||||
|
||||
extern const struct export_operations btrfs_export_ops;
|
||||
|
||||
struct btrfs_fid {
|
||||
u64 objectid;
|
||||
u64 root_objectid;
|
||||
u32 gen;
|
||||
|
||||
u64 parent_objectid;
|
||||
u32 parent_gen;
|
||||
|
||||
u64 parent_root_objectid;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,269 @@
|
|||
#ifndef __EXTENTIO__
|
||||
#define __EXTENTIO__
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
/* bits for the extent state */
|
||||
#define EXTENT_DIRTY 1
|
||||
#define EXTENT_WRITEBACK (1 << 1)
|
||||
#define EXTENT_UPTODATE (1 << 2)
|
||||
#define EXTENT_LOCKED (1 << 3)
|
||||
#define EXTENT_NEW (1 << 4)
|
||||
#define EXTENT_DELALLOC (1 << 5)
|
||||
#define EXTENT_DEFRAG (1 << 6)
|
||||
#define EXTENT_DEFRAG_DONE (1 << 7)
|
||||
#define EXTENT_BUFFER_FILLED (1 << 8)
|
||||
#define EXTENT_ORDERED (1 << 9)
|
||||
#define EXTENT_ORDERED_METADATA (1 << 10)
|
||||
#define EXTENT_BOUNDARY (1 << 11)
|
||||
#define EXTENT_NODATASUM (1 << 12)
|
||||
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
|
||||
|
||||
/* flags for bio submission */
|
||||
#define EXTENT_BIO_COMPRESSED 1
|
||||
|
||||
/*
|
||||
* page->private values. Every page that is controlled by the extent
|
||||
* map has page->private set to one.
|
||||
*/
|
||||
#define EXTENT_PAGE_PRIVATE 1
|
||||
#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
|
||||
|
||||
struct extent_state;
|
||||
|
||||
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
|
||||
struct bio *bio, int mirror_num,
|
||||
unsigned long bio_flags);
|
||||
struct extent_io_ops {
|
||||
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
|
||||
u64 start, u64 end, int *page_started,
|
||||
unsigned long *nr_written);
|
||||
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
|
||||
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
|
||||
extent_submit_bio_hook_t *submit_bio_hook;
|
||||
int (*merge_bio_hook)(struct page *page, unsigned long offset,
|
||||
size_t size, struct bio *bio,
|
||||
unsigned long bio_flags);
|
||||
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
|
||||
int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
|
||||
u64 start, u64 end,
|
||||
struct extent_state *state);
|
||||
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
|
||||
u64 start, u64 end,
|
||||
struct extent_state *state);
|
||||
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
|
||||
struct extent_state *state);
|
||||
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
|
||||
struct extent_state *state, int uptodate);
|
||||
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
|
||||
unsigned long old, unsigned long bits);
|
||||
int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
|
||||
unsigned long old, unsigned long bits);
|
||||
int (*write_cache_pages_lock_hook)(struct page *page);
|
||||
};
|
||||
|
||||
struct extent_io_tree {
|
||||
struct rb_root state;
|
||||
struct rb_root buffer;
|
||||
struct address_space *mapping;
|
||||
u64 dirty_bytes;
|
||||
spinlock_t lock;
|
||||
spinlock_t buffer_lock;
|
||||
struct extent_io_ops *ops;
|
||||
};
|
||||
|
||||
struct extent_state {
|
||||
u64 start;
|
||||
u64 end; /* inclusive */
|
||||
struct rb_node rb_node;
|
||||
struct extent_io_tree *tree;
|
||||
wait_queue_head_t wq;
|
||||
atomic_t refs;
|
||||
unsigned long state;
|
||||
|
||||
/* for use by the FS */
|
||||
u64 private;
|
||||
|
||||
struct list_head leak_list;
|
||||
};
|
||||
|
||||
struct extent_buffer {
|
||||
u64 start;
|
||||
unsigned long len;
|
||||
char *map_token;
|
||||
char *kaddr;
|
||||
unsigned long map_start;
|
||||
unsigned long map_len;
|
||||
struct page *first_page;
|
||||
atomic_t refs;
|
||||
int flags;
|
||||
struct list_head leak_list;
|
||||
struct rb_node rb_node;
|
||||
struct mutex mutex;
|
||||
};
|
||||
|
||||
struct extent_map_tree;
|
||||
|
||||
static inline struct extent_state *extent_state_next(struct extent_state *state)
|
||||
{
|
||||
struct rb_node *node;
|
||||
node = rb_next(&state->rb_node);
|
||||
if (!node)
|
||||
return NULL;
|
||||
return rb_entry(node, struct extent_state, rb_node);
|
||||
}
|
||||
|
||||
typedef struct extent_map *(get_extent_t)(struct inode *inode,
|
||||
struct page *page,
|
||||
size_t page_offset,
|
||||
u64 start, u64 len,
|
||||
int create);
|
||||
|
||||
void extent_io_tree_init(struct extent_io_tree *tree,
|
||||
struct address_space *mapping, gfp_t mask);
|
||||
int try_release_extent_mapping(struct extent_map_tree *map,
|
||||
struct extent_io_tree *tree, struct page *page,
|
||||
gfp_t mask);
|
||||
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page);
|
||||
int try_release_extent_state(struct extent_map_tree *map,
|
||||
struct extent_io_tree *tree, struct page *page,
|
||||
gfp_t mask);
|
||||
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
|
||||
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent);
|
||||
int __init extent_io_init(void);
|
||||
void extent_io_exit(void);
|
||||
|
||||
u64 count_range_bits(struct extent_io_tree *tree,
|
||||
u64 *start, u64 search_end,
|
||||
u64 max_bytes, unsigned long bits);
|
||||
|
||||
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
int bits, int filled);
|
||||
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
int bits, gfp_t mask);
|
||||
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
int bits, int wake, int delete, gfp_t mask);
|
||||
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
int bits, gfp_t mask);
|
||||
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, gfp_t mask);
|
||||
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
gfp_t mask);
|
||||
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, int bits);
|
||||
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
|
||||
u64 start, int bits);
|
||||
int extent_invalidatepage(struct extent_io_tree *tree,
|
||||
struct page *page, unsigned long offset);
|
||||
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent,
|
||||
struct writeback_control *wbc);
|
||||
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
|
||||
u64 start, u64 end, get_extent_t *get_extent,
|
||||
int mode);
|
||||
int extent_writepages(struct extent_io_tree *tree,
|
||||
struct address_space *mapping,
|
||||
get_extent_t *get_extent,
|
||||
struct writeback_control *wbc);
|
||||
int extent_readpages(struct extent_io_tree *tree,
|
||||
struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages,
|
||||
get_extent_t get_extent);
|
||||
int extent_prepare_write(struct extent_io_tree *tree,
|
||||
struct inode *inode, struct page *page,
|
||||
unsigned from, unsigned to, get_extent_t *get_extent);
|
||||
int extent_commit_write(struct extent_io_tree *tree,
|
||||
struct inode *inode, struct page *page,
|
||||
unsigned from, unsigned to);
|
||||
sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
|
||||
get_extent_t *get_extent);
|
||||
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
|
||||
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
|
||||
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
|
||||
void set_page_extent_mapped(struct page *page);
|
||||
|
||||
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
|
||||
u64 start, unsigned long len,
|
||||
struct page *page0,
|
||||
gfp_t mask);
|
||||
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
|
||||
u64 start, unsigned long len,
|
||||
gfp_t mask);
|
||||
void free_extent_buffer(struct extent_buffer *eb);
|
||||
int read_extent_buffer_pages(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb, u64 start, int wait,
|
||||
get_extent_t *get_extent, int mirror_num);
|
||||
|
||||
static inline void extent_buffer_get(struct extent_buffer *eb)
|
||||
{
|
||||
atomic_inc(&eb->refs);
|
||||
}
|
||||
|
||||
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
|
||||
unsigned long start,
|
||||
unsigned long len);
|
||||
void read_extent_buffer(struct extent_buffer *eb, void *dst,
|
||||
unsigned long start,
|
||||
unsigned long len);
|
||||
void write_extent_buffer(struct extent_buffer *eb, const void *src,
|
||||
unsigned long start, unsigned long len);
|
||||
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
|
||||
unsigned long dst_offset, unsigned long src_offset,
|
||||
unsigned long len);
|
||||
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
|
||||
unsigned long src_offset, unsigned long len);
|
||||
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
|
||||
unsigned long src_offset, unsigned long len);
|
||||
void memset_extent_buffer(struct extent_buffer *eb, char c,
|
||||
unsigned long start, unsigned long len);
|
||||
int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
|
||||
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
|
||||
int clear_extent_buffer_dirty(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int set_extent_buffer_dirty(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int set_extent_buffer_uptodate(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int extent_buffer_uptodate(struct extent_io_tree *tree,
|
||||
struct extent_buffer *eb);
|
||||
int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
|
||||
unsigned long min_len, char **token, char **map,
|
||||
unsigned long *map_start,
|
||||
unsigned long *map_len, int km);
|
||||
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
|
||||
unsigned long min_len, char **token, char **map,
|
||||
unsigned long *map_start,
|
||||
unsigned long *map_len, int km);
|
||||
void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
|
||||
int release_extent_buffer_tail_pages(struct extent_buffer *eb);
|
||||
int extent_range_uptodate(struct extent_io_tree *tree,
|
||||
u64 start, u64 end);
|
||||
int extent_clear_unlock_delalloc(struct inode *inode,
|
||||
struct extent_io_tree *tree,
|
||||
u64 start, u64 end, struct page *locked_page,
|
||||
int unlock_page,
|
||||
int clear_unlock,
|
||||
int clear_delalloc, int clear_dirty,
|
||||
int set_writeback,
|
||||
int end_writeback);
|
||||
#endif
|
|
@ -0,0 +1,351 @@
|
|||
#include <linux/err.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include "extent_map.h"
|
||||
|
||||
/* temporary define until extent_map moves out of btrfs */
|
||||
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
|
||||
unsigned long extra_flags,
|
||||
void (*ctor)(void *, struct kmem_cache *,
|
||||
unsigned long));
|
||||
|
||||
static struct kmem_cache *extent_map_cache;
|
||||
|
||||
/*
 * Create the slab cache that extent_map structures are allocated from.
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init extent_map_init(void)
{
	extent_map_cache = btrfs_cache_create("extent_map",
					      sizeof(struct extent_map), 0,
					      NULL);
	return extent_map_cache ? 0 : -ENOMEM;
}
|
||||
|
||||
void extent_map_exit(void)
|
||||
{
|
||||
if (extent_map_cache)
|
||||
kmem_cache_destroy(extent_map_cache);
|
||||
}
|
||||
|
||||
/**
|
||||
* extent_map_tree_init - initialize extent map tree
|
||||
* @tree: tree to initialize
|
||||
* @mask: flags for memory allocations during tree operations
|
||||
*
|
||||
* Initialize the extent tree @tree. Should be called for each new inode
|
||||
* or other user of the extent_map interface.
|
||||
*/
|
||||
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
|
||||
{
|
||||
tree->map.rb_node = NULL;
|
||||
spin_lock_init(&tree->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(extent_map_tree_init);
|
||||
|
||||
/**
|
||||
* alloc_extent_map - allocate new extent map structure
|
||||
* @mask: memory allocation flags
|
||||
*
|
||||
* Allocate a new extent_map structure. The new structure is
|
||||
* returned with a reference count of one and needs to be
|
||||
* freed using free_extent_map()
|
||||
*/
|
||||
struct extent_map *alloc_extent_map(gfp_t mask)
|
||||
{
|
||||
struct extent_map *em;
|
||||
em = kmem_cache_alloc(extent_map_cache, mask);
|
||||
if (!em || IS_ERR(em))
|
||||
return em;
|
||||
em->in_tree = 0;
|
||||
em->flags = 0;
|
||||
atomic_set(&em->refs, 1);
|
||||
return em;
|
||||
}
|
||||
EXPORT_SYMBOL(alloc_extent_map);
|
||||
|
||||
/**
|
||||
* free_extent_map - drop reference count of an extent_map
|
||||
* @em: extent map beeing releasead
|
||||
*
|
||||
* Drops the reference out on @em by one and free the structure
|
||||
* if the reference count hits zero.
|
||||
*/
|
||||
void free_extent_map(struct extent_map *em)
|
||||
{
|
||||
if (!em)
|
||||
return;
|
||||
WARN_ON(atomic_read(&em->refs) == 0);
|
||||
if (atomic_dec_and_test(&em->refs)) {
|
||||
WARN_ON(em->in_tree);
|
||||
kmem_cache_free(extent_map_cache, em);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(free_extent_map);
|
||||
|
||||
/*
 * Link 'node' into the rb-tree at the position selected by 'offset'.
 *
 * If an existing extent_map already covers 'offset' nothing is inserted
 * and that node is returned.  Otherwise the extent_map embedding 'node'
 * is marked in_tree, the node is linked and rebalanced, and NULL is
 * returned.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_map *em;

	while (*link) {
		parent = *link;
		em = rb_entry(parent, struct extent_map, rb_node);

		WARN_ON(!em->in_tree);

		if (offset < em->start) {
			link = &parent->rb_left;
		} else if (offset >= extent_map_end(em)) {
			link = &parent->rb_right;
		} else {
			/* 'offset' falls inside an existing mapping */
			return parent;
		}
	}

	em = rb_entry(node, struct extent_map, rb_node);
	em->in_tree = 1;
	rb_link_node(node, parent, link);
	rb_insert_color(node, root);
	return NULL;
}
|
||||
|
||||
/*
|
||||
* search through the tree for an extent_map with a given offset. If
|
||||
* it can't be found, try to find some neighboring extents
|
||||
*/
|
||||
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
struct rb_node **prev_ret,
|
||||
struct rb_node **next_ret)
|
||||
{
|
||||
struct rb_node *n = root->rb_node;
|
||||
struct rb_node *prev = NULL;
|
||||
struct rb_node *orig_prev = NULL;
|
||||
struct extent_map *entry;
|
||||
struct extent_map *prev_entry = NULL;
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct extent_map, rb_node);
|
||||
prev = n;
|
||||
prev_entry = entry;
|
||||
|
||||
WARN_ON(!entry->in_tree);
|
||||
|
||||
if (offset < entry->start)
|
||||
n = n->rb_left;
|
||||
else if (offset >= extent_map_end(entry))
|
||||
n = n->rb_right;
|
||||
else
|
||||
return n;
|
||||
}
|
||||
|
||||
if (prev_ret) {
|
||||
orig_prev = prev;
|
||||
while (prev && offset >= extent_map_end(prev_entry)) {
|
||||
prev = rb_next(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
*prev_ret = prev;
|
||||
prev = orig_prev;
|
||||
}
|
||||
|
||||
if (next_ret) {
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
while (prev && offset < prev_entry->start) {
|
||||
prev = rb_prev(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
*next_ret = prev;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* look for an offset in the tree, and if it can't be found, return
|
||||
* the first offset we can find smaller than 'offset'.
|
||||
*/
|
||||
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
|
||||
{
|
||||
struct rb_node *prev;
|
||||
struct rb_node *ret;
|
||||
ret = __tree_search(root, offset, &prev, NULL);
|
||||
if (!ret)
|
||||
return prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* check to see if two extent_map structs are adjacent and safe to merge */
|
||||
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
||||
{
|
||||
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* don't merge compressed extents, we need to know their
|
||||
* actual size
|
||||
*/
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
|
||||
return 0;
|
||||
|
||||
if (extent_map_end(prev) == next->start &&
|
||||
prev->flags == next->flags &&
|
||||
prev->bdev == next->bdev &&
|
||||
((next->block_start == EXTENT_MAP_HOLE &&
|
||||
prev->block_start == EXTENT_MAP_HOLE) ||
|
||||
(next->block_start == EXTENT_MAP_INLINE &&
|
||||
prev->block_start == EXTENT_MAP_INLINE) ||
|
||||
(next->block_start == EXTENT_MAP_DELALLOC &&
|
||||
prev->block_start == EXTENT_MAP_DELALLOC) ||
|
||||
(next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
|
||||
next->block_start == extent_map_block_end(prev)))) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* add_extent_mapping - add new extent map to the extent tree
|
||||
* @tree: tree to insert new map in
|
||||
* @em: map to insert
|
||||
*
|
||||
* Insert @em into @tree or perform a simple forward/backward merge with
|
||||
* existing mappings. The extent_map struct passed in will be inserted
|
||||
* into the tree directly, with an additional reference taken, or a
|
||||
* reference dropped if the merge attempt was sucessfull.
|
||||
*/
|
||||
int add_extent_mapping(struct extent_map_tree *tree,
|
||||
struct extent_map *em)
|
||||
{
|
||||
int ret = 0;
|
||||
struct extent_map *merge = NULL;
|
||||
struct rb_node *rb;
|
||||
struct extent_map *exist;
|
||||
|
||||
exist = lookup_extent_mapping(tree, em->start, em->len);
|
||||
if (exist) {
|
||||
free_extent_map(exist);
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
assert_spin_locked(&tree->lock);
|
||||
rb = tree_insert(&tree->map, em->start, &em->rb_node);
|
||||
if (rb) {
|
||||
ret = -EEXIST;
|
||||
free_extent_map(merge);
|
||||
goto out;
|
||||
}
|
||||
atomic_inc(&em->refs);
|
||||
if (em->start != 0) {
|
||||
rb = rb_prev(&em->rb_node);
|
||||
if (rb)
|
||||
merge = rb_entry(rb, struct extent_map, rb_node);
|
||||
if (rb && mergable_maps(merge, em)) {
|
||||
em->start = merge->start;
|
||||
em->len += merge->len;
|
||||
em->block_len += merge->block_len;
|
||||
em->block_start = merge->block_start;
|
||||
merge->in_tree = 0;
|
||||
rb_erase(&merge->rb_node, &tree->map);
|
||||
free_extent_map(merge);
|
||||
}
|
||||
}
|
||||
rb = rb_next(&em->rb_node);
|
||||
if (rb)
|
||||
merge = rb_entry(rb, struct extent_map, rb_node);
|
||||
if (rb && mergable_maps(em, merge)) {
|
||||
em->len += merge->len;
|
||||
em->block_len += merge->len;
|
||||
rb_erase(&merge->rb_node, &tree->map);
|
||||
merge->in_tree = 0;
|
||||
free_extent_map(merge);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(add_extent_mapping);
|
||||
|
||||
/* simple helper to do math around the end of an extent, handling wrap */
|
||||
static u64 range_end(u64 start, u64 len)
|
||||
{
|
||||
if (start + len < start)
|
||||
return (u64)-1;
|
||||
return start + len;
|
||||
}
|
||||
|
||||
/**
|
||||
* lookup_extent_mapping - lookup extent_map
|
||||
* @tree: tree to lookup in
|
||||
* @start: byte offset to start the search
|
||||
* @len: length of the lookup range
|
||||
*
|
||||
* Find and return the first extent_map struct in @tree that intersects the
|
||||
* [start, len] range. There may be additional objects in the tree that
|
||||
* intersect, so check the object returned carefully to make sure that no
|
||||
* additional lookups are needed.
|
||||
*/
|
||||
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct rb_node *rb_node;
|
||||
struct rb_node *prev = NULL;
|
||||
struct rb_node *next = NULL;
|
||||
u64 end = range_end(start, len);
|
||||
|
||||
assert_spin_locked(&tree->lock);
|
||||
rb_node = __tree_search(&tree->map, start, &prev, &next);
|
||||
if (!rb_node && prev) {
|
||||
em = rb_entry(prev, struct extent_map, rb_node);
|
||||
if (end > em->start && start < extent_map_end(em))
|
||||
goto found;
|
||||
}
|
||||
if (!rb_node && next) {
|
||||
em = rb_entry(next, struct extent_map, rb_node);
|
||||
if (end > em->start && start < extent_map_end(em))
|
||||
goto found;
|
||||
}
|
||||
if (!rb_node) {
|
||||
em = NULL;
|
||||
goto out;
|
||||
}
|
||||
if (IS_ERR(rb_node)) {
|
||||
em = ERR_PTR(PTR_ERR(rb_node));
|
||||
goto out;
|
||||
}
|
||||
em = rb_entry(rb_node, struct extent_map, rb_node);
|
||||
if (end > em->start && start < extent_map_end(em))
|
||||
goto found;
|
||||
|
||||
em = NULL;
|
||||
goto out;
|
||||
|
||||
found:
|
||||
atomic_inc(&em->refs);
|
||||
out:
|
||||
return em;
|
||||
}
|
||||
EXPORT_SYMBOL(lookup_extent_mapping);
|
||||
|
||||
/**
|
||||
* remove_extent_mapping - removes an extent_map from the extent tree
|
||||
* @tree: extent tree to remove from
|
||||
* @em: extent map beeing removed
|
||||
*
|
||||
* Removes @em from @tree. No reference counts are dropped, and no checks
|
||||
* are done to see if the range is in use
|
||||
*/
|
||||
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
|
||||
assert_spin_locked(&tree->lock);
|
||||
rb_erase(&em->rb_node, &tree->map);
|
||||
em->in_tree = 0;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(remove_extent_mapping);
|
|
@ -0,0 +1,62 @@
|
|||
#ifndef __EXTENTMAP__
#define __EXTENTMAP__

#include <linux/rbtree.h>

/*
 * Special values stored in extent_map->block_start in place of a real
 * block address.  Each expansion is fully parenthesized so it behaves as a
 * single operand wherever it is used.
 */
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
#define EXTENT_MAP_INLINE ((u64)-2)
#define EXTENT_MAP_DELALLOC ((u64)-1)

/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */

/* one mapping of a byte range onto blocks, kept in an extent_map_tree */
struct extent_map {
	/* linkage into extent_map_tree->map */
	struct rb_node rb_node;

	/* all of these are in bytes */
	u64 start;
	u64 len;
	u64 orig_start;
	/* either a real block address or one of the EXTENT_MAP_* markers */
	u64 block_start;
	u64 block_len;
	/* bit mask indexed by the EXTENT_FLAG_* bit numbers */
	unsigned long flags;
	struct block_device *bdev;
	/* reference count, incremented by lookup and insertion */
	atomic_t refs;
	/* non-zero while the map is linked into a tree */
	int in_tree;
};

/* rb-tree of extent_maps; 'lock' must be held around tree operations */
struct extent_map_tree {
	struct rb_root map;
	spinlock_t lock;
};

/*
 * First byte past the end of the mapping, saturating at the largest u64
 * value if start + len would wrap.
 */
static inline u64 extent_map_end(struct extent_map *em)
{
	if (em->start + em->len < em->start)
		return (u64)-1;
	return em->start + em->len;
}

/*
 * block_start + block_len of the mapping, saturating at the largest u64
 * value if the sum would wrap.
 */
static inline u64 extent_map_block_end(struct extent_map *em)
{
	if (em->block_start + em->block_len < em->block_start)
		return (u64)-1;
	return em->block_start + em->block_len;
}

void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 len);
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);

struct extent_map *alloc_extent_map(gfp_t mask);
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
#endif
|
|
@ -0,0 +1,821 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "print-tree.h"
|
||||
|
||||
/*
 * Largest number of checksums of 'size' bytes each that this file packs
 * into a single csum item: the leaf data area of root 'r', less the space
 * of two struct btrfs_item headers, divided by the checksum size, minus
 * one.  The 'size' argument is parenthesized so that an expression may be
 * passed safely.
 */
#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  (size)) - 1))
|
||||
/*
 * Insert a regular (BTRFS_FILE_EXTENT_REG) file extent item keyed by
 * (objectid, BTRFS_EXTENT_DATA_KEY, pos) into 'root' and fill in all of its
 * fields from the arguments.  The generation is taken from trans->transid.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * negative error reported by btrfs_insert_empty_item().
 */
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     u64 objectid, u64 pos,
			     u64 disk_offset, u64 disk_num_bytes,
			     u64 num_bytes, u64 offset, u64 ram_bytes,
			     u8 compression, u8 encryption, u16 other_encoding)
{
	int ret = 0;
	struct btrfs_file_extent_item *item;
	struct btrfs_key file_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;

	path = btrfs_alloc_path();
	/* errors are already propagated below, so do the same for ENOMEM */
	if (!path)
		return -ENOMEM;
	file_key.objectid = objectid;
	file_key.offset = pos;
	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);

	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
				      sizeof(*item));
	if (ret < 0)
		goto out;
	/* a positive return is not expected from an insertion */
	BUG_ON(ret);
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
	btrfs_set_file_extent_offset(leaf, item, offset);
	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, compression);
	btrfs_set_file_extent_encryption(leaf, item, encryption);
	btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}
|
||||
|
||||
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
u64 bytenr, int cow)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_key file_key;
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_csum_item *item;
|
||||
struct extent_buffer *leaf;
|
||||
u64 csum_offset = 0;
|
||||
u16 csum_size =
|
||||
btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
int csums_in_item;
|
||||
|
||||
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
file_key.offset = bytenr;
|
||||
btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
|
||||
ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
leaf = path->nodes[0];
|
||||
if (ret > 0) {
|
||||
ret = 1;
|
||||
if (path->slots[0] == 0)
|
||||
goto fail;
|
||||
path->slots[0]--;
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
|
||||
goto fail;
|
||||
|
||||
csum_offset = (bytenr - found_key.offset) >>
|
||||
root->fs_info->sb->s_blocksize_bits;
|
||||
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
csums_in_item /= csum_size;
|
||||
|
||||
if (csum_offset >= csums_in_item) {
|
||||
ret = -EFBIG;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
|
||||
item = (struct btrfs_csum_item *)((unsigned char *)item +
|
||||
csum_offset * csum_size);
|
||||
return item;
|
||||
fail:
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Search 'root' for the BTRFS_EXTENT_DATA_KEY item of 'objectid' at file
 * 'offset' and return the result of btrfs_search_slot().
 *
 * 'mod' selects the search mode: a negative value searches with an
 * insertion length of -1, and any non-zero value requests copy-on-write.
 */
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path, u64 objectid,
			     u64 offset, int mod)
{
	struct btrfs_key key;
	int ins_len = 0;
	int cow = 0;

	if (mod < 0)
		ins_len = -1;
	if (mod != 0)
		cow = 1;

	key.objectid = objectid;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

	return btrfs_search_slot(trans, root, &key, path, ins_len, cow);
}
|
||||
|
||||
|
||||
/*
 * Look up the checksum of every bio_vec in 'bio'.
 *
 * For each vector the ordered-sum cache is consulted first
 * (btrfs_find_ordered_sum) and the csum tree second.  A vector with no
 * checksum gets a sum of 0; for inodes of the data relocation tree the
 * range is additionally flagged EXTENT_NODATASUM, otherwise a message is
 * logged.
 *
 * Each checksum is stored in consecutive entries of 'dst' when 'dst' is
 * non-NULL, otherwise it is recorded in the inode's io_tree with
 * set_state_private().
 *
 * Returns 0, or -ENOMEM if no path could be allocated.
 */
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
			  struct bio *bio, u32 *dst)
{
	u32 sum;
	struct bio_vec *bvec = bio->bi_io_vec;
	int bio_index = 0;
	u64 offset;
	u64 item_start_offset = 0;
	u64 item_last_offset = 0;
	u64 disk_bytenr;
	u32 diff;
	u16 csum_size =
		btrfs_super_csum_size(&root->fs_info->super_copy);
	int ret;
	struct btrfs_path *path;
	struct btrfs_csum_item *item = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;

	path = btrfs_alloc_path();
	/* path is dereferenced right below, so it must not be NULL */
	if (!path)
		return -ENOMEM;
	if (bio->bi_size > PAGE_CACHE_SIZE * 8)
		path->reada = 2;

	WARN_ON(bio->bi_vcnt <= 0);

	disk_bytenr = (u64)bio->bi_sector << 9;
	while (bio_index < bio->bi_vcnt) {
		offset = page_offset(bvec->bv_page) + bvec->bv_offset;
		ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
		if (ret == 0)
			goto found;

		/* (re)load the csum item unless the cached one covers us */
		if (!item || disk_bytenr < item_start_offset ||
		    disk_bytenr >= item_last_offset) {
			struct btrfs_key found_key;
			u32 item_size;

			if (item)
				btrfs_release_path(root, path);
			item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
						 path, disk_bytenr, 0);
			if (IS_ERR(item)) {
				ret = PTR_ERR(item);
				if (ret == -ENOENT || ret == -EFBIG)
					ret = 0;
				sum = 0;
				if (BTRFS_I(inode)->root->root_key.objectid ==
				    BTRFS_DATA_RELOC_TREE_OBJECTID) {
					set_extent_bits(io_tree, offset,
						offset + bvec->bv_len - 1,
						EXTENT_NODATASUM, GFP_NOFS);
				} else {
					printk(KERN_INFO "btrfs no csum found "
					       "for inode %lu start %llu\n",
					       inode->i_ino,
					       (unsigned long long)offset);
				}
				item = NULL;
				btrfs_release_path(root, path);
				goto found;
			}
			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
					      path->slots[0]);

			item_start_offset = found_key.offset;
			item_size = btrfs_item_size_nr(path->nodes[0],
						       path->slots[0]);
			item_last_offset = item_start_offset +
				(item_size / csum_size) *
				root->sectorsize;
			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
					      struct btrfs_csum_item);
		}
		/*
		 * this byte range must be able to fit inside
		 * a single leaf so it will also fit inside a u32
		 */
		diff = disk_bytenr - item_start_offset;
		diff = diff / root->sectorsize;
		diff = diff * csum_size;

		read_extent_buffer(path->nodes[0], &sum,
				   ((unsigned long)item) + diff,
				   csum_size);
found:
		if (dst)
			*dst++ = sum;
		else
			set_state_private(io_tree, offset, sum);
		disk_bytenr += bvec->bv_len;
		bio_index++;
		bvec++;
	}
	btrfs_free_path(path);
	return 0;
}
|
||||
|
||||
/*
 * Collect the checksums stored in the csum tree for the disk byte range
 * [start, end].
 *
 * For every csum item that overlaps the range a btrfs_ordered_sum is
 * allocated, filled with one btrfs_sector_sum per sector and appended to
 * 'list'.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or a
 * negative error from the tree search.  Entries already appended to 'list'
 * are left there on error.
 */
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
			     struct list_head *list)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_csum_item *item;
	unsigned long offset;
	int ret;
	size_t size;
	u64 csum_end;
	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.offset = start;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		/* step back if the previous csum item still covers 'start' */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			offset = (start - key.offset) >>
				 root->fs_info->sb->s_blocksize_bits;
			if (offset * csum_size <
			    btrfs_item_size_nr(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	while (start <= end) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->csum_root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY)
			break;

		if (key.offset > end)
			break;

		/* skip any gap between the range start and this item */
		if (key.offset > start)
			start = key.offset;

		size = btrfs_item_size_nr(leaf, path->slots[0]);
		csum_end = key.offset + (size / csum_size) * root->sectorsize;
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		size = min(csum_end, end + 1) - start;
		sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS);
		BUG_ON(!sums);

		sector_sum = sums->sums;
		sums->bytenr = start;
		sums->len = size;

		offset = (start - key.offset) >>
			 root->fs_info->sb->s_blocksize_bits;
		offset *= csum_size;

		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (size > 0) {
			read_extent_buffer(path->nodes[0], &sector_sum->sum,
					   ((unsigned long)item) + offset,
					   csum_size);
			sector_sum->bytenr = start;

			size -= root->sectorsize;
			start += root->sectorsize;
			offset += csum_size;
			sector_sum++;
		}
		list_add_tail(&sums->list, list);

		path->slots[0]++;
	}
	ret = 0;
fail:
	btrfs_free_path(path);
	return ret;
}
|
||||
|
||||
/*
 * Checksum every bio_vec of 'bio' and attach the results to the ordered
 * extent(s) covering the data.
 *
 * When 'contig' is set the file offset of the data starts at 'file_start'
 * and advances by the length of each vector; otherwise it is derived from
 * the page of each vector.  In the non-contiguous case, whenever a vector
 * falls outside the current ordered extent the sums gathered so far are
 * handed to that extent and a fresh btrfs_ordered_sum is started for the
 * ordered extent containing the new offset.
 *
 * Returns 0 on success or -ENOMEM if the initial allocation fails.
 */
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
		       struct bio *bio, u64 file_start, int contig)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	char *data;
	struct bio_vec *bvec = bio->bi_io_vec;
	int bio_index = 0;
	unsigned long total_bytes = 0;
	unsigned long this_sum_bytes = 0;
	u64 offset;
	u64 disk_bytenr;

	WARN_ON(bio->bi_vcnt <= 0);
	sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
	if (!sums)
		return -ENOMEM;

	sector_sum = sums->sums;
	disk_bytenr = (u64)bio->bi_sector << 9;
	sums->len = bio->bi_size;
	INIT_LIST_HEAD(&sums->list);

	if (contig)
		offset = file_start;
	else
		offset = page_offset(bvec->bv_page) + bvec->bv_offset;

	ordered = btrfs_lookup_ordered_extent(inode, offset);
	BUG_ON(!ordered);
	sums->bytenr = ordered->start;

	while (bio_index < bio->bi_vcnt) {
		if (!contig)
			offset = page_offset(bvec->bv_page) + bvec->bv_offset;

		if (!contig && (offset >= ordered->file_offset + ordered->len ||
		    offset < ordered->file_offset)) {
			unsigned long bytes_left;

			/* close out the sums of the extent we just left */
			sums->len = this_sum_bytes;
			this_sum_bytes = 0;
			btrfs_add_ordered_sum(inode, ordered, sums);
			btrfs_put_ordered_extent(ordered);

			bytes_left = bio->bi_size - total_bytes;

			sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
				       GFP_NOFS);
			BUG_ON(!sums);
			sector_sum = sums->sums;
			sums->len = bytes_left;
			ordered = btrfs_lookup_ordered_extent(inode, offset);
			BUG_ON(!ordered);
			sums->bytenr = ordered->start;
		}

		data = kmap_atomic(bvec->bv_page, KM_USER0);
		/* the checksum is seeded with all ones */
		sector_sum->sum = ~(u32)0;
		sector_sum->sum = btrfs_csum_data(root,
						  data + bvec->bv_offset,
						  sector_sum->sum,
						  bvec->bv_len);
		kunmap_atomic(data, KM_USER0);
		btrfs_csum_final(sector_sum->sum,
				 (char *)&sector_sum->sum);
		sector_sum->bytenr = disk_bytenr;

		sector_sum++;
		bio_index++;
		total_bytes += bvec->bv_len;
		this_sum_bytes += bvec->bv_len;
		disk_bytenr += bvec->bv_len;
		offset += bvec->bv_len;
		bvec++;
	}
	btrfs_add_ordered_sum(inode, ordered, sums);
	btrfs_put_ordered_extent(ordered);
	return 0;
}
|
||||
|
||||
/*
|
||||
* helper function for csum removal, this expects the
|
||||
* key to describe the csum pointed to by the path, and it expects
|
||||
* the csum to overlap the range [bytenr, len]
|
||||
*
|
||||
* The csum should not be entirely contained in the range and the
|
||||
* range should not be entirely contained in the csum.
|
||||
*
|
||||
* This calls btrfs_truncate_item with the correct args based on the
|
||||
* overlap, and fixes up the key as required.
|
||||
*/
|
||||
static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *key,
|
||||
u64 bytenr, u64 len)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
u16 csum_size =
|
||||
btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
u64 csum_end;
|
||||
u64 end_byte = bytenr + len;
|
||||
u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
|
||||
int ret;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
|
||||
csum_end <<= root->fs_info->sb->s_blocksize_bits;
|
||||
csum_end += key->offset;
|
||||
|
||||
if (key->offset < bytenr && csum_end <= end_byte) {
|
||||
/*
|
||||
* [ bytenr - len ]
|
||||
* [ ]
|
||||
* [csum ]
|
||||
* A simple truncate off the end of the item
|
||||
*/
|
||||
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
|
||||
new_size *= csum_size;
|
||||
ret = btrfs_truncate_item(trans, root, path, new_size, 1);
|
||||
BUG_ON(ret);
|
||||
} else if (key->offset >= bytenr && csum_end > end_byte &&
|
||||
end_byte > key->offset) {
|
||||
/*
|
||||
* [ bytenr - len ]
|
||||
* [ ]
|
||||
* [csum ]
|
||||
* we need to truncate from the beginning of the csum
|
||||
*/
|
||||
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
|
||||
new_size *= csum_size;
|
||||
|
||||
ret = btrfs_truncate_item(trans, root, path, new_size, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
key->offset = end_byte;
|
||||
ret = btrfs_set_item_key_safe(trans, root, path, key);
|
||||
BUG_ON(ret);
|
||||
} else {
|
||||
BUG();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* deletes the csum items from the csum tree for a given
|
||||
* range of bytes.
|
||||
*/
|
||||
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 bytenr, u64 len)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
u64 end_byte = bytenr + len;
|
||||
u64 csum_end;
|
||||
struct extent_buffer *leaf;
|
||||
int ret;
|
||||
u16 csum_size =
|
||||
btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
|
||||
|
||||
root = root->fs_info->csum_root;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
||||
while (1) {
|
||||
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
key.offset = end_byte - 1;
|
||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret > 0) {
|
||||
if (path->slots[0] == 0)
|
||||
goto out;
|
||||
path->slots[0]--;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
|
||||
key.type != BTRFS_EXTENT_CSUM_KEY) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (key.offset >= end_byte)
|
||||
break;
|
||||
|
||||
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
|
||||
csum_end <<= blocksize_bits;
|
||||
csum_end += key.offset;
|
||||
|
||||
/* this csum ends before we start, we're done */
|
||||
if (csum_end <= bytenr)
|
||||
break;
|
||||
|
||||
/* delete the entire item, it is inside our range */
|
||||
if (key.offset >= bytenr && csum_end <= end_byte) {
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
BUG_ON(ret);
|
||||
if (key.offset == bytenr)
|
||||
break;
|
||||
} else if (key.offset < bytenr && csum_end > end_byte) {
|
||||
unsigned long offset;
|
||||
unsigned long shift_len;
|
||||
unsigned long item_offset;
|
||||
/*
|
||||
* [ bytenr - len ]
|
||||
* [csum ]
|
||||
*
|
||||
* Our bytes are in the middle of the csum,
|
||||
* we need to split this item and insert a new one.
|
||||
*
|
||||
* But we can't drop the path because the
|
||||
* csum could change, get removed, extended etc.
|
||||
*
|
||||
* The trick here is the max size of a csum item leaves
|
||||
* enough room in the tree block for a single
|
||||
* item header. So, we split the item in place,
|
||||
* adding a new header pointing to the existing
|
||||
* bytes. Then we loop around again and we have
|
||||
* a nicely formed csum item that we can neatly
|
||||
* truncate.
|
||||
*/
|
||||
offset = (bytenr - key.offset) >> blocksize_bits;
|
||||
offset *= csum_size;
|
||||
|
||||
shift_len = (len >> blocksize_bits) * csum_size;
|
||||
|
||||
item_offset = btrfs_item_ptr_offset(leaf,
|
||||
path->slots[0]);
|
||||
|
||||
memset_extent_buffer(leaf, 0, item_offset + offset,
|
||||
shift_len);
|
||||
key.offset = bytenr;
|
||||
|
||||
/*
|
||||
* btrfs_split_item returns -EAGAIN when the
|
||||
* item changed size or key
|
||||
*/
|
||||
ret = btrfs_split_item(trans, root, path, &key, offset);
|
||||
BUG_ON(ret && ret != -EAGAIN);
|
||||
|
||||
key.offset = end_byte - 1;
|
||||
} else {
|
||||
ret = truncate_one_csum(trans, root, path,
|
||||
&key, bytenr, len);
|
||||
BUG_ON(ret);
|
||||
if (key.offset < bytenr)
|
||||
break;
|
||||
}
|
||||
btrfs_release_path(root, path);
|
||||
}
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_ordered_sum *sums)
|
||||
{
|
||||
u64 bytenr;
|
||||
int ret;
|
||||
struct btrfs_key file_key;
|
||||
struct btrfs_key found_key;
|
||||
u64 next_offset;
|
||||
u64 total_bytes = 0;
|
||||
int found_next;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_csum_item *item;
|
||||
struct btrfs_csum_item *item_end;
|
||||
struct extent_buffer *leaf = NULL;
|
||||
u64 csum_offset;
|
||||
struct btrfs_sector_sum *sector_sum;
|
||||
u32 nritems;
|
||||
u32 ins_size;
|
||||
char *eb_map;
|
||||
char *eb_token;
|
||||
unsigned long map_len;
|
||||
unsigned long map_start;
|
||||
u16 csum_size =
|
||||
btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
sector_sum = sums->sums;
|
||||
again:
|
||||
next_offset = (u64)-1;
|
||||
found_next = 0;
|
||||
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
file_key.offset = sector_sum->bytenr;
|
||||
bytenr = sector_sum->bytenr;
|
||||
btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
|
||||
|
||||
item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
|
||||
if (!IS_ERR(item)) {
|
||||
leaf = path->nodes[0];
|
||||
ret = 0;
|
||||
goto found;
|
||||
}
|
||||
ret = PTR_ERR(item);
|
||||
if (ret == -EFBIG) {
|
||||
u32 item_size;
|
||||
/* we found one, but it isn't big enough yet */
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
if ((item_size / csum_size) >=
|
||||
MAX_CSUM_ITEMS(root, csum_size)) {
|
||||
/* already at max size, make a new one */
|
||||
goto insert;
|
||||
}
|
||||
} else {
|
||||
int slot = path->slots[0] + 1;
|
||||
/* we didn't find a csum item, insert one */
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
if (path->slots[0] >= nritems - 1) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret == 1)
|
||||
found_next = 1;
|
||||
if (ret != 0)
|
||||
goto insert;
|
||||
slot = 0;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
|
||||
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
|
||||
found_key.type != BTRFS_EXTENT_CSUM_KEY) {
|
||||
found_next = 1;
|
||||
goto insert;
|
||||
}
|
||||
next_offset = found_key.offset;
|
||||
found_next = 1;
|
||||
goto insert;
|
||||
}
|
||||
|
||||
/*
|
||||
* at this point, we know the tree has an item, but it isn't big
|
||||
* enough yet to put our csum in. Grow it
|
||||
*/
|
||||
btrfs_release_path(root, path);
|
||||
ret = btrfs_search_slot(trans, root, &file_key, path,
|
||||
csum_size, 1);
|
||||
if (ret < 0)
|
||||
goto fail_unlock;
|
||||
|
||||
if (ret > 0) {
|
||||
if (path->slots[0] == 0)
|
||||
goto insert;
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
csum_offset = (bytenr - found_key.offset) >>
|
||||
root->fs_info->sb->s_blocksize_bits;
|
||||
|
||||
if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
|
||||
found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
|
||||
csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
|
||||
goto insert;
|
||||
}
|
||||
|
||||
if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
|
||||
csum_size) {
|
||||
u32 diff = (csum_offset + 1) * csum_size;
|
||||
|
||||
/*
|
||||
* is the item big enough already? we dropped our lock
|
||||
* before and need to recheck
|
||||
*/
|
||||
if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
|
||||
goto csum;
|
||||
|
||||
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
if (diff != csum_size)
|
||||
goto insert;
|
||||
|
||||
ret = btrfs_extend_item(trans, root, path, diff);
|
||||
BUG_ON(ret);
|
||||
goto csum;
|
||||
}
|
||||
|
||||
insert:
|
||||
btrfs_release_path(root, path);
|
||||
csum_offset = 0;
|
||||
if (found_next) {
|
||||
u64 tmp = total_bytes + root->sectorsize;
|
||||
u64 next_sector = sector_sum->bytenr;
|
||||
struct btrfs_sector_sum *next = sector_sum + 1;
|
||||
|
||||
while (tmp < sums->len) {
|
||||
if (next_sector + root->sectorsize != next->bytenr)
|
||||
break;
|
||||
tmp += root->sectorsize;
|
||||
next_sector = next->bytenr;
|
||||
next++;
|
||||
}
|
||||
tmp = min(tmp, next_offset - file_key.offset);
|
||||
tmp >>= root->fs_info->sb->s_blocksize_bits;
|
||||
tmp = max((u64)1, tmp);
|
||||
tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
|
||||
ins_size = csum_size * tmp;
|
||||
} else {
|
||||
ins_size = csum_size;
|
||||
}
|
||||
ret = btrfs_insert_empty_item(trans, root, path, &file_key,
|
||||
ins_size);
|
||||
if (ret < 0)
|
||||
goto fail_unlock;
|
||||
if (ret != 0) {
|
||||
WARN_ON(1);
|
||||
goto fail_unlock;
|
||||
}
|
||||
csum:
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
|
||||
ret = 0;
|
||||
item = (struct btrfs_csum_item *)((unsigned char *)item +
|
||||
csum_offset * csum_size);
|
||||
found:
|
||||
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
|
||||
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
|
||||
btrfs_item_size_nr(leaf, path->slots[0]));
|
||||
eb_token = NULL;
|
||||
cond_resched();
|
||||
next_sector:
|
||||
|
||||
if (!eb_token ||
|
||||
(unsigned long)item + csum_size >= map_start + map_len) {
|
||||
int err;
|
||||
|
||||
if (eb_token)
|
||||
unmap_extent_buffer(leaf, eb_token, KM_USER1);
|
||||
eb_token = NULL;
|
||||
err = map_private_extent_buffer(leaf, (unsigned long)item,
|
||||
csum_size,
|
||||
&eb_token, &eb_map,
|
||||
&map_start, &map_len, KM_USER1);
|
||||
if (err)
|
||||
eb_token = NULL;
|
||||
}
|
||||
if (eb_token) {
|
||||
memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
|
||||
§or_sum->sum, csum_size);
|
||||
} else {
|
||||
write_extent_buffer(leaf, §or_sum->sum,
|
||||
(unsigned long)item, csum_size);
|
||||
}
|
||||
|
||||
total_bytes += root->sectorsize;
|
||||
sector_sum++;
|
||||
if (total_bytes < sums->len) {
|
||||
item = (struct btrfs_csum_item *)((char *)item +
|
||||
csum_size);
|
||||
if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
|
||||
sector_sum->bytenr) {
|
||||
bytenr = sector_sum->bytenr;
|
||||
goto next_sector;
|
||||
}
|
||||
}
|
||||
if (eb_token) {
|
||||
unmap_extent_buffer(leaf, eb_token, KM_USER1);
|
||||
eb_token = NULL;
|
||||
}
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
cond_resched();
|
||||
if (total_bytes < sums->len) {
|
||||
btrfs_release_path(root, path);
|
||||
goto again;
|
||||
}
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
||||
fail_unlock:
|
||||
goto out;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,495 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Red Hat. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include "ctree.h"
|
||||
|
||||
static int tree_insert_offset(struct rb_root *root, u64 offset,
|
||||
struct rb_node *node)
|
||||
{
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct btrfs_free_space *info;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
info = rb_entry(parent, struct btrfs_free_space, offset_index);
|
||||
|
||||
if (offset < info->offset)
|
||||
p = &(*p)->rb_left;
|
||||
else if (offset > info->offset)
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
rb_link_node(node, parent, p);
|
||||
rb_insert_color(node, root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tree_insert_bytes(struct rb_root *root, u64 bytes,
|
||||
struct rb_node *node)
|
||||
{
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct btrfs_free_space *info;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
info = rb_entry(parent, struct btrfs_free_space, bytes_index);
|
||||
|
||||
if (bytes < info->bytes)
|
||||
p = &(*p)->rb_left;
|
||||
else
|
||||
p = &(*p)->rb_right;
|
||||
}
|
||||
|
||||
rb_link_node(node, parent, p);
|
||||
rb_insert_color(node, root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* searches the tree for the given offset. If contains is set we will return
|
||||
* the free space that contains the given offset. If contains is not set we
|
||||
* will return the free space that starts at or after the given offset and is
|
||||
* at least bytes long.
|
||||
*/
|
||||
static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
|
||||
u64 offset, u64 bytes,
|
||||
int contains)
|
||||
{
|
||||
struct rb_node *n = root->rb_node;
|
||||
struct btrfs_free_space *entry, *ret = NULL;
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct btrfs_free_space, offset_index);
|
||||
|
||||
if (offset < entry->offset) {
|
||||
if (!contains &&
|
||||
(!ret || entry->offset < ret->offset) &&
|
||||
(bytes <= entry->bytes))
|
||||
ret = entry;
|
||||
n = n->rb_left;
|
||||
} else if (offset > entry->offset) {
|
||||
if ((entry->offset + entry->bytes - 1) >= offset &&
|
||||
bytes <= entry->bytes) {
|
||||
ret = entry;
|
||||
break;
|
||||
}
|
||||
n = n->rb_right;
|
||||
} else {
|
||||
if (bytes > entry->bytes) {
|
||||
n = n->rb_right;
|
||||
continue;
|
||||
}
|
||||
ret = entry;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* return a chunk at least bytes size, as close to offset that we can get.
|
||||
*/
|
||||
static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
struct rb_node *n = root->rb_node;
|
||||
struct btrfs_free_space *entry, *ret = NULL;
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct btrfs_free_space, bytes_index);
|
||||
|
||||
if (bytes < entry->bytes) {
|
||||
/*
|
||||
* We prefer to get a hole size as close to the size we
|
||||
* are asking for so we don't take small slivers out of
|
||||
* huge holes, but we also want to get as close to the
|
||||
* offset as possible so we don't have a whole lot of
|
||||
* fragmentation.
|
||||
*/
|
||||
if (offset <= entry->offset) {
|
||||
if (!ret)
|
||||
ret = entry;
|
||||
else if (entry->bytes < ret->bytes)
|
||||
ret = entry;
|
||||
else if (entry->offset < ret->offset)
|
||||
ret = entry;
|
||||
}
|
||||
n = n->rb_left;
|
||||
} else if (bytes > entry->bytes) {
|
||||
n = n->rb_right;
|
||||
} else {
|
||||
/*
|
||||
* Ok we may have multiple chunks of the wanted size,
|
||||
* so we don't want to take the first one we find, we
|
||||
* want to take the one closest to our given offset, so
|
||||
* keep searching just in case theres a better match.
|
||||
*/
|
||||
n = n->rb_right;
|
||||
if (offset > entry->offset)
|
||||
continue;
|
||||
else if (!ret || entry->offset < ret->offset)
|
||||
ret = entry;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
|
||||
struct btrfs_free_space *info)
|
||||
{
|
||||
rb_erase(&info->offset_index, &block_group->free_space_offset);
|
||||
rb_erase(&info->bytes_index, &block_group->free_space_bytes);
|
||||
}
|
||||
|
||||
static int link_free_space(struct btrfs_block_group_cache *block_group,
|
||||
struct btrfs_free_space *info)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
|
||||
ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
|
||||
&info->offset_index);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
|
||||
&info->bytes_index);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *right_info;
|
||||
struct btrfs_free_space *left_info;
|
||||
struct btrfs_free_space *info = NULL;
|
||||
struct btrfs_free_space *alloc_info;
|
||||
int ret = 0;
|
||||
|
||||
alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
|
||||
if (!alloc_info)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* first we want to see if there is free space adjacent to the range we
|
||||
* are adding, if there is remove that struct and add a new one to
|
||||
* cover the entire range
|
||||
*/
|
||||
right_info = tree_search_offset(&block_group->free_space_offset,
|
||||
offset+bytes, 0, 1);
|
||||
left_info = tree_search_offset(&block_group->free_space_offset,
|
||||
offset-1, 0, 1);
|
||||
|
||||
if (right_info && right_info->offset == offset+bytes) {
|
||||
unlink_free_space(block_group, right_info);
|
||||
info = right_info;
|
||||
info->offset = offset;
|
||||
info->bytes += bytes;
|
||||
} else if (right_info && right_info->offset != offset+bytes) {
|
||||
printk(KERN_ERR "btrfs adding space in the middle of an "
|
||||
"existing free space area. existing: "
|
||||
"offset=%llu, bytes=%llu. new: offset=%llu, "
|
||||
"bytes=%llu\n", (unsigned long long)right_info->offset,
|
||||
(unsigned long long)right_info->bytes,
|
||||
(unsigned long long)offset,
|
||||
(unsigned long long)bytes);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (left_info) {
|
||||
unlink_free_space(block_group, left_info);
|
||||
|
||||
if (unlikely((left_info->offset + left_info->bytes) !=
|
||||
offset)) {
|
||||
printk(KERN_ERR "btrfs free space to the left "
|
||||
"of new free space isn't "
|
||||
"quite right. existing: offset=%llu, "
|
||||
"bytes=%llu. new: offset=%llu, bytes=%llu\n",
|
||||
(unsigned long long)left_info->offset,
|
||||
(unsigned long long)left_info->bytes,
|
||||
(unsigned long long)offset,
|
||||
(unsigned long long)bytes);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (info) {
|
||||
info->offset = left_info->offset;
|
||||
info->bytes += left_info->bytes;
|
||||
kfree(left_info);
|
||||
} else {
|
||||
info = left_info;
|
||||
info->bytes += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
if (info) {
|
||||
ret = link_free_space(block_group, info);
|
||||
if (!ret)
|
||||
info = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
info = alloc_info;
|
||||
alloc_info = NULL;
|
||||
info->offset = offset;
|
||||
info->bytes = bytes;
|
||||
|
||||
ret = link_free_space(block_group, info);
|
||||
if (ret)
|
||||
kfree(info);
|
||||
out:
|
||||
if (ret) {
|
||||
printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret);
|
||||
if (ret == -EEXIST)
|
||||
BUG();
|
||||
}
|
||||
|
||||
kfree(alloc_info);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
__btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
int ret = 0;
|
||||
|
||||
info = tree_search_offset(&block_group->free_space_offset, offset, 0,
|
||||
1);
|
||||
|
||||
if (info && info->offset == offset) {
|
||||
if (info->bytes < bytes) {
|
||||
printk(KERN_ERR "Found free space at %llu, size %llu,"
|
||||
"trying to use %llu\n",
|
||||
(unsigned long long)info->offset,
|
||||
(unsigned long long)info->bytes,
|
||||
(unsigned long long)bytes);
|
||||
WARN_ON(1);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
unlink_free_space(block_group, info);
|
||||
|
||||
if (info->bytes == bytes) {
|
||||
kfree(info);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->offset += bytes;
|
||||
info->bytes -= bytes;
|
||||
|
||||
ret = link_free_space(block_group, info);
|
||||
BUG_ON(ret);
|
||||
} else if (info && info->offset < offset &&
|
||||
info->offset + info->bytes >= offset + bytes) {
|
||||
u64 old_start = info->offset;
|
||||
/*
|
||||
* we're freeing space in the middle of the info,
|
||||
* this can happen during tree log replay
|
||||
*
|
||||
* first unlink the old info and then
|
||||
* insert it again after the hole we're creating
|
||||
*/
|
||||
unlink_free_space(block_group, info);
|
||||
if (offset + bytes < info->offset + info->bytes) {
|
||||
u64 old_end = info->offset + info->bytes;
|
||||
|
||||
info->offset = offset + bytes;
|
||||
info->bytes = old_end - info->offset;
|
||||
ret = link_free_space(block_group, info);
|
||||
BUG_ON(ret);
|
||||
} else {
|
||||
/* the hole we're creating ends at the end
|
||||
* of the info struct, just free the info
|
||||
*/
|
||||
kfree(info);
|
||||
}
|
||||
|
||||
/* step two, insert a new info struct to cover anything
|
||||
* before the hole
|
||||
*/
|
||||
ret = __btrfs_add_free_space(block_group, old_start,
|
||||
offset - old_start);
|
||||
BUG_ON(ret);
|
||||
} else {
|
||||
WARN_ON(1);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_free_space *sp;
|
||||
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
ret = __btrfs_add_free_space(block_group, offset, bytes);
|
||||
sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
|
||||
BUG_ON(!sp);
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_free_space *sp;
|
||||
|
||||
ret = __btrfs_add_free_space(block_group, offset, bytes);
|
||||
sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
|
||||
BUG_ON(!sp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
ret = __btrfs_remove_free_space(block_group, offset, bytes);
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = __btrfs_remove_free_space(block_group, offset, bytes);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
struct rb_node *n;
|
||||
int count = 0;
|
||||
|
||||
for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
|
||||
info = rb_entry(n, struct btrfs_free_space, offset_index);
|
||||
if (info->bytes >= bytes)
|
||||
count++;
|
||||
}
|
||||
printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
|
||||
"\n", count);
|
||||
}
|
||||
|
||||
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
struct rb_node *n;
|
||||
u64 ret = 0;
|
||||
|
||||
for (n = rb_first(&block_group->free_space_offset); n;
|
||||
n = rb_next(n)) {
|
||||
info = rb_entry(n, struct btrfs_free_space, offset_index);
|
||||
ret += info->bytes;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
struct rb_node *node;
|
||||
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
|
||||
info = rb_entry(node, struct btrfs_free_space, bytes_index);
|
||||
unlink_free_space(block_group, info);
|
||||
kfree(info);
|
||||
if (need_resched()) {
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
cond_resched();
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static struct btrfs_free_space *btrfs_find_free_space_offset(struct
|
||||
btrfs_block_group_cache
|
||||
*block_group, u64 offset,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *ret;
|
||||
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
ret = tree_search_offset(&block_group->free_space_offset, offset,
|
||||
bytes, 0);
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct btrfs_free_space *btrfs_find_free_space_bytes(struct
|
||||
btrfs_block_group_cache
|
||||
*block_group, u64 offset,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *ret;
|
||||
|
||||
mutex_lock(&block_group->alloc_mutex);
|
||||
|
||||
ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes);
|
||||
mutex_unlock(&block_group->alloc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
|
||||
*block_group, u64 offset,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_free_space *ret = NULL;
|
||||
|
||||
ret = tree_search_offset(&block_group->free_space_offset, offset,
|
||||
bytes, 0);
|
||||
if (!ret)
|
||||
ret = tree_search_bytes(&block_group->free_space_bytes,
|
||||
offset, bytes);
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __HASH__
|
||||
#define __HASH__
|
||||
|
||||
#include "crc32c.h"
|
||||
static inline u64 btrfs_name_hash(const char *name, int len)
|
||||
{
|
||||
return btrfs_crc32c((u32)~1, name, len);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
||||
static int find_name_in_backref(struct btrfs_path *path, const char *name,
|
||||
int name_len, struct btrfs_inode_ref **ref_ret)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_inode_ref *ref;
|
||||
unsigned long ptr;
|
||||
unsigned long name_ptr;
|
||||
u32 item_size;
|
||||
u32 cur_offset = 0;
|
||||
int len;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
while (cur_offset < item_size) {
|
||||
ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
|
||||
len = btrfs_inode_ref_name_len(leaf, ref);
|
||||
name_ptr = (unsigned long)(ref + 1);
|
||||
cur_offset += len + sizeof(*ref);
|
||||
if (len != name_len)
|
||||
continue;
|
||||
if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
|
||||
*ref_ret = ref;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 *index)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_inode_ref *ref;
|
||||
struct extent_buffer *leaf;
|
||||
unsigned long ptr;
|
||||
unsigned long item_start;
|
||||
u32 item_size;
|
||||
u32 sub_item_len;
|
||||
int ret;
|
||||
int del_len = name_len + sizeof(*ref);
|
||||
|
||||
key.objectid = inode_objectid;
|
||||
key.offset = ref_objectid;
|
||||
btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret > 0) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
} else if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
if (!find_name_in_backref(path, name, name_len, &ref)) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
|
||||
if (index)
|
||||
*index = btrfs_inode_ref_index(leaf, ref);
|
||||
|
||||
if (del_len == item_size) {
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
goto out;
|
||||
}
|
||||
ptr = (unsigned long)ref;
|
||||
sub_item_len = name_len + sizeof(*ref);
|
||||
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
|
||||
item_size - (ptr + sub_item_len - item_start));
|
||||
ret = btrfs_truncate_item(trans, root, path,
|
||||
item_size - sub_item_len, 1);
|
||||
BUG_ON(ret);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 index)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_inode_ref *ref;
|
||||
unsigned long ptr;
|
||||
int ret;
|
||||
int ins_len = name_len + sizeof(*ref);
|
||||
|
||||
key.objectid = inode_objectid;
|
||||
key.offset = ref_objectid;
|
||||
btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, root, path, &key,
|
||||
ins_len);
|
||||
if (ret == -EEXIST) {
|
||||
u32 old_size;
|
||||
|
||||
if (find_name_in_backref(path, name, name_len, &ref))
|
||||
goto out;
|
||||
|
||||
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
|
||||
ret = btrfs_extend_item(trans, root, path, ins_len);
|
||||
BUG_ON(ret);
|
||||
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_inode_ref);
|
||||
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
|
||||
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
ret = 0;
|
||||
} else if (ret < 0) {
|
||||
goto out;
|
||||
} else {
|
||||
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_inode_ref);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
|
||||
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
}
|
||||
write_extent_buffer(path->nodes[0], name, ptr, name_len);
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
key.objectid = objectid;
|
||||
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, root, path, &key,
|
||||
sizeof(struct btrfs_inode_item));
|
||||
if (ret == 0 && objectid > root->highest_inode)
|
||||
root->highest_inode = objectid;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_path *path,
|
||||
struct btrfs_key *location, int mod)
|
||||
{
|
||||
int ins_len = mod < 0 ? -1 : 0;
|
||||
int cow = mod != 0;
|
||||
int ret;
|
||||
int slot;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_key found_key;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
|
||||
if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY &&
|
||||
location->offset == (u64)-1 && path->slots[0] != 0) {
|
||||
slot = path->slots[0] - 1;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
if (found_key.objectid == location->objectid &&
|
||||
btrfs_key_type(&found_key) == btrfs_key_type(location)) {
|
||||
path->slots[0]--;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
||||
int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
struct extent_buffer *l;
|
||||
struct btrfs_key search_key;
|
||||
struct btrfs_key found_key;
|
||||
int slot;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
|
||||
search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
|
||||
search_key.type = -1;
|
||||
search_key.offset = (u64)-1;
|
||||
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
BUG_ON(ret == 0);
|
||||
if (path->slots[0] > 0) {
|
||||
slot = path->slots[0] - 1;
|
||||
l = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(l, &found_key, slot);
|
||||
*objectid = found_key.objectid;
|
||||
} else {
|
||||
*objectid = BTRFS_FIRST_FREE_OBJECTID;
|
||||
}
|
||||
ret = 0;
|
||||
error:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* walks the btree of allocated inodes and find a hole.
|
||||
*/
|
||||
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 dirid, u64 *objectid)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
int slot = 0;
|
||||
u64 last_ino = 0;
|
||||
int start_found;
|
||||
struct extent_buffer *l;
|
||||
struct btrfs_key search_key;
|
||||
u64 search_start = dirid;
|
||||
|
||||
mutex_lock(&root->objectid_mutex);
|
||||
if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
|
||||
root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
|
||||
*objectid = ++root->last_inode_alloc;
|
||||
mutex_unlock(&root->objectid_mutex);
|
||||
return 0;
|
||||
}
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID);
|
||||
search_key.objectid = search_start;
|
||||
search_key.type = 0;
|
||||
search_key.offset = 0;
|
||||
|
||||
btrfs_init_path(path);
|
||||
start_found = 0;
|
||||
ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
while (1) {
|
||||
l = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(l)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret == 0)
|
||||
continue;
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
if (!start_found) {
|
||||
*objectid = search_start;
|
||||
start_found = 1;
|
||||
goto found;
|
||||
}
|
||||
*objectid = last_ino > search_start ?
|
||||
last_ino : search_start;
|
||||
goto found;
|
||||
}
|
||||
btrfs_item_key_to_cpu(l, &key, slot);
|
||||
if (key.objectid >= search_start) {
|
||||
if (start_found) {
|
||||
if (last_ino < search_start)
|
||||
last_ino = search_start;
|
||||
if (key.objectid > last_ino) {
|
||||
*objectid = last_ino;
|
||||
goto found;
|
||||
}
|
||||
} else if (key.objectid > search_start) {
|
||||
*objectid = search_start;
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
|
||||
break;
|
||||
|
||||
start_found = 1;
|
||||
last_ino = key.objectid + 1;
|
||||
path->slots[0]++;
|
||||
}
|
||||
BUG_ON(1);
|
||||
found:
|
||||
btrfs_release_path(root, path);
|
||||
btrfs_free_path(path);
|
||||
BUG_ON(*objectid < search_start);
|
||||
mutex_unlock(&root->objectid_mutex);
|
||||
return 0;
|
||||
error:
|
||||
btrfs_release_path(root, path);
|
||||
btrfs_free_path(path);
|
||||
mutex_unlock(&root->objectid_mutex);
|
||||
return ret;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __IOCTL_
|
||||
#define __IOCTL_
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
#define BTRFS_IOCTL_MAGIC 0x94
|
||||
#define BTRFS_VOL_NAME_MAX 255
|
||||
#define BTRFS_PATH_NAME_MAX 3072
|
||||
|
||||
struct btrfs_ioctl_vol_args {
|
||||
__s64 fd;
|
||||
char name[BTRFS_PATH_NAME_MAX + 1];
|
||||
};
|
||||
|
||||
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
/* trans start and trans end are dangerous, and only for
|
||||
* use by applications that know how to avoid the
|
||||
* resulting deadlocks
|
||||
*/
|
||||
#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
|
||||
#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
|
||||
#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
|
||||
|
||||
#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
|
||||
#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
struct btrfs_ioctl_clone_range_args {
|
||||
__s64 src_fd;
|
||||
__u64 src_offset, src_length;
|
||||
__u64 dest_offset;
|
||||
};
|
||||
|
||||
#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
|
||||
struct btrfs_ioctl_clone_range_args)
|
||||
|
||||
#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
|
||||
#endif
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/page-flags.h>
|
||||
#include <asm/bug.h>
|
||||
#include "ctree.h"
|
||||
#include "extent_io.h"
|
||||
#include "locking.h"
|
||||
|
||||
/*
|
||||
* locks the per buffer mutex in an extent buffer. This uses adaptive locks
|
||||
* and the spin is not tuned very extensively. The spinning does make a big
|
||||
* difference in almost every workload, but spinning for the right amount of
|
||||
* time needs some help.
|
||||
*
|
||||
* In general, we want to spin as long as the lock holder is doing btree
|
||||
* searches, and we should give up if they are in more expensive code.
|
||||
*/
|
||||
|
||||
int btrfs_tree_lock(struct extent_buffer *eb)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (mutex_trylock(&eb->mutex))
|
||||
return 0;
|
||||
for (i = 0; i < 512; i++) {
|
||||
cpu_relax();
|
||||
if (mutex_trylock(&eb->mutex))
|
||||
return 0;
|
||||
}
|
||||
cpu_relax();
|
||||
mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_try_tree_lock(struct extent_buffer *eb)
|
||||
{
|
||||
return mutex_trylock(&eb->mutex);
|
||||
}
|
||||
|
||||
int btrfs_tree_unlock(struct extent_buffer *eb)
|
||||
{
|
||||
mutex_unlock(&eb->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_tree_locked(struct extent_buffer *eb)
|
||||
{
|
||||
return mutex_is_locked(&eb->mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_search_slot uses this to decide if it should drop its locks
|
||||
* before doing something expensive like allocating free blocks for cow.
|
||||
*/
|
||||
int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
|
||||
{
|
||||
int i;
|
||||
struct extent_buffer *eb;
|
||||
for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) {
|
||||
eb = path->nodes[i];
|
||||
if (!eb)
|
||||
break;
|
||||
smp_mb();
|
||||
if (!list_empty(&eb->mutex.wait_list))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_LOCKING_
|
||||
#define __BTRFS_LOCKING_
|
||||
|
||||
/* blocking and non-blocking acquisition of the extent buffer mutex */
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_try_tree_lock(struct extent_buffer *eb);

/* release the mutex / query whether it is held */
int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_tree_locked(struct extent_buffer *eb);

/* non-zero when another task waits on a lock held at level or level + 1 */
int btrfs_path_lock_waiting(struct btrfs_path *path, int level);
|
||||
#endif
|
|
@ -0,0 +1,730 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include "ctree.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "extent_io.h"
|
||||
|
||||
static u64 entry_end(struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
if (entry->file_offset + entry->len < entry->file_offset)
|
||||
return (u64)-1;
|
||||
return entry->file_offset + entry->len;
|
||||
}
|
||||
|
||||
/* returns NULL if the insertion worked, or it returns the node it did find
|
||||
* in the tree
|
||||
*/
|
||||
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
|
||||
struct rb_node *node)
|
||||
{
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct btrfs_ordered_extent *entry;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
|
||||
|
||||
if (file_offset < entry->file_offset)
|
||||
p = &(*p)->rb_left;
|
||||
else if (file_offset >= entry_end(entry))
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
return parent;
|
||||
}
|
||||
|
||||
rb_link_node(node, parent, p);
|
||||
rb_insert_color(node, root);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* look for a given offset in the tree, and if it can't be found return the
|
||||
* first lesser offset
|
||||
*/
|
||||
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
|
||||
struct rb_node **prev_ret)
|
||||
{
|
||||
struct rb_node *n = root->rb_node;
|
||||
struct rb_node *prev = NULL;
|
||||
struct rb_node *test;
|
||||
struct btrfs_ordered_extent *entry;
|
||||
struct btrfs_ordered_extent *prev_entry = NULL;
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
|
||||
prev = n;
|
||||
prev_entry = entry;
|
||||
|
||||
if (file_offset < entry->file_offset)
|
||||
n = n->rb_left;
|
||||
else if (file_offset >= entry_end(entry))
|
||||
n = n->rb_right;
|
||||
else
|
||||
return n;
|
||||
}
|
||||
if (!prev_ret)
|
||||
return NULL;
|
||||
|
||||
while (prev && file_offset >= entry_end(prev_entry)) {
|
||||
test = rb_next(prev);
|
||||
if (!test)
|
||||
break;
|
||||
prev_entry = rb_entry(test, struct btrfs_ordered_extent,
|
||||
rb_node);
|
||||
if (file_offset < entry_end(prev_entry))
|
||||
break;
|
||||
|
||||
prev = test;
|
||||
}
|
||||
if (prev)
|
||||
prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
|
||||
rb_node);
|
||||
while (prev && file_offset < entry_end(prev_entry)) {
|
||||
test = rb_prev(prev);
|
||||
if (!test)
|
||||
break;
|
||||
prev_entry = rb_entry(test, struct btrfs_ordered_extent,
|
||||
rb_node);
|
||||
prev = test;
|
||||
}
|
||||
*prev_ret = prev;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to check if a given offset is inside a given entry
|
||||
*/
|
||||
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
|
||||
{
|
||||
if (file_offset < entry->file_offset ||
|
||||
entry->file_offset + entry->len <= file_offset)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* look find the first ordered struct that has this offset, otherwise
|
||||
* the first one less than this offset
|
||||
*/
|
||||
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
||||
u64 file_offset)
|
||||
{
|
||||
struct rb_root *root = &tree->tree;
|
||||
struct rb_node *prev;
|
||||
struct rb_node *ret;
|
||||
struct btrfs_ordered_extent *entry;
|
||||
|
||||
if (tree->last) {
|
||||
entry = rb_entry(tree->last, struct btrfs_ordered_extent,
|
||||
rb_node);
|
||||
if (offset_in_entry(entry, file_offset))
|
||||
return tree->last;
|
||||
}
|
||||
ret = __tree_search(root, file_offset, &prev);
|
||||
if (!ret)
|
||||
ret = prev;
|
||||
if (ret)
|
||||
tree->last = ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* allocate and add a new ordered_extent into the per-inode tree.
|
||||
* file_offset is the logical offset in the file
|
||||
*
|
||||
* start is the disk block number of an extent already reserved in the
|
||||
* extent allocation tree
|
||||
*
|
||||
* len is the length of the extent
|
||||
*
|
||||
* This also sets the EXTENT_ORDERED bit on the range in the inode.
|
||||
*
|
||||
* The tree is given a single reference on the ordered extent that was
|
||||
* inserted.
|
||||
*/
|
||||
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
struct btrfs_ordered_extent *entry;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
entry = kzalloc(sizeof(*entry), GFP_NOFS);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&tree->mutex);
|
||||
entry->file_offset = file_offset;
|
||||
entry->start = start;
|
||||
entry->len = len;
|
||||
entry->disk_len = disk_len;
|
||||
entry->inode = inode;
|
||||
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
|
||||
set_bit(type, &entry->flags);
|
||||
|
||||
/* one ref for the tree */
|
||||
atomic_set(&entry->refs, 1);
|
||||
init_waitqueue_head(&entry->wait);
|
||||
INIT_LIST_HEAD(&entry->list);
|
||||
INIT_LIST_HEAD(&entry->root_extent_list);
|
||||
|
||||
node = tree_insert(&tree->tree, file_offset,
|
||||
&entry->rb_node);
|
||||
BUG_ON(node);
|
||||
|
||||
set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
|
||||
entry_end(entry) - 1, GFP_NOFS);
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
|
||||
list_add_tail(&entry->root_extent_list,
|
||||
&BTRFS_I(inode)->root->fs_info->ordered_extents);
|
||||
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
|
||||
|
||||
mutex_unlock(&tree->mutex);
|
||||
BUG_ON(node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
|
||||
* when an ordered extent is finished. If the list covers more than one
|
||||
* ordered extent, it is split across multiples.
|
||||
*/
|
||||
int btrfs_add_ordered_sum(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
mutex_lock(&tree->mutex);
|
||||
list_add_tail(&sum->list, &entry->list);
|
||||
mutex_unlock(&tree->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* this is used to account for finished IO across a given range
|
||||
* of the file. The IO should not span ordered extents. If
|
||||
* a given ordered_extent is completely done, 1 is returned, otherwise
|
||||
* 0.
|
||||
*
|
||||
* test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
|
||||
* to make sure this function only returns 1 once for a given ordered extent.
|
||||
*/
|
||||
int btrfs_dec_test_ordered_pending(struct inode *inode,
|
||||
u64 file_offset, u64 io_size)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
struct btrfs_ordered_extent *entry;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
int ret;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
mutex_lock(&tree->mutex);
|
||||
clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
|
||||
GFP_NOFS);
|
||||
node = tree_search(tree, file_offset);
|
||||
if (!node) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
if (!offset_in_entry(entry, file_offset)) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = test_range_bit(io_tree, entry->file_offset,
|
||||
entry->file_offset + entry->len - 1,
|
||||
EXTENT_ORDERED, 0);
|
||||
if (ret == 0)
|
||||
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
|
||||
out:
|
||||
mutex_unlock(&tree->mutex);
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* used to drop a reference on an ordered extent. This will free
|
||||
* the extent if the last reference is dropped
|
||||
*/
|
||||
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
struct list_head *cur;
|
||||
struct btrfs_ordered_sum *sum;
|
||||
|
||||
if (atomic_dec_and_test(&entry->refs)) {
|
||||
while (!list_empty(&entry->list)) {
|
||||
cur = entry->list.next;
|
||||
sum = list_entry(cur, struct btrfs_ordered_sum, list);
|
||||
list_del(&sum->list);
|
||||
kfree(sum);
|
||||
}
|
||||
kfree(entry);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove an ordered extent from the tree. No references are dropped
|
||||
* but, anyone waiting on this extent is woken up.
|
||||
*/
|
||||
int btrfs_remove_ordered_extent(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
mutex_lock(&tree->mutex);
|
||||
node = &entry->rb_node;
|
||||
rb_erase(node, &tree->tree);
|
||||
tree->last = NULL;
|
||||
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
|
||||
list_del_init(&entry->root_extent_list);
|
||||
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
|
||||
|
||||
mutex_unlock(&tree->mutex);
|
||||
wake_up(&entry->wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* wait for all the ordered extents in a root. This is done when balancing
|
||||
* space between drives.
|
||||
*/
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
|
||||
{
|
||||
struct list_head splice;
|
||||
struct list_head *cur;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct inode *inode;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
spin_lock(&root->fs_info->ordered_extent_lock);
|
||||
list_splice_init(&root->fs_info->ordered_extents, &splice);
|
||||
while (!list_empty(&splice)) {
|
||||
cur = splice.next;
|
||||
ordered = list_entry(cur, struct btrfs_ordered_extent,
|
||||
root_extent_list);
|
||||
if (nocow_only &&
|
||||
!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
|
||||
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
|
||||
list_move(&ordered->root_extent_list,
|
||||
&root->fs_info->ordered_extents);
|
||||
cond_resched_lock(&root->fs_info->ordered_extent_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
list_del_init(&ordered->root_extent_list);
|
||||
atomic_inc(&ordered->refs);
|
||||
|
||||
/*
|
||||
* the inode may be getting freed (in sys_unlink path).
|
||||
*/
|
||||
inode = igrab(ordered->inode);
|
||||
|
||||
spin_unlock(&root->fs_info->ordered_extent_lock);
|
||||
|
||||
if (inode) {
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
iput(inode);
|
||||
} else {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
|
||||
spin_lock(&root->fs_info->ordered_extent_lock);
|
||||
}
|
||||
spin_unlock(&root->fs_info->ordered_extent_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used to start IO or wait for a given ordered extent to finish.
|
||||
*
|
||||
* If wait is one, this effectively waits on page writeback for all the pages
|
||||
* in the extent, and it waits on the io completion code to insert
|
||||
* metadata into the btree corresponding to the extent
|
||||
*/
|
||||
void btrfs_start_ordered_extent(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry,
|
||||
int wait)
|
||||
{
|
||||
u64 start = entry->file_offset;
|
||||
u64 end = start + entry->len - 1;
|
||||
|
||||
/*
|
||||
* pages in the range can be dirty, clean or writeback. We
|
||||
* start IO on any dirty ones so the wait doesn't stall waiting
|
||||
* for pdflush to find them
|
||||
*/
|
||||
btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL);
|
||||
if (wait) {
|
||||
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
|
||||
&entry->flags));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Used to wait on ordered extents across a large range of bytes.
|
||||
*/
|
||||
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
u64 orig_end;
|
||||
u64 wait_end;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
if (start + len < start) {
|
||||
orig_end = INT_LIMIT(loff_t);
|
||||
} else {
|
||||
orig_end = start + len - 1;
|
||||
if (orig_end > INT_LIMIT(loff_t))
|
||||
orig_end = INT_LIMIT(loff_t);
|
||||
}
|
||||
wait_end = orig_end;
|
||||
again:
|
||||
/* start IO across the range first to instantiate any delalloc
|
||||
* extents
|
||||
*/
|
||||
btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
|
||||
|
||||
/* The compression code will leave pages locked but return from
|
||||
* writepage without setting the page writeback. Starting again
|
||||
* with WB_SYNC_ALL will end up waiting for the IO to actually start.
|
||||
*/
|
||||
btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
|
||||
|
||||
btrfs_wait_on_page_writeback_range(inode->i_mapping,
|
||||
start >> PAGE_CACHE_SHIFT,
|
||||
orig_end >> PAGE_CACHE_SHIFT);
|
||||
|
||||
end = orig_end;
|
||||
while (1) {
|
||||
ordered = btrfs_lookup_first_ordered_extent(inode, end);
|
||||
if (!ordered)
|
||||
break;
|
||||
if (ordered->file_offset > orig_end) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
break;
|
||||
}
|
||||
if (ordered->file_offset + ordered->len < start) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
break;
|
||||
}
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
end = ordered->file_offset;
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
if (end == 0 || end == start)
|
||||
break;
|
||||
end--;
|
||||
}
|
||||
if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
|
||||
EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
|
||||
schedule_timeout(1);
|
||||
goto again;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* find an ordered extent corresponding to file_offset. return NULL if
|
||||
* nothing is found, otherwise take a reference on the extent and return it
|
||||
*/
|
||||
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
|
||||
u64 file_offset)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
struct btrfs_ordered_extent *entry = NULL;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
mutex_lock(&tree->mutex);
|
||||
node = tree_search(tree, file_offset);
|
||||
if (!node)
|
||||
goto out;
|
||||
|
||||
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
if (!offset_in_entry(entry, file_offset))
|
||||
entry = NULL;
|
||||
if (entry)
|
||||
atomic_inc(&entry->refs);
|
||||
out:
|
||||
mutex_unlock(&tree->mutex);
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup and return any extent before 'file_offset'. NULL is returned
|
||||
* if none is found
|
||||
*/
|
||||
struct btrfs_ordered_extent *
|
||||
btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
struct btrfs_ordered_extent *entry = NULL;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
mutex_lock(&tree->mutex);
|
||||
node = tree_search(tree, file_offset);
|
||||
if (!node)
|
||||
goto out;
|
||||
|
||||
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
atomic_inc(&entry->refs);
|
||||
out:
|
||||
mutex_unlock(&tree->mutex);
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* After an extent is done, call this to conditionally update the on disk
|
||||
* i_size. i_size is updated to cover any fully written part of the file.
|
||||
*/
|
||||
int btrfs_ordered_update_i_size(struct inode *inode,
|
||||
struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
u64 disk_i_size;
|
||||
u64 new_i_size;
|
||||
u64 i_size_test;
|
||||
struct rb_node *node;
|
||||
struct btrfs_ordered_extent *test;
|
||||
|
||||
mutex_lock(&tree->mutex);
|
||||
disk_i_size = BTRFS_I(inode)->disk_i_size;
|
||||
|
||||
/*
|
||||
* if the disk i_size is already at the inode->i_size, or
|
||||
* this ordered extent is inside the disk i_size, we're done
|
||||
*/
|
||||
if (disk_i_size >= inode->i_size ||
|
||||
ordered->file_offset + ordered->len <= disk_i_size) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* we can't update the disk_isize if there are delalloc bytes
|
||||
* between disk_i_size and this ordered extent
|
||||
*/
|
||||
if (test_range_bit(io_tree, disk_i_size,
|
||||
ordered->file_offset + ordered->len - 1,
|
||||
EXTENT_DELALLOC, 0)) {
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* walk backward from this ordered extent to disk_i_size.
|
||||
* if we find an ordered extent then we can't update disk i_size
|
||||
* yet
|
||||
*/
|
||||
node = &ordered->rb_node;
|
||||
while (1) {
|
||||
node = rb_prev(node);
|
||||
if (!node)
|
||||
break;
|
||||
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
if (test->file_offset + test->len <= disk_i_size)
|
||||
break;
|
||||
if (test->file_offset >= inode->i_size)
|
||||
break;
|
||||
if (test->file_offset >= disk_i_size)
|
||||
goto out;
|
||||
}
|
||||
new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
|
||||
|
||||
/*
|
||||
* at this point, we know we can safely update i_size to at least
|
||||
* the offset from this ordered extent. But, we need to
|
||||
* walk forward and see if ios from higher up in the file have
|
||||
* finished.
|
||||
*/
|
||||
node = rb_next(&ordered->rb_node);
|
||||
i_size_test = 0;
|
||||
if (node) {
|
||||
/*
|
||||
* do we have an area where IO might have finished
|
||||
* between our ordered extent and the next one.
|
||||
*/
|
||||
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
if (test->file_offset > entry_end(ordered))
|
||||
i_size_test = test->file_offset;
|
||||
} else {
|
||||
i_size_test = i_size_read(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* i_size_test is the end of a region after this ordered
|
||||
* extent where there are no ordered extents. As long as there
|
||||
* are no delalloc bytes in this area, it is safe to update
|
||||
* disk_i_size to the end of the region.
|
||||
*/
|
||||
if (i_size_test > entry_end(ordered) &&
|
||||
!test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
|
||||
EXTENT_DELALLOC, 0)) {
|
||||
new_i_size = min_t(u64, i_size_test, i_size_read(inode));
|
||||
}
|
||||
BTRFS_I(inode)->disk_i_size = new_i_size;
|
||||
out:
|
||||
mutex_unlock(&tree->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* search the ordered extents for one corresponding to 'offset' and
|
||||
* try to find a checksum. This is used because we allow pages to
|
||||
* be reclaimed before their checksum is actually put into the btree
|
||||
*/
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u32 *sum)
|
||||
{
|
||||
struct btrfs_ordered_sum *ordered_sum;
|
||||
struct btrfs_sector_sum *sector_sums;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
|
||||
struct list_head *cur;
|
||||
unsigned long num_sectors;
|
||||
unsigned long i;
|
||||
u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
|
||||
int ret = 1;
|
||||
|
||||
ordered = btrfs_lookup_ordered_extent(inode, offset);
|
||||
if (!ordered)
|
||||
return 1;
|
||||
|
||||
mutex_lock(&tree->mutex);
|
||||
list_for_each_prev(cur, &ordered->list) {
|
||||
ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
|
||||
if (disk_bytenr >= ordered_sum->bytenr) {
|
||||
num_sectors = ordered_sum->len / sectorsize;
|
||||
sector_sums = ordered_sum->sums;
|
||||
for (i = 0; i < num_sectors; i++) {
|
||||
if (sector_sums[i].bytenr == disk_bytenr) {
|
||||
*sum = sector_sums[i].sum;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&tree->mutex);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* taken from mm/filemap.c because it isn't exported
|
||||
*
|
||||
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
|
||||
* @mapping: address space structure to write
|
||||
* @start: offset in bytes where the range starts
|
||||
* @end: offset in bytes where the range ends (inclusive)
|
||||
* @sync_mode: enable synchronous operation
|
||||
*
|
||||
* Start writeback against all of a mapping's dirty pages that lie
|
||||
* within the byte offsets <start, end> inclusive.
|
||||
*
|
||||
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
|
||||
* opposed to a regular memory cleansing writeback. The difference between
|
||||
* these two operations is that if a dirty page/buffer is encountered, it must
|
||||
* be waited upon, and not just skipped over.
|
||||
*/
|
||||
int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end, int sync_mode)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = sync_mode,
|
||||
.nr_to_write = mapping->nrpages * 2,
|
||||
.range_start = start,
|
||||
.range_end = end,
|
||||
.for_writepages = 1,
|
||||
};
|
||||
return btrfs_writepages(mapping, &wbc);
|
||||
}
|
||||
|
||||
/**
|
||||
* taken from mm/filemap.c because it isn't exported
|
||||
*
|
||||
* wait_on_page_writeback_range - wait for writeback to complete
|
||||
* @mapping: target address_space
|
||||
* @start: beginning page index
|
||||
* @end: ending page index
|
||||
*
|
||||
* Wait for writeback to complete against pages indexed by start->end
|
||||
* inclusive
|
||||
*/
|
||||
int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
|
||||
pgoff_t start, pgoff_t end)
|
||||
{
|
||||
struct pagevec pvec;
|
||||
int nr_pages;
|
||||
int ret = 0;
|
||||
pgoff_t index;
|
||||
|
||||
if (end < start)
|
||||
return 0;
|
||||
|
||||
pagevec_init(&pvec, 0);
|
||||
index = start;
|
||||
while ((index <= end) &&
|
||||
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
|
||||
PAGECACHE_TAG_WRITEBACK,
|
||||
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
|
||||
/* until radix tree lookup accepts end_index */
|
||||
if (page->index > end)
|
||||
continue;
|
||||
|
||||
wait_on_page_writeback(page);
|
||||
if (PageError(page))
|
||||
ret = -EIO;
|
||||
}
|
||||
pagevec_release(&pvec);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* Check for outstanding write errors */
|
||||
if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
|
||||
ret = -ENOSPC;
|
||||
if (test_and_clear_bit(AS_EIO, &mapping->flags))
|
||||
ret = -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_ORDERED_DATA__
|
||||
#define __BTRFS_ORDERED_DATA__
|
||||
|
||||
/* one of these per inode */
|
||||
struct btrfs_ordered_inode_tree {
|
||||
struct mutex mutex;
|
||||
struct rb_root tree;
|
||||
struct rb_node *last;
|
||||
};
|
||||
|
||||
/*
|
||||
* these are used to collect checksums done just before bios submission.
|
||||
* They are attached via a list into the ordered extent, and
|
||||
* checksum items are inserted into the tree after all the blocks in
|
||||
* the ordered extent are on disk
|
||||
*/
|
||||
struct btrfs_sector_sum {
|
||||
/* bytenr on disk */
|
||||
u64 bytenr;
|
||||
u32 sum;
|
||||
};
|
||||
|
||||
struct btrfs_ordered_sum {
|
||||
/* bytenr is the start of this extent on disk */
|
||||
u64 bytenr;
|
||||
|
||||
/*
|
||||
* this is the length in bytes covered by the sums array below.
|
||||
*/
|
||||
unsigned long len;
|
||||
struct list_head list;
|
||||
/* last field is a variable length array of btrfs_sector_sums */
|
||||
struct btrfs_sector_sum sums[];
|
||||
};
|
||||
|
||||
/*
 * bits for the flags field:
 *
 * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
 * It is used to make sure metadata is inserted into the tree only once
 * per extent.
 *
 * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
 * rbtree, just before waking any waiters.  It is used to indicate the
 * IO is done and any metadata is inserted into the tree.
 *
 * The values are bit numbers for set_bit()/test_bit(), not masks.
 */

/* set when all the pages are written */
#define BTRFS_ORDERED_IO_DONE 0

/* set when removed from the tree */
#define BTRFS_ORDERED_COMPLETE 1

/* set when we want to write in place */
#define BTRFS_ORDERED_NOCOW 2

/* writing a compressed extent */
#define BTRFS_ORDERED_COMPRESSED 3

/* set when writing to prealloced extent */
#define BTRFS_ORDERED_PREALLOC 4
|
||||
|
||||
struct btrfs_ordered_extent {
|
||||
/* logical offset in the file */
|
||||
u64 file_offset;
|
||||
|
||||
/* disk byte number */
|
||||
u64 start;
|
||||
|
||||
/* ram length of the extent in bytes */
|
||||
u64 len;
|
||||
|
||||
/* extent length on disk */
|
||||
u64 disk_len;
|
||||
|
||||
/* flags (described above) */
|
||||
unsigned long flags;
|
||||
|
||||
/* reference count */
|
||||
atomic_t refs;
|
||||
|
||||
/* the inode we belong to */
|
||||
struct inode *inode;
|
||||
|
||||
/* list of checksums for insertion when the extent io is done */
|
||||
struct list_head list;
|
||||
|
||||
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
|
||||
wait_queue_head_t wait;
|
||||
|
||||
/* our friendly rbtree entry */
|
||||
struct rb_node rb_node;
|
||||
|
||||
/* a per root list of all the pending ordered extents */
|
||||
struct list_head root_extent_list;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* calculates the total size you need to allocate for an ordered sum
|
||||
* structure spanning 'bytes' in the file
|
||||
*/
|
||||
static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
|
||||
unsigned long bytes)
|
||||
{
|
||||
unsigned long num_sectors = (bytes + root->sectorsize - 1) /
|
||||
root->sectorsize;
|
||||
num_sectors++;
|
||||
return sizeof(struct btrfs_ordered_sum) +
|
||||
num_sectors * sizeof(struct btrfs_sector_sum);
|
||||
}
|
||||
|
||||
static inline void
|
||||
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
|
||||
{
|
||||
mutex_init(&t->mutex);
|
||||
t->tree.rb_node = NULL;
|
||||
t->last = NULL;
|
||||
}
|
||||
|
||||
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
|
||||
int btrfs_remove_ordered_extent(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry);
|
||||
int btrfs_dec_test_ordered_pending(struct inode *inode,
|
||||
u64 file_offset, u64 io_size);
|
||||
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int tyep);
|
||||
int btrfs_add_ordered_sum(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum);
|
||||
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
|
||||
u64 file_offset);
|
||||
void btrfs_start_ordered_extent(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry, int wait);
|
||||
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
|
||||
struct btrfs_ordered_extent *
|
||||
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
|
||||
int btrfs_ordered_update_i_size(struct inode *inode,
|
||||
struct btrfs_ordered_extent *ordered);
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
|
||||
int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
|
||||
pgoff_t start, pgoff_t end);
|
||||
int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end, int sync_mode);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
|
||||
#endif
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Red Hat. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
|
||||
/*
 * Insert an empty (zero sized) item with key
 * (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, offset) into 'root'.
 *
 * Returns -ENOMEM if no path could be allocated, otherwise whatever
 * btrfs_insert_empty_item() returns.
 */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 offset)
{
	struct btrfs_key orphan_key;
	struct btrfs_path *path;
	int err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	orphan_key.objectid = BTRFS_ORPHAN_OBJECTID;
	btrfs_set_key_type(&orphan_key, BTRFS_ORPHAN_ITEM_KEY);
	orphan_key.offset = offset;

	err = btrfs_insert_empty_item(trans, root, path, &orphan_key, 0);

	btrfs_free_path(path);
	return err;
}
|
||||
|
||||
/*
 * Delete the item with key
 * (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, offset) from 'root'.
 *
 * Returns -ENOMEM if no path could be allocated.  A non-zero result from
 * btrfs_search_slot() is passed back unchanged and nothing is deleted;
 * otherwise the result of btrfs_del_item() is returned.
 */
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, u64 offset)
{
	struct btrfs_key orphan_key;
	struct btrfs_path *path;
	int err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	orphan_key.objectid = BTRFS_ORPHAN_OBJECTID;
	btrfs_set_key_type(&orphan_key, BTRFS_ORPHAN_ITEM_KEY);
	orphan_key.offset = offset;

	err = btrfs_search_slot(trans, root, &orphan_key, path, -1, 1);
	if (!err)
		err = btrfs_del_item(trans, root, path);

	btrfs_free_path(path);
	return err;
}
|
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
|
||||
static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
|
||||
{
|
||||
int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
|
||||
int i;
|
||||
printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
|
||||
"num_stripes %d\n",
|
||||
(unsigned long long)btrfs_chunk_length(eb, chunk),
|
||||
(unsigned long long)btrfs_chunk_owner(eb, chunk),
|
||||
(unsigned long long)btrfs_chunk_type(eb, chunk),
|
||||
num_stripes);
|
||||
for (i = 0 ; i < num_stripes ; i++) {
|
||||
printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
|
||||
(unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
|
||||
(unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
|
||||
}
|
||||
}
|
||||
static void print_dev_item(struct extent_buffer *eb,
|
||||
struct btrfs_dev_item *dev_item)
|
||||
{
|
||||
printk(KERN_INFO "\t\tdev item devid %llu "
|
||||
"total_bytes %llu bytes used %llu\n",
|
||||
(unsigned long long)btrfs_device_id(eb, dev_item),
|
||||
(unsigned long long)btrfs_device_total_bytes(eb, dev_item),
|
||||
(unsigned long long)btrfs_device_bytes_used(eb, dev_item));
|
||||
}
|
||||
void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
|
||||
{
|
||||
int i;
|
||||
u32 nr = btrfs_header_nritems(l);
|
||||
struct btrfs_item *item;
|
||||
struct btrfs_extent_item *ei;
|
||||
struct btrfs_root_item *ri;
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_inode_item *ii;
|
||||
struct btrfs_block_group_item *bi;
|
||||
struct btrfs_file_extent_item *fi;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_extent_ref *ref;
|
||||
struct btrfs_dev_extent *dev_extent;
|
||||
u32 type;
|
||||
|
||||
printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
|
||||
(unsigned long long)btrfs_header_bytenr(l), nr,
|
||||
btrfs_leaf_free_space(root, l));
|
||||
for (i = 0 ; i < nr ; i++) {
|
||||
item = btrfs_item_nr(l, i);
|
||||
btrfs_item_key_to_cpu(l, &key, i);
|
||||
type = btrfs_key_type(&key);
|
||||
printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d "
|
||||
"itemsize %d\n",
|
||||
i,
|
||||
(unsigned long long)key.objectid, type,
|
||||
(unsigned long long)key.offset,
|
||||
btrfs_item_offset(l, item), btrfs_item_size(l, item));
|
||||
switch (type) {
|
||||
case BTRFS_INODE_ITEM_KEY:
|
||||
ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
|
||||
printk(KERN_INFO "\t\tinode generation %llu size %llu "
|
||||
"mode %o\n",
|
||||
(unsigned long long)
|
||||
btrfs_inode_generation(l, ii),
|
||||
(unsigned long long)btrfs_inode_size(l, ii),
|
||||
btrfs_inode_mode(l, ii));
|
||||
break;
|
||||
case BTRFS_DIR_ITEM_KEY:
|
||||
di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
|
||||
btrfs_dir_item_key_to_cpu(l, di, &found_key);
|
||||
printk(KERN_INFO "\t\tdir oid %llu type %u\n",
|
||||
(unsigned long long)found_key.objectid,
|
||||
btrfs_dir_type(l, di));
|
||||
break;
|
||||
case BTRFS_ROOT_ITEM_KEY:
|
||||
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
|
||||
printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
|
||||
(unsigned long long)
|
||||
btrfs_disk_root_bytenr(l, ri),
|
||||
btrfs_disk_root_refs(l, ri));
|
||||
break;
|
||||
case BTRFS_EXTENT_ITEM_KEY:
|
||||
ei = btrfs_item_ptr(l, i, struct btrfs_extent_item);
|
||||
printk(KERN_INFO "\t\textent data refs %u\n",
|
||||
btrfs_extent_refs(l, ei));
|
||||
break;
|
||||
case BTRFS_EXTENT_REF_KEY:
|
||||
ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
|
||||
printk(KERN_INFO "\t\textent back ref root %llu "
|
||||
"gen %llu owner %llu num_refs %lu\n",
|
||||
(unsigned long long)btrfs_ref_root(l, ref),
|
||||
(unsigned long long)btrfs_ref_generation(l, ref),
|
||||
(unsigned long long)btrfs_ref_objectid(l, ref),
|
||||
(unsigned long)btrfs_ref_num_refs(l, ref));
|
||||
break;
|
||||
|
||||
case BTRFS_EXTENT_DATA_KEY:
|
||||
fi = btrfs_item_ptr(l, i,
|
||||
struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(l, fi) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
printk(KERN_INFO "\t\tinline extent data "
|
||||
"size %u\n",
|
||||
btrfs_file_extent_inline_len(l, fi));
|
||||
break;
|
||||
}
|
||||
printk(KERN_INFO "\t\textent data disk bytenr %llu "
|
||||
"nr %llu\n",
|
||||
(unsigned long long)
|
||||
btrfs_file_extent_disk_bytenr(l, fi),
|
||||
(unsigned long long)
|
||||
btrfs_file_extent_disk_num_bytes(l, fi));
|
||||
printk(KERN_INFO "\t\textent data offset %llu "
|
||||
"nr %llu ram %llu\n",
|
||||
(unsigned long long)
|
||||
btrfs_file_extent_offset(l, fi),
|
||||
(unsigned long long)
|
||||
btrfs_file_extent_num_bytes(l, fi),
|
||||
(unsigned long long)
|
||||
btrfs_file_extent_ram_bytes(l, fi));
|
||||
break;
|
||||
case BTRFS_BLOCK_GROUP_ITEM_KEY:
|
||||
bi = btrfs_item_ptr(l, i,
|
||||
struct btrfs_block_group_item);
|
||||
printk(KERN_INFO "\t\tblock group used %llu\n",
|
||||
(unsigned long long)
|
||||
btrfs_disk_block_group_used(l, bi));
|
||||
break;
|
||||
case BTRFS_CHUNK_ITEM_KEY:
|
||||
print_chunk(l, btrfs_item_ptr(l, i,
|
||||
struct btrfs_chunk));
|
||||
break;
|
||||
case BTRFS_DEV_ITEM_KEY:
|
||||
print_dev_item(l, btrfs_item_ptr(l, i,
|
||||
struct btrfs_dev_item));
|
||||
break;
|
||||
case BTRFS_DEV_EXTENT_KEY:
|
||||
dev_extent = btrfs_item_ptr(l, i,
|
||||
struct btrfs_dev_extent);
|
||||
printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
|
||||
"\t\tchunk objectid %llu chunk offset %llu "
|
||||
"length %llu\n",
|
||||
(unsigned long long)
|
||||
btrfs_dev_extent_chunk_tree(l, dev_extent),
|
||||
(unsigned long long)
|
||||
btrfs_dev_extent_chunk_objectid(l, dev_extent),
|
||||
(unsigned long long)
|
||||
btrfs_dev_extent_chunk_offset(l, dev_extent),
|
||||
(unsigned long long)
|
||||
btrfs_dev_extent_length(l, dev_extent));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
|
||||
{
|
||||
int i; u32 nr;
|
||||
struct btrfs_key key;
|
||||
int level;
|
||||
|
||||
if (!c)
|
||||
return;
|
||||
nr = btrfs_header_nritems(c);
|
||||
level = btrfs_header_level(c);
|
||||
if (level == 0) {
|
||||
btrfs_print_leaf(root, c);
|
||||
return;
|
||||
}
|
||||
printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
|
||||
(unsigned long long)btrfs_header_bytenr(c),
|
||||
btrfs_header_level(c), nr,
|
||||
(u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
|
||||
for (i = 0; i < nr; i++) {
|
||||
btrfs_node_key_to_cpu(c, &key, i);
|
||||
printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
|
||||
i,
|
||||
(unsigned long long)key.objectid,
|
||||
key.type,
|
||||
(unsigned long long)key.offset,
|
||||
(unsigned long long)btrfs_node_blockptr(c, i));
|
||||
}
|
||||
for (i = 0; i < nr; i++) {
|
||||
struct extent_buffer *next = read_tree_block(root,
|
||||
btrfs_node_blockptr(c, i),
|
||||
btrfs_level_size(root, level - 1),
|
||||
btrfs_node_ptr_generation(c, i));
|
||||
if (btrfs_is_leaf(next) &&
|
||||
btrfs_header_level(c) != 1)
|
||||
BUG();
|
||||
if (btrfs_header_level(next) !=
|
||||
btrfs_header_level(c) - 1)
|
||||
BUG();
|
||||
btrfs_print_tree(root, next);
|
||||
free_extent_buffer(next);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __PRINT_TREE_
|
||||
#define __PRINT_TREE_
|
||||
/* dump a single leaf block to the kernel log */
void btrfs_print_leaf(struct btrfs_root *root,
		      struct extent_buffer *l);

/* dump a node and, recursively, every block below it to the kernel log */
void btrfs_print_tree(struct btrfs_root *root,
		      struct extent_buffer *t);
|
||||
#endif
|
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include "ctree.h"
|
||||
#include "ref-cache.h"
|
||||
#include "transaction.h"
|
||||
|
||||
/*
|
||||
* leaf refs are used to cache the information about which extents
|
||||
* a given leaf has references on. This allows us to process that leaf
|
||||
* in btrfs_drop_snapshot without needing to read it back from disk.
|
||||
*/
|
||||
|
||||
/*
|
||||
* kmalloc a leaf reference struct and update the counters for the
|
||||
* total ref cache size
|
||||
*/
|
||||
struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
|
||||
int nr_extents)
|
||||
{
|
||||
struct btrfs_leaf_ref *ref;
|
||||
size_t size = btrfs_leaf_ref_size(nr_extents);
|
||||
|
||||
ref = kmalloc(size, GFP_NOFS);
|
||||
if (ref) {
|
||||
spin_lock(&root->fs_info->ref_cache_lock);
|
||||
root->fs_info->total_ref_cache_size += size;
|
||||
spin_unlock(&root->fs_info->ref_cache_lock);
|
||||
|
||||
memset(ref, 0, sizeof(*ref));
|
||||
atomic_set(&ref->usage, 1);
|
||||
INIT_LIST_HEAD(&ref->list);
|
||||
}
|
||||
return ref;
|
||||
}
|
||||
|
||||
/*
|
||||
* free a leaf reference struct and update the counters for the
|
||||
* total ref cache size
|
||||
*/
|
||||
void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
||||
{
|
||||
if (!ref)
|
||||
return;
|
||||
WARN_ON(atomic_read(&ref->usage) == 0);
|
||||
if (atomic_dec_and_test(&ref->usage)) {
|
||||
size_t size = btrfs_leaf_ref_size(ref->nritems);
|
||||
|
||||
BUG_ON(ref->in_tree);
|
||||
kfree(ref);
|
||||
|
||||
spin_lock(&root->fs_info->ref_cache_lock);
|
||||
root->fs_info->total_ref_cache_size -= size;
|
||||
spin_unlock(&root->fs_info->ref_cache_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Link 'node' into the rbtree 'root', ordered by 'bytenr'.
 *
 * Returns NULL when the node was inserted.  If an entry with the same
 * bytenr already exists, nothing is inserted and that entry's rb_node is
 * returned instead.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_leaf_ref *entry;

	/* walk down to the leaf position where bytenr belongs */
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);

		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}
|
||||
|
||||
/*
 * Look up the entry whose bytenr equals 'bytenr'.  Returns its rb_node, or
 * NULL when there is no exact match.  Warns about any entry visited on the
 * way that is not flagged in_tree.
 */
static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
{
	struct rb_node *cur;
	struct btrfs_leaf_ref *entry;

	for (cur = root->rb_node; cur; ) {
		entry = rb_entry(cur, struct btrfs_leaf_ref, rb_node);
		WARN_ON(!entry->in_tree);

		if (bytenr == entry->bytenr)
			return cur;

		if (bytenr < entry->bytenr)
			cur = cur->rb_left;
		else
			cur = cur->rb_right;
	}
	return NULL;
}
|
||||
|
||||
/*
 * Remove leaf refs from the front of a tree's list, stopping at the first
 * one whose root_gen is above 'max_root_gen' (or when the list is empty).
 *
 * 'shared' selects fs_info->shared_ref_tree instead of root->ref_tree.
 * Nothing is done when the selected tree pointer is NULL.  Always
 * returns 0.
 */
int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
			   int shared)
{
	struct btrfs_leaf_ref_tree *tree;
	struct btrfs_leaf_ref *victim = NULL;

	tree = shared ? &root->fs_info->shared_ref_tree : root->ref_tree;
	if (!tree)
		return 0;

	spin_lock(&tree->lock);
	for (;;) {
		if (list_empty(&tree->list))
			break;

		victim = list_entry(tree->list.next, struct btrfs_leaf_ref,
				    list);
		BUG_ON(victim->tree != tree);
		if (victim->root_gen > max_root_gen)
			break;

		/*
		 * in_tree was already clear: we did not win the flag, so
		 * leave the entry alone, give the lock a chance to be
		 * taken by someone else and look at the list head again.
		 */
		if (!xchg(&victim->in_tree, 0)) {
			cond_resched_lock(&tree->lock);
			continue;
		}

		rb_erase(&victim->rb_node, &tree->root);
		list_del_init(&victim->list);

		/* drop the tree's usage count outside the spinlock */
		spin_unlock(&tree->lock);
		btrfs_free_leaf_ref(root, victim);
		cond_resched();
		spin_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
	return 0;
}
|
||||
|
||||
/*
|
||||
* find the leaf ref for a given extent. This returns the ref struct with
|
||||
* a usage reference incremented
|
||||
*/
|
||||
struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
|
||||
u64 bytenr)
|
||||
{
|
||||
struct rb_node *rb;
|
||||
struct btrfs_leaf_ref *ref = NULL;
|
||||
struct btrfs_leaf_ref_tree *tree = root->ref_tree;
|
||||
again:
|
||||
if (tree) {
|
||||
spin_lock(&tree->lock);
|
||||
rb = tree_search(&tree->root, bytenr);
|
||||
if (rb)
|
||||
ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
|
||||
if (ref)
|
||||
atomic_inc(&ref->usage);
|
||||
spin_unlock(&tree->lock);
|
||||
if (ref)
|
||||
return ref;
|
||||
}
|
||||
if (tree != &root->fs_info->shared_ref_tree) {
|
||||
tree = &root->fs_info->shared_ref_tree;
|
||||
goto again;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* add a fully filled in leaf ref struct
|
||||
* remove all the refs older than a given root generation
|
||||
*/
|
||||
int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
|
||||
int shared)
|
||||
{
|
||||
int ret = 0;
|
||||
struct rb_node *rb;
|
||||
struct btrfs_leaf_ref_tree *tree = root->ref_tree;
|
||||
|
||||
if (shared)
|
||||
tree = &root->fs_info->shared_ref_tree;
|
||||
|
||||
spin_lock(&tree->lock);
|
||||
rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
|
||||
if (rb) {
|
||||
ret = -EEXIST;
|
||||
} else {
|
||||
atomic_inc(&ref->usage);
|
||||
ref->tree = tree;
|
||||
ref->in_tree = 1;
|
||||
list_add_tail(&ref->list, &tree->list);
|
||||
}
|
||||
spin_unlock(&tree->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove a single leaf ref from the tree. This drops the ref held by the tree
|
||||
* only
|
||||
*/
|
||||
int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
|
||||
{
|
||||
struct btrfs_leaf_ref_tree *tree;
|
||||
|
||||
if (!xchg(&ref->in_tree, 0))
|
||||
return 0;
|
||||
|
||||
tree = ref->tree;
|
||||
spin_lock(&tree->lock);
|
||||
|
||||
rb_erase(&ref->rb_node, &tree->root);
|
||||
list_del_init(&ref->list);
|
||||
|
||||
spin_unlock(&tree->lock);
|
||||
|
||||
btrfs_free_leaf_ref(root, ref);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef __REFCACHE__
|
||||
#define __REFCACHE__
|
||||
|
||||
struct btrfs_extent_info {
|
||||
/* bytenr and num_bytes find the extent in the extent allocation tree */
|
||||
u64 bytenr;
|
||||
u64 num_bytes;
|
||||
|
||||
/* objectid and offset find the back reference for the file */
|
||||
u64 objectid;
|
||||
u64 offset;
|
||||
};
|
||||
|
||||
struct btrfs_leaf_ref {
|
||||
struct rb_node rb_node;
|
||||
struct btrfs_leaf_ref_tree *tree;
|
||||
int in_tree;
|
||||
atomic_t usage;
|
||||
|
||||
u64 root_gen;
|
||||
u64 bytenr;
|
||||
u64 owner;
|
||||
u64 generation;
|
||||
int nritems;
|
||||
|
||||
struct list_head list;
|
||||
struct btrfs_extent_info extents[];
|
||||
};
|
||||
|
||||
static inline size_t btrfs_leaf_ref_size(int nr_extents)
|
||||
{
|
||||
return sizeof(struct btrfs_leaf_ref) +
|
||||
sizeof(struct btrfs_extent_info) * nr_extents;
|
||||
}
|
||||
|
||||
static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
|
||||
{
|
||||
tree->root.rb_node = NULL;
|
||||
INIT_LIST_HEAD(&tree->list);
|
||||
spin_lock_init(&tree->lock);
|
||||
}
|
||||
|
||||
static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
|
||||
{
|
||||
return RB_EMPTY_ROOT(&tree->root);
|
||||
}
|
||||
|
||||
void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
|
||||
struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
|
||||
int nr_extents);
|
||||
void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
|
||||
struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
|
||||
u64 bytenr);
|
||||
int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
|
||||
int shared);
|
||||
int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
|
||||
int shared);
|
||||
int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,366 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "transaction.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
|
||||
/*
|
||||
* search forward for a root, starting with objectid 'search_start'
|
||||
* if a root key is found, the objectid we find is filled into 'found_objectid'
|
||||
* and 0 is returned. < 0 is returned on error, 1 if there is nothing
|
||||
* left in the tree.
|
||||
*/
|
||||
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
|
||||
u64 *found_objectid)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key search_key;
|
||||
int ret;
|
||||
|
||||
root = root->fs_info->tree_root;
|
||||
search_key.objectid = search_start;
|
||||
search_key.type = (u8)-1;
|
||||
search_key.offset = (u64)-1;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
again:
|
||||
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret == 0) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
|
||||
if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
|
||||
search_key.offset++;
|
||||
btrfs_release_path(root, path);
|
||||
goto again;
|
||||
}
|
||||
ret = 0;
|
||||
*found_objectid = search_key.objectid;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup the root with the highest offset for a given objectid. The key we do
|
||||
* find is copied into 'key'. If we find something return 0, otherwise 1, < 0
|
||||
* on error.
|
||||
*/
|
||||
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
|
||||
struct btrfs_root_item *item, struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key search_key;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *l;
|
||||
int ret;
|
||||
int slot;
|
||||
|
||||
search_key.objectid = objectid;
|
||||
search_key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
search_key.offset = (u64)-1;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
BUG_ON(ret == 0);
|
||||
l = path->nodes[0];
|
||||
BUG_ON(path->slots[0] == 0);
|
||||
slot = path->slots[0] - 1;
|
||||
btrfs_item_key_to_cpu(l, &found_key, slot);
|
||||
if (found_key.objectid != objectid) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
|
||||
sizeof(*item));
|
||||
memcpy(key, &found_key, sizeof(found_key));
|
||||
ret = 0;
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* copy the data in 'item' into the btree
|
||||
*/
|
||||
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_key *key, struct btrfs_root_item
|
||||
*item)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *l;
|
||||
int ret;
|
||||
int slot;
|
||||
unsigned long ptr;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (ret != 0) {
|
||||
btrfs_print_leaf(root, path->nodes[0]);
|
||||
printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
|
||||
(unsigned long long)key->objectid, key->type,
|
||||
(unsigned long long)key->offset);
|
||||
BUG_ON(1);
|
||||
}
|
||||
|
||||
l = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
ptr = btrfs_item_ptr_offset(l, slot);
|
||||
write_extent_buffer(l, item, ptr, sizeof(*item));
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
out:
|
||||
btrfs_release_path(root, path);
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_key *key, struct btrfs_root_item
|
||||
*item)
|
||||
{
|
||||
int ret;
|
||||
ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* at mount time we want to find all the old transaction snapshots that were in
|
||||
* the process of being deleted if we crashed. This is any root item with an
|
||||
* offset lower than the latest root. They need to be queued for deletion to
|
||||
* finish what was happening when we crashed.
|
||||
*/
|
||||
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
|
||||
struct btrfs_root *latest)
|
||||
{
|
||||
struct btrfs_root *dead_root;
|
||||
struct btrfs_item *item;
|
||||
struct btrfs_root_item *ri;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
u32 nritems;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
|
||||
key.objectid = objectid;
|
||||
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
|
||||
key.offset = 0;
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
again:
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
while (1) {
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
slot = path->slots[0];
|
||||
if (slot >= nritems) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret)
|
||||
break;
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
slot = path->slots[0];
|
||||
}
|
||||
item = btrfs_item_nr(leaf, slot);
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
|
||||
goto next;
|
||||
|
||||
if (key.objectid < objectid)
|
||||
goto next;
|
||||
|
||||
if (key.objectid > objectid)
|
||||
break;
|
||||
|
||||
ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item);
|
||||
if (btrfs_disk_root_refs(leaf, ri) != 0)
|
||||
goto next;
|
||||
|
||||
memcpy(&found_key, &key, sizeof(key));
|
||||
key.offset++;
|
||||
btrfs_release_path(root, path);
|
||||
dead_root =
|
||||
btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
|
||||
&found_key);
|
||||
if (IS_ERR(dead_root)) {
|
||||
ret = PTR_ERR(dead_root);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (objectid == BTRFS_TREE_RELOC_OBJECTID)
|
||||
ret = btrfs_add_dead_reloc_root(dead_root);
|
||||
else
|
||||
ret = btrfs_add_dead_root(dead_root, latest);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto again;
|
||||
next:
|
||||
slot++;
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* drop the root item for 'key' from 'root' */
|
||||
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
u32 refs;
|
||||
struct btrfs_root_item *ri;
|
||||
struct extent_buffer *leaf;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
BUG_ON(ret != 0);
|
||||
leaf = path->nodes[0];
|
||||
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
|
||||
|
||||
refs = btrfs_disk_root_refs(leaf, ri);
|
||||
BUG_ON(refs != 0);
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
out:
|
||||
btrfs_release_path(root, path);
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0 /* this will get used when snapshot deletion is implemented */
/*
 * delete the item keyed (root_id, type, ref_id) from 'tree_root'
 *
 * Returns -ENOMEM if no path could be allocated.  A failed search or
 * delete is fatal (BUG_ON).
 */
int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *tree_root,
		       u64 root_id, u8 type, u64 ref_id)
{
	struct btrfs_key key;
	int ret;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = root_id;
	key.type = type;
	key.offset = ref_id;

	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
	BUG_ON(ret);

	ret = btrfs_del_item(trans, tree_root, path);
	BUG_ON(ret);

	btrfs_free_path(path);
	return ret;
}
#endif
|
||||
|
||||
/*
 * search 'tree_root' for the key (root_id, BTRFS_ROOT_REF_KEY, ref_id),
 * using the caller's path; the result of btrfs_search_slot() is returned
 * unchanged
 */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
			struct btrfs_path *path,
			u64 root_id, u64 ref_id)
{
	struct btrfs_key ref_key;

	ref_key.objectid = root_id;
	ref_key.type = BTRFS_ROOT_REF_KEY;
	ref_key.offset = ref_id;

	return btrfs_search_slot(NULL, tree_root, &ref_key, path, 0, 0);
}
|
||||
|
||||
|
||||
/*
|
||||
* add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
|
||||
* or BTRFS_ROOT_BACKREF_KEY.
|
||||
*
|
||||
* The dirid, sequence, name and name_len refer to the directory entry
|
||||
* that is referencing the root.
|
||||
*
|
||||
* For a forward ref, the root_id is the id of the tree referencing
|
||||
* the root and ref_id is the id of the subvol or snapshot.
|
||||
*
|
||||
* For a back ref the root_id is the id of the subvol or snapshot and
|
||||
* ref_id is the id of the tree referencing it.
|
||||
*/
|
||||
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *tree_root,
|
||||
u64 root_id, u8 type, u64 ref_id,
|
||||
u64 dirid, u64 sequence,
|
||||
const char *name, int name_len)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_root_ref *ref;
|
||||
struct extent_buffer *leaf;
|
||||
unsigned long ptr;
|
||||
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
||||
key.objectid = root_id;
|
||||
key.type = type;
|
||||
key.offset = ref_id;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
|
||||
sizeof(*ref) + name_len);
|
||||
BUG_ON(ret);
|
||||
|
||||
leaf = path->nodes[0];
|
||||
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
|
||||
btrfs_set_root_ref_dirid(leaf, ref, dirid);
|
||||
btrfs_set_root_ref_sequence(leaf, ref, sequence);
|
||||
btrfs_set_root_ref_name_len(leaf, ref, name_len);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
write_extent_buffer(leaf, name, ptr, name_len);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/highmem.h>
|
||||
|
||||
/* this is some deeply nasty code. ctree.h has a different
|
||||
* definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
|
||||
*
|
||||
* The end result is that anyone who #includes ctree.h gets a
|
||||
* declaration for the btrfs_set_foo functions and btrfs_foo functions
|
||||
*
|
||||
* This file declares the macros and then #includes ctree.h, which results
|
||||
* in cpp creating the function here based on the template below.
|
||||
*
|
||||
* These setget functions do all the extent_buffer related mapping
|
||||
* required to efficiently read and write specific fields in the extent
|
||||
* buffers. Every pointer to metadata items in btrfs is really just
|
||||
* an unsigned long offset into the extent buffer which has been
|
||||
* cast to a specific type. This gives us all the gcc type checking.
|
||||
*
|
||||
* The extent buffer api is used to do all the kmapping and page
|
||||
* spanning work required to get extent buffers in highmem and have
|
||||
* a metadata blocksize different from the page size.
|
||||
*
|
||||
* The macro starts with a simple function prototype declaration so that
|
||||
* sparse won't complain about it being static.
|
||||
*/
|
||||
|
||||
/*
 * BTRFS_SETGET_FUNCS(name, type, member, bits) emits two functions:
 *
 *   u<bits> btrfs_<name>(eb, s)          - read  s->member, LE -> cpu
 *   void    btrfs_set_<name>(eb, s, val) - write s->member, cpu -> LE
 *
 * 's' is not dereferenced directly; it is converted to an unsigned long
 * offset into the extent buffer 'eb'.
 */
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
|
||||
u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
|
||||
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
|
||||
u##bits btrfs_##name(struct extent_buffer *eb, \
|
||||
type *s) \
|
||||
{ \
|
||||
unsigned long part_offset = (unsigned long)s; \
|
||||
unsigned long offset = part_offset + offsetof(type, member); \
|
||||
type *p; \
|
||||
/* ugly, but we want the fast path here */ \
|
||||
/* fast path: eb already has a mapping that covers the whole member */ \
if (eb->map_token && offset >= eb->map_start && \
|
||||
offset + sizeof(((type *)0)->member) <= eb->map_start + \
|
||||
eb->map_len) { \
|
||||
p = (type *)(eb->kaddr + part_offset - eb->map_start); \
|
||||
return le##bits##_to_cpu(p->member); \
|
||||
} \
|
||||
{ \
|
||||
int err; \
|
||||
char *map_token; \
|
||||
char *kaddr; \
|
||||
/* only unmap below if eb had no mapping token on entry */ \
int unmap_on_exit = (eb->map_token == NULL); \
|
||||
unsigned long map_start; \
|
||||
unsigned long map_len; \
|
||||
u##bits res; \
|
||||
err = map_extent_buffer(eb, offset, \
|
||||
sizeof(((type *)0)->member), \
|
||||
&map_token, &kaddr, \
|
||||
&map_start, &map_len, KM_USER1); \
|
||||
/* mapping failed: fall back to read_eb_member() */ \
if (err) { \
|
||||
__le##bits leres; \
|
||||
read_eb_member(eb, s, type, member, &leres); \
|
||||
return le##bits##_to_cpu(leres); \
|
||||
} \
|
||||
p = (type *)(kaddr + part_offset - map_start); \
|
||||
res = le##bits##_to_cpu(p->member); \
|
||||
if (unmap_on_exit) \
|
||||
unmap_extent_buffer(eb, map_token, KM_USER1); \
|
||||
return res; \
|
||||
} \
|
||||
} \
|
||||
void btrfs_set_##name(struct extent_buffer *eb, \
|
||||
type *s, u##bits val) \
|
||||
{ \
|
||||
unsigned long part_offset = (unsigned long)s; \
|
||||
unsigned long offset = part_offset + offsetof(type, member); \
|
||||
type *p; \
|
||||
/* ugly, but we want the fast path here */ \
|
||||
/* fast path: eb already has a mapping that covers the whole member */ \
if (eb->map_token && offset >= eb->map_start && \
|
||||
offset + sizeof(((type *)0)->member) <= eb->map_start + \
|
||||
eb->map_len) { \
|
||||
p = (type *)(eb->kaddr + part_offset - eb->map_start); \
|
||||
p->member = cpu_to_le##bits(val); \
|
||||
return; \
|
||||
} \
|
||||
{ \
|
||||
int err; \
|
||||
char *map_token; \
|
||||
char *kaddr; \
|
||||
/* only unmap below if eb had no mapping token on entry */ \
int unmap_on_exit = (eb->map_token == NULL); \
|
||||
unsigned long map_start; \
|
||||
unsigned long map_len; \
|
||||
err = map_extent_buffer(eb, offset, \
|
||||
sizeof(((type *)0)->member), \
|
||||
&map_token, &kaddr, \
|
||||
&map_start, &map_len, KM_USER1); \
|
||||
/* mapping failed: fall back to write_eb_member() */ \
if (err) { \
|
||||
__le##bits val2; \
|
||||
val2 = cpu_to_le##bits(val); \
|
||||
write_eb_member(eb, s, type, member, &val2); \
|
||||
return; \
|
||||
} \
|
||||
p = (type *)(kaddr + part_offset - map_start); \
|
||||
p->member = cpu_to_le##bits(val); \
|
||||
if (unmap_on_exit) \
|
||||
unmap_extent_buffer(eb, map_token, KM_USER1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#include "ctree.h"
|
||||
|
||||
void btrfs_node_key(struct extent_buffer *eb,
|
||||
struct btrfs_disk_key *disk_key, int nr)
|
||||
{
|
||||
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
|
||||
if (eb->map_token && ptr >= eb->map_start &&
|
||||
ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
|
||||
memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
|
||||
sizeof(*disk_key));
|
||||
return;
|
||||
} else if (eb->map_token) {
|
||||
unmap_extent_buffer(eb, eb->map_token, KM_USER1);
|
||||
eb->map_token = NULL;
|
||||
}
|
||||
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
|
||||
struct btrfs_key_ptr, key, disk_key);
|
||||
}
|
|
@ -0,0 +1,720 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/statfs.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/parser.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/version.h>
|
||||
#include "compat.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "ioctl.h"
|
||||
#include "print-tree.h"
|
||||
#include "xattr.h"
|
||||
#include "volumes.h"
|
||||
#include "version.h"
|
||||
#include "export.h"
|
||||
#include "compression.h"
|
||||
|
||||
#define BTRFS_SUPER_MAGIC 0x9123683E
|
||||
|
||||
static struct super_operations btrfs_super_ops;
|
||||
|
||||
static void btrfs_put_super(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(sb);
|
||||
int ret;
|
||||
|
||||
ret = close_ctree(root);
|
||||
sb->s_fs_info = NULL;
|
||||
}
|
||||
|
||||
/* Mount option identifiers returned by match_token() for the tokens table. */
enum {
	Opt_degraded,
	Opt_subvol,
	Opt_device,
	Opt_nodatasum,
	Opt_nodatacow,
	Opt_max_extent,
	Opt_max_inline,
	Opt_alloc_start,
	Opt_nobarrier,
	Opt_ssd,
	Opt_thread_pool,
	Opt_noacl,
	Opt_compress,
	Opt_err,
};
|
||||
|
||||
/*
 * Mount option patterns passed to match_token() by btrfs_parse_options()
 * and btrfs_parse_early_options().  Patterns with "%s" / "%d" carry an
 * argument that the parsers read back from args[0].  The last entry is
 * Opt_err with a NULL pattern.
 */
static match_table_t tokens = {
|
||||
{Opt_degraded, "degraded"},
|
||||
{Opt_subvol, "subvol=%s"},
|
||||
{Opt_device, "device=%s"},
|
||||
{Opt_nodatasum, "nodatasum"},
|
||||
{Opt_nodatacow, "nodatacow"},
|
||||
{Opt_nobarrier, "nobarrier"},
|
||||
{Opt_max_extent, "max_extent=%s"},
|
||||
{Opt_max_inline, "max_inline=%s"},
|
||||
{Opt_alloc_start, "alloc_start=%s"},
|
||||
{Opt_thread_pool, "thread_pool=%d"},
|
||||
{Opt_compress, "compress"},
|
||||
{Opt_ssd, "ssd"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
/*
 * Parse a decimal size with an optional k/m/g suffix (case-insensitive,
 * powers of 1024) and return it in bytes.  Any other alphabetic suffix
 * leaves the number unscaled.
 *
 * The number is parsed as a full 64-bit value so that sizes of 4GiB and
 * above are not truncated on hosts where unsigned long is 32 bits.
 */
u64 btrfs_parse_size(char *str)
{
	u64 res;
	u64 mult = 1;
	char *end;
	char last;

	res = simple_strtoull(str, &end, 10);

	last = end[0];
	if (isalpha(last)) {
		last = tolower(last);
		switch (last) {
		case 'g':
			mult *= 1024;
			/* fall through */
		case 'm':
			mult *= 1024;
			/* fall through */
		case 'k':
			mult *= 1024;
		}
		res = res * mult;
	}
	return res;
}
|
||||
|
||||
/*
|
||||
* Regular mount options parser. Everything that is needed only when
|
||||
* reading in a new superblock is parsed here.
|
||||
*/
|
||||
int btrfs_parse_options(struct btrfs_root *root, char *options)
|
||||
{
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char *p, *num;
|
||||
int intarg;
|
||||
|
||||
if (!options)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* strsep changes the string, duplicate it because parse_options
|
||||
* gets called twice
|
||||
*/
|
||||
options = kstrdup(options, GFP_NOFS);
|
||||
if (!options)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
while ((p = strsep(&options, ",")) != NULL) {
|
||||
int token;
|
||||
if (!*p)
|
||||
continue;
|
||||
|
||||
token = match_token(p, tokens, args);
|
||||
switch (token) {
|
||||
case Opt_degraded:
|
||||
printk(KERN_INFO "btrfs: allowing degraded mounts\n");
|
||||
btrfs_set_opt(info->mount_opt, DEGRADED);
|
||||
break;
|
||||
case Opt_subvol:
|
||||
case Opt_device:
|
||||
/*
|
||||
* These are parsed by btrfs_parse_early_options
|
||||
* and can be happily ignored here.
|
||||
*/
|
||||
break;
|
||||
case Opt_nodatasum:
|
||||
printk(KERN_INFO "btrfs: setting nodatacsum\n");
|
||||
btrfs_set_opt(info->mount_opt, NODATASUM);
|
||||
break;
|
||||
case Opt_nodatacow:
|
||||
printk(KERN_INFO "btrfs: setting nodatacow\n");
|
||||
btrfs_set_opt(info->mount_opt, NODATACOW);
|
||||
btrfs_set_opt(info->mount_opt, NODATASUM);
|
||||
break;
|
||||
case Opt_compress:
|
||||
printk(KERN_INFO "btrfs: use compression\n");
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
break;
|
||||
case Opt_ssd:
|
||||
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
|
||||
btrfs_set_opt(info->mount_opt, SSD);
|
||||
break;
|
||||
case Opt_nobarrier:
|
||||
printk(KERN_INFO "btrfs: turning off barriers\n");
|
||||
btrfs_set_opt(info->mount_opt, NOBARRIER);
|
||||
break;
|
||||
case Opt_thread_pool:
|
||||
intarg = 0;
|
||||
match_int(&args[0], &intarg);
|
||||
if (intarg) {
|
||||
info->thread_pool_size = intarg;
|
||||
printk(KERN_INFO "btrfs: thread pool %d\n",
|
||||
info->thread_pool_size);
|
||||
}
|
||||
break;
|
||||
case Opt_max_extent:
|
||||
num = match_strdup(&args[0]);
|
||||
if (num) {
|
||||
info->max_extent = btrfs_parse_size(num);
|
||||
kfree(num);
|
||||
|
||||
info->max_extent = max_t(u64,
|
||||
info->max_extent, root->sectorsize);
|
||||
printk(KERN_INFO "btrfs: max_extent at %llu\n",
|
||||
info->max_extent);
|
||||
}
|
||||
break;
|
||||
case Opt_max_inline:
|
||||
num = match_strdup(&args[0]);
|
||||
if (num) {
|
||||
info->max_inline = btrfs_parse_size(num);
|
||||
kfree(num);
|
||||
|
||||
if (info->max_inline) {
|
||||
info->max_inline = max_t(u64,
|
||||
info->max_inline,
|
||||
root->sectorsize);
|
||||
}
|
||||
printk(KERN_INFO "btrfs: max_inline at %llu\n",
|
||||
info->max_inline);
|
||||
}
|
||||
break;
|
||||
case Opt_alloc_start:
|
||||
num = match_strdup(&args[0]);
|
||||
if (num) {
|
||||
info->alloc_start = btrfs_parse_size(num);
|
||||
kfree(num);
|
||||
printk(KERN_INFO
|
||||
"btrfs: allocations start at %llu\n",
|
||||
info->alloc_start);
|
||||
}
|
||||
break;
|
||||
case Opt_noacl:
|
||||
root->fs_info->sb->s_flags &= ~MS_POSIXACL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
kfree(options);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse mount options that are required early in the mount process.
|
||||
*
|
||||
* All other options will be parsed on much later in the mount process and
|
||||
* only when we need to allocate a new super block.
|
||||
*/
|
||||
static int btrfs_parse_early_options(const char *options, fmode_t flags,
|
||||
void *holder, char **subvol_name,
|
||||
struct btrfs_fs_devices **fs_devices)
|
||||
{
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char *opts, *p;
|
||||
int error = 0;
|
||||
|
||||
if (!options)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* strsep changes the string, duplicate it because parse_options
|
||||
* gets called twice
|
||||
*/
|
||||
opts = kstrdup(options, GFP_KERNEL);
|
||||
if (!opts)
|
||||
return -ENOMEM;
|
||||
|
||||
while ((p = strsep(&opts, ",")) != NULL) {
|
||||
int token;
|
||||
if (!*p)
|
||||
continue;
|
||||
|
||||
token = match_token(p, tokens, args);
|
||||
switch (token) {
|
||||
case Opt_subvol:
|
||||
*subvol_name = match_strdup(&args[0]);
|
||||
break;
|
||||
case Opt_device:
|
||||
error = btrfs_scan_one_device(match_strdup(&args[0]),
|
||||
flags, holder, fs_devices);
|
||||
if (error)
|
||||
goto out_free_opts;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out_free_opts:
|
||||
kfree(opts);
|
||||
out:
|
||||
/*
|
||||
* If no subvolume name is specified we use the default one. Allocate
|
||||
* a copy of the string "." here so that code later in the
|
||||
* mount path doesn't care if it's the default volume or another one.
|
||||
*/
|
||||
if (!*subvol_name) {
|
||||
*subvol_name = kstrdup(".", GFP_KERNEL);
|
||||
if (!*subvol_name)
|
||||
return -ENOMEM;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
static int btrfs_fill_super(struct super_block *sb,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
void *data, int silent)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct dentry *root_dentry;
|
||||
struct btrfs_super_block *disk_super;
|
||||
struct btrfs_root *tree_root;
|
||||
struct btrfs_inode *bi;
|
||||
int err;
|
||||
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_magic = BTRFS_SUPER_MAGIC;
|
||||
sb->s_op = &btrfs_super_ops;
|
||||
sb->s_export_op = &btrfs_export_ops;
|
||||
sb->s_xattr = btrfs_xattr_handlers;
|
||||
sb->s_time_gran = 1;
|
||||
sb->s_flags |= MS_POSIXACL;
|
||||
|
||||
tree_root = open_ctree(sb, fs_devices, (char *)data);
|
||||
|
||||
if (IS_ERR(tree_root)) {
|
||||
printk("btrfs: open_ctree failed\n");
|
||||
return PTR_ERR(tree_root);
|
||||
}
|
||||
sb->s_fs_info = tree_root;
|
||||
disk_super = &tree_root->fs_info->super_copy;
|
||||
inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
|
||||
tree_root->fs_info->fs_root);
|
||||
bi = BTRFS_I(inode);
|
||||
bi->location.objectid = inode->i_ino;
|
||||
bi->location.offset = 0;
|
||||
bi->root = tree_root->fs_info->fs_root;
|
||||
|
||||
btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
|
||||
|
||||
if (!inode) {
|
||||
err = -ENOMEM;
|
||||
goto fail_close;
|
||||
}
|
||||
if (inode->i_state & I_NEW) {
|
||||
btrfs_read_locked_inode(inode);
|
||||
unlock_new_inode(inode);
|
||||
}
|
||||
|
||||
root_dentry = d_alloc_root(inode);
|
||||
if (!root_dentry) {
|
||||
iput(inode);
|
||||
err = -ENOMEM;
|
||||
goto fail_close;
|
||||
}
|
||||
#if 0
|
||||
/* this does the super kobj at the same time */
|
||||
err = btrfs_sysfs_add_super(tree_root->fs_info);
|
||||
if (err)
|
||||
goto fail_close;
|
||||
#endif
|
||||
|
||||
sb->s_root = root_dentry;
|
||||
|
||||
save_mount_options(sb, data);
|
||||
return 0;
|
||||
|
||||
fail_close:
|
||||
close_ctree(tree_root);
|
||||
return err;
|
||||
}
|
||||
|
||||
int btrfs_sync_fs(struct super_block *sb, int wait)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_root *root;
|
||||
int ret;
|
||||
root = btrfs_sb(sb);
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return 0;
|
||||
|
||||
sb->s_dirt = 0;
|
||||
if (!wait) {
|
||||
filemap_flush(root->fs_info->btree_inode->i_mapping);
|
||||
return 0;
|
||||
}
|
||||
|
||||
btrfs_start_delalloc_inodes(root);
|
||||
btrfs_wait_ordered_extents(root, 0);
|
||||
|
||||
btrfs_clean_old_snapshots(root);
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
ret = btrfs_commit_transaction(trans, root);
|
||||
sb->s_dirt = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ->write_super: only clears the super block's dirty flag; writes nothing. */
static void btrfs_write_super(struct super_block *sb)
|
||||
{
|
||||
sb->s_dirt = 0;
|
||||
}
|
||||
|
||||
static int btrfs_test_super(struct super_block *s, void *data)
|
||||
{
|
||||
struct btrfs_fs_devices *test_fs_devices = data;
|
||||
struct btrfs_root *root = btrfs_sb(s);
|
||||
|
||||
return root->fs_info->fs_devices == test_fs_devices;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a superblock for the given device / mount point.
|
||||
*
|
||||
* Note: This is based on get_sb_bdev from fs/super.c with a few additions
|
||||
* for multiple device setup. Make sure to keep it in sync.
|
||||
*/
|
||||
static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
|
||||
const char *dev_name, void *data, struct vfsmount *mnt)
|
||||
{
|
||||
char *subvol_name = NULL;
|
||||
struct block_device *bdev = NULL;
|
||||
struct super_block *s;
|
||||
struct dentry *root;
|
||||
struct btrfs_fs_devices *fs_devices = NULL;
|
||||
fmode_t mode = FMODE_READ;
|
||||
int error = 0;
|
||||
|
||||
if (!(flags & MS_RDONLY))
|
||||
mode |= FMODE_WRITE;
|
||||
|
||||
error = btrfs_parse_early_options(data, mode, fs_type,
|
||||
&subvol_name, &fs_devices);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
|
||||
if (error)
|
||||
goto error_free_subvol_name;
|
||||
|
||||
error = btrfs_open_devices(fs_devices, mode, fs_type);
|
||||
if (error)
|
||||
goto error_free_subvol_name;
|
||||
|
||||
if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
|
||||
error = -EACCES;
|
||||
goto error_close_devices;
|
||||
}
|
||||
|
||||
bdev = fs_devices->latest_bdev;
|
||||
s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
|
||||
if (IS_ERR(s))
|
||||
goto error_s;
|
||||
|
||||
if (s->s_root) {
|
||||
if ((flags ^ s->s_flags) & MS_RDONLY) {
|
||||
up_write(&s->s_umount);
|
||||
deactivate_super(s);
|
||||
error = -EBUSY;
|
||||
goto error_close_devices;
|
||||
}
|
||||
|
||||
btrfs_close_devices(fs_devices);
|
||||
} else {
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
s->s_flags = flags;
|
||||
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
|
||||
error = btrfs_fill_super(s, fs_devices, data,
|
||||
flags & MS_SILENT ? 1 : 0);
|
||||
if (error) {
|
||||
up_write(&s->s_umount);
|
||||
deactivate_super(s);
|
||||
goto error_free_subvol_name;
|
||||
}
|
||||
|
||||
btrfs_sb(s)->fs_info->bdev_holder = fs_type;
|
||||
s->s_flags |= MS_ACTIVE;
|
||||
}
|
||||
|
||||
if (!strcmp(subvol_name, "."))
|
||||
root = dget(s->s_root);
|
||||
else {
|
||||
mutex_lock(&s->s_root->d_inode->i_mutex);
|
||||
root = lookup_one_len(subvol_name, s->s_root,
|
||||
strlen(subvol_name));
|
||||
mutex_unlock(&s->s_root->d_inode->i_mutex);
|
||||
|
||||
if (IS_ERR(root)) {
|
||||
up_write(&s->s_umount);
|
||||
deactivate_super(s);
|
||||
error = PTR_ERR(root);
|
||||
goto error_free_subvol_name;
|
||||
}
|
||||
if (!root->d_inode) {
|
||||
dput(root);
|
||||
up_write(&s->s_umount);
|
||||
deactivate_super(s);
|
||||
error = -ENXIO;
|
||||
goto error_free_subvol_name;
|
||||
}
|
||||
}
|
||||
|
||||
mnt->mnt_sb = s;
|
||||
mnt->mnt_root = root;
|
||||
|
||||
kfree(subvol_name);
|
||||
return 0;
|
||||
|
||||
error_s:
|
||||
error = PTR_ERR(s);
|
||||
error_close_devices:
|
||||
btrfs_close_devices(fs_devices);
|
||||
error_free_subvol_name:
|
||||
kfree(subvol_name);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(sb);
|
||||
int ret;
|
||||
|
||||
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
|
||||
return 0;
|
||||
|
||||
if (*flags & MS_RDONLY) {
|
||||
sb->s_flags |= MS_RDONLY;
|
||||
|
||||
ret = btrfs_commit_super(root);
|
||||
WARN_ON(ret);
|
||||
} else {
|
||||
if (root->fs_info->fs_devices->rw_devices == 0)
|
||||
return -EACCES;
|
||||
|
||||
if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
ret = btrfs_cleanup_reloc_trees(root);
|
||||
WARN_ON(ret);
|
||||
|
||||
ret = btrfs_cleanup_fs_roots(root->fs_info);
|
||||
WARN_ON(ret);
|
||||
|
||||
sb->s_flags &= ~MS_RDONLY;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(dentry->d_sb);
|
||||
struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
|
||||
int bits = dentry->d_sb->s_blocksize_bits;
|
||||
__be32 *fsid = (__be32 *)root->fs_info->fsid;
|
||||
|
||||
buf->f_namelen = BTRFS_NAME_LEN;
|
||||
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
|
||||
buf->f_bfree = buf->f_blocks -
|
||||
(btrfs_super_bytes_used(disk_super) >> bits);
|
||||
buf->f_bavail = buf->f_bfree;
|
||||
buf->f_bsize = dentry->d_sb->s_blocksize;
|
||||
buf->f_type = BTRFS_SUPER_MAGIC;
|
||||
|
||||
/* We treat it as constant endianness (it doesn't matter _which_)
|
||||
because we want the fsid to come out the same whether mounted
|
||||
on a big-endian or little-endian host */
|
||||
buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
|
||||
buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
|
||||
/* Mask in the root object ID too, to disambiguate subvols */
|
||||
buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
|
||||
buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct file_system_type btrfs_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = "btrfs",
|
||||
.get_sb = btrfs_get_sb,
|
||||
.kill_sb = kill_anon_super,
|
||||
.fs_flags = FS_REQUIRES_DEV,
|
||||
};
|
||||
|
||||
/*
|
||||
* used by btrfsctl to scan devices when no FS is mounted
|
||||
*/
|
||||
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct btrfs_ioctl_vol_args *vol;
|
||||
struct btrfs_fs_devices *fs_devices;
|
||||
int ret = 0;
|
||||
int len;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
vol = kmalloc(sizeof(*vol), GFP_KERNEL);
|
||||
if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
len = strnlen(vol->name, BTRFS_PATH_NAME_MAX);
|
||||
switch (cmd) {
|
||||
case BTRFS_IOC_SCAN_DEV:
|
||||
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
|
||||
&btrfs_fs_type, &fs_devices);
|
||||
break;
|
||||
}
|
||||
out:
|
||||
kfree(vol);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btrfs_write_super_lockfs(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(sb);
|
||||
mutex_lock(&root->fs_info->transaction_kthread_mutex);
|
||||
mutex_lock(&root->fs_info->cleaner_mutex);
|
||||
}
|
||||
|
||||
static void btrfs_unlockfs(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(sb);
|
||||
mutex_unlock(&root->fs_info->cleaner_mutex);
|
||||
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
|
||||
}
|
||||
|
||||
static struct super_operations btrfs_super_ops = {
|
||||
.delete_inode = btrfs_delete_inode,
|
||||
.put_super = btrfs_put_super,
|
||||
.write_super = btrfs_write_super,
|
||||
.sync_fs = btrfs_sync_fs,
|
||||
.show_options = generic_show_options,
|
||||
.write_inode = btrfs_write_inode,
|
||||
.dirty_inode = btrfs_dirty_inode,
|
||||
.alloc_inode = btrfs_alloc_inode,
|
||||
.destroy_inode = btrfs_destroy_inode,
|
||||
.statfs = btrfs_statfs,
|
||||
.remount_fs = btrfs_remount,
|
||||
.write_super_lockfs = btrfs_write_super_lockfs,
|
||||
.unlockfs = btrfs_unlockfs,
|
||||
};
|
||||
|
||||
static const struct file_operations btrfs_ctl_fops = {
|
||||
.unlocked_ioctl = btrfs_control_ioctl,
|
||||
.compat_ioctl = btrfs_control_ioctl,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static struct miscdevice btrfs_misc = {
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.name = "btrfs-control",
|
||||
.fops = &btrfs_ctl_fops
|
||||
};
|
||||
|
||||
/* Register the btrfs_misc device; returns the misc_register() result. */
static int btrfs_interface_init(void)
|
||||
{
|
||||
return misc_register(&btrfs_misc);
|
||||
}
|
||||
|
||||
static void btrfs_interface_exit(void)
|
||||
{
|
||||
if (misc_deregister(&btrfs_misc) < 0)
|
||||
printk(KERN_INFO "misc_deregister failed for control device");
|
||||
}
|
||||
|
||||
/*
 * Module init.  Brings up, in order: sysfs, the caches, extent_io,
 * extent_map, the control device and the filesystem registration.  When
 * a step fails, the steps already completed are undone in reverse order
 * and the error is returned.
 */
static int __init init_btrfs_fs(void)
{
	int ret;

	ret = btrfs_init_sysfs();
	if (ret)
		return ret;

	ret = btrfs_init_cachep();
	if (ret)
		goto undo_sysfs;

	ret = extent_io_init();
	if (ret)
		goto undo_cachep;

	ret = extent_map_init();
	if (ret)
		goto undo_extent_io;

	ret = btrfs_interface_init();
	if (ret)
		goto undo_extent_map;

	ret = register_filesystem(&btrfs_fs_type);
	if (ret)
		goto undo_interface;

	printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
	return 0;

undo_interface:
	btrfs_interface_exit();
undo_extent_map:
	extent_map_exit();
undo_extent_io:
	extent_io_exit();
undo_cachep:
	btrfs_destroy_cachep();
undo_sysfs:
	btrfs_exit_sysfs();
	return ret;
}
|
||||
|
||||
/*
 * Module exit.  Calls the teardown counterpart of every init_btrfs_fs()
 * step, plus btrfs_cleanup_fs_uuids() and btrfs_zlib_exit(), which have
 * no matching call in init_btrfs_fs().
 */
static void __exit exit_btrfs_fs(void)
|
||||
{
|
||||
btrfs_destroy_cachep();
|
||||
extent_map_exit();
|
||||
extent_io_exit();
|
||||
btrfs_interface_exit();
|
||||
unregister_filesystem(&btrfs_fs_type);
|
||||
btrfs_exit_sysfs();
|
||||
btrfs_cleanup_fs_uuids();
|
||||
btrfs_zlib_exit();
|
||||
}
|
||||
|
||||
module_init(init_btrfs_fs)
|
||||
module_exit(exit_btrfs_fs)
|
||||
|
||||
MODULE_LICENSE("GPL");
|
|
@ -0,0 +1,269 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kobject.h>
|
||||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
||||
static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long)btrfs_root_used(&root->root_item));
|
||||
}
|
||||
|
||||
static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long)btrfs_root_limit(&root->root_item));
|
||||
}
|
||||
|
||||
static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
|
||||
{
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
|
||||
}
|
||||
|
||||
static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
|
||||
}
|
||||
|
||||
static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
|
||||
}
|
||||
|
||||
/* this is for root attrs (subvols/snapshots) */
|
||||
/*
 * A sysfs attribute of a btrfs_root: the generic attribute plus typed
 * show/store callbacks that btrfs_root_attr_show()/_store() dispatch to.
 */
struct btrfs_root_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct btrfs_root *, char *);
|
||||
ssize_t (*store)(struct btrfs_root *, const char *, size_t);
|
||||
};
|
||||
|
||||
/* Defines a static btrfs_root_attr named btrfs_root_attr_<name>. */
#define ROOT_ATTR(name, mode, show, store) \
|
||||
static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
|
||||
show, store)
|
||||
|
||||
ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
|
||||
/* NOTE(review): mode 0644 but no store callback - confirm this is intended */
ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
|
||||
|
||||
/* NULL-terminated default attribute list used by btrfs_root_ktype. */
static struct attribute *btrfs_root_attrs[] = {
|
||||
&btrfs_root_attr_blocks_used.attr,
|
||||
&btrfs_root_attr_block_limit.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* this is for super attrs (actual full fs) */
|
||||
/*
 * A sysfs attribute of a btrfs_fs_info: the generic attribute plus typed
 * show/store callbacks that btrfs_super_attr_show()/_store() dispatch to.
 */
struct btrfs_super_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct btrfs_fs_info *, char *);
|
||||
ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
|
||||
};
|
||||
|
||||
/* Defines a static btrfs_super_attr named btrfs_super_attr_<name>. */
#define SUPER_ATTR(name, mode, show, store) \
|
||||
static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
|
||||
show, store)
|
||||
|
||||
SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
|
||||
SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
|
||||
SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
|
||||
|
||||
/* NULL-terminated list of the super attributes defined above. */
static struct attribute *btrfs_super_attrs[] = {
|
||||
&btrfs_super_attr_blocks_used.attr,
|
||||
&btrfs_super_attr_total_blocks.attr,
|
||||
&btrfs_super_attr_blocksize.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static ssize_t btrfs_super_attr_show(struct kobject *kobj,
|
||||
struct attribute *attr, char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
|
||||
super_kobj);
|
||||
struct btrfs_super_attr *a = container_of(attr,
|
||||
struct btrfs_super_attr,
|
||||
attr);
|
||||
|
||||
return a->show ? a->show(fs, buf) : 0;
|
||||
}
|
||||
|
||||
/*
 * sysfs ->store for super attributes: recover the btrfs_fs_info and the
 * btrfs_super_attr from the embedded kobject/attribute and call the
 * attribute's store callback.  Returns 0 when there is none.
 */
static ssize_t btrfs_super_attr_store(struct kobject *kobj,
				      struct attribute *attr,
				      const char *buf, size_t len)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_super_attr *sattr;

	fs_info = container_of(kobj, struct btrfs_fs_info, super_kobj);
	sattr = container_of(attr, struct btrfs_super_attr, attr);

	if (!sattr->store)
		return 0;
	return sattr->store(fs_info, buf, len);
}
|
||||
|
||||
static ssize_t btrfs_root_attr_show(struct kobject *kobj,
|
||||
struct attribute *attr, char *buf)
|
||||
{
|
||||
struct btrfs_root *root = container_of(kobj, struct btrfs_root,
|
||||
root_kobj);
|
||||
struct btrfs_root_attr *a = container_of(attr,
|
||||
struct btrfs_root_attr,
|
||||
attr);
|
||||
|
||||
return a->show ? a->show(root, buf) : 0;
|
||||
}
|
||||
|
||||
/*
 * sysfs ->store dispatcher for a per-root kobject.
 *
 * Maps the generic attribute back to its btrfs_root_attr wrapper and the
 * kobject back to the btrfs_root that embeds it (root_kobj), then forwards
 * the write to the attribute's store callback.
 *
 * Returns whatever the callback returns.  An attribute without a store
 * callback yields -EIO; this matters because block_limit is declared
 * writable (0644) with a NULL store, and returning 0 there would report
 * "zero bytes written" and let the writer spin retrying.
 */
static ssize_t btrfs_root_attr_store(struct kobject *kobj,
				     struct attribute *attr,
				     const char *buf, size_t len)
{
	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
					       root_kobj);
	struct btrfs_root_attr *a = container_of(attr,
						 struct btrfs_root_attr,
						 attr);

	if (!a->store)
		return -EIO;

	return a->store(root, buf, len);
}
|
||||
|
||||
static void btrfs_super_release(struct kobject *kobj)
|
||||
{
|
||||
struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
|
||||
super_kobj);
|
||||
complete(&fs->kobj_unregister);
|
||||
}
|
||||
|
||||
static void btrfs_root_release(struct kobject *kobj)
|
||||
{
|
||||
struct btrfs_root *root = container_of(kobj, struct btrfs_root,
|
||||
root_kobj);
|
||||
complete(&root->kobj_unregister);
|
||||
}
|
||||
|
||||
static struct sysfs_ops btrfs_super_attr_ops = {
|
||||
.show = btrfs_super_attr_show,
|
||||
.store = btrfs_super_attr_store,
|
||||
};
|
||||
|
||||
static struct sysfs_ops btrfs_root_attr_ops = {
|
||||
.show = btrfs_root_attr_show,
|
||||
.store = btrfs_root_attr_store,
|
||||
};
|
||||
|
||||
static struct kobj_type btrfs_root_ktype = {
|
||||
.default_attrs = btrfs_root_attrs,
|
||||
.sysfs_ops = &btrfs_root_attr_ops,
|
||||
.release = btrfs_root_release,
|
||||
};
|
||||
|
||||
static struct kobj_type btrfs_super_ktype = {
|
||||
.default_attrs = btrfs_super_attrs,
|
||||
.sysfs_ops = &btrfs_super_attr_ops,
|
||||
.release = btrfs_super_release,
|
||||
};
|
||||
|
||||
/* /sys/fs/btrfs/ entry */
|
||||
static struct kset *btrfs_kset;
|
||||
|
||||
int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
|
||||
{
|
||||
int error;
|
||||
char *name;
|
||||
char c;
|
||||
int len = strlen(fs->sb->s_id) + 1;
|
||||
int i;
|
||||
|
||||
name = kmalloc(len, GFP_NOFS);
|
||||
if (!name) {
|
||||
error = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
c = fs->sb->s_id[i];
|
||||
if (c == '/' || c == '\\')
|
||||
c = '!';
|
||||
name[i] = c;
|
||||
}
|
||||
name[len] = '\0';
|
||||
|
||||
fs->super_kobj.kset = btrfs_kset;
|
||||
error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
|
||||
NULL, "%s", name);
|
||||
kfree(name);
|
||||
if (error)
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
int btrfs_sysfs_add_root(struct btrfs_root *root)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
|
||||
&root->fs_info->super_kobj,
|
||||
"%s", root->name);
|
||||
if (error)
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
void btrfs_sysfs_del_root(struct btrfs_root *root)
|
||||
{
|
||||
kobject_put(&root->root_kobj);
|
||||
wait_for_completion(&root->kobj_unregister);
|
||||
}
|
||||
|
||||
void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
|
||||
{
|
||||
kobject_put(&fs->super_kobj);
|
||||
wait_for_completion(&fs->kobj_unregister);
|
||||
}
|
||||
|
||||
int btrfs_init_sysfs(void)
|
||||
{
|
||||
btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
|
||||
if (!btrfs_kset)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_exit_sysfs(void)
|
||||
{
|
||||
kset_unregister(btrfs_kset);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_TRANSACTION__
|
||||
#define __BTRFS_TRANSACTION__
|
||||
#include "btrfs_inode.h"
|
||||
|
||||
struct btrfs_transaction {
|
||||
u64 transid;
|
||||
unsigned long num_writers;
|
||||
unsigned long num_joined;
|
||||
int in_commit;
|
||||
int use_count;
|
||||
int commit_done;
|
||||
int blocked;
|
||||
struct list_head list;
|
||||
struct extent_io_tree dirty_pages;
|
||||
unsigned long start_time;
|
||||
wait_queue_head_t writer_wait;
|
||||
wait_queue_head_t commit_wait;
|
||||
struct list_head pending_snapshots;
|
||||
};
|
||||
|
||||
struct btrfs_trans_handle {
|
||||
u64 transid;
|
||||
unsigned long blocks_reserved;
|
||||
unsigned long blocks_used;
|
||||
struct btrfs_transaction *transaction;
|
||||
u64 block_group;
|
||||
u64 alloc_exclude_start;
|
||||
u64 alloc_exclude_nr;
|
||||
};
|
||||
|
||||
struct btrfs_pending_snapshot {
|
||||
struct dentry *dentry;
|
||||
struct btrfs_root *root;
|
||||
char *name;
|
||||
struct btrfs_key root_key;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct btrfs_dirty_root {
|
||||
struct list_head list;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_root *latest_root;
|
||||
};
|
||||
|
||||
static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode)
|
||||
{
|
||||
trans->block_group = BTRFS_I(inode)->block_group;
|
||||
}
|
||||
|
||||
static inline void btrfs_update_inode_block_group(
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct inode *inode)
|
||||
{
|
||||
BTRFS_I(inode)->block_group = trans->block_group;
|
||||
}
|
||||
|
||||
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode)
|
||||
{
|
||||
BTRFS_I(inode)->last_trans = trans->transaction->transid;
|
||||
}
|
||||
|
||||
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
|
||||
int num_blocks);
|
||||
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
|
||||
int num_blocks);
|
||||
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
|
||||
int num_blocks);
|
||||
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
|
||||
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
|
||||
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
|
||||
int btrfs_clean_old_snapshots(struct btrfs_root *root);
|
||||
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_throttle(struct btrfs_root *root);
|
||||
int btrfs_record_root_in_trans(struct btrfs_root *root);
|
||||
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
|
||||
struct extent_io_tree *dirty_pages);
|
||||
#endif
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
#include "transaction.h"
|
||||
#include "locking.h"
|
||||
|
||||
/* defrag all the leaves in a given btree. If cache_only == 1, don't read
|
||||
* things from disk, otherwise read all the leaves and try to get key order to
|
||||
* better reflect disk order
|
||||
*/
|
||||
|
||||
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, int cache_only)
|
||||
{
|
||||
struct btrfs_path *path = NULL;
|
||||
struct btrfs_key key;
|
||||
int ret = 0;
|
||||
int wret;
|
||||
int level;
|
||||
int orig_level;
|
||||
int is_extent = 0;
|
||||
int next_key_ret = 0;
|
||||
u64 last_ret = 0;
|
||||
u64 min_trans = 0;
|
||||
|
||||
if (cache_only)
|
||||
goto out;
|
||||
|
||||
if (root->fs_info->extent_root == root) {
|
||||
/*
|
||||
* there's recursion here right now in the tree locking,
|
||||
* we can't defrag the extent root without deadlock
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (root->ref_cows == 0 && !is_extent)
|
||||
goto out;
|
||||
|
||||
if (btrfs_test_opt(root, SSD))
|
||||
goto out;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
level = btrfs_header_level(root->node);
|
||||
orig_level = level;
|
||||
|
||||
if (level == 0)
|
||||
goto out;
|
||||
|
||||
if (root->defrag_progress.objectid == 0) {
|
||||
struct extent_buffer *root_node;
|
||||
u32 nritems;
|
||||
|
||||
root_node = btrfs_lock_root_node(root);
|
||||
nritems = btrfs_header_nritems(root_node);
|
||||
root->defrag_max.objectid = 0;
|
||||
/* from above we know this is not a leaf */
|
||||
btrfs_node_key_to_cpu(root_node, &root->defrag_max,
|
||||
nritems - 1);
|
||||
btrfs_tree_unlock(root_node);
|
||||
free_extent_buffer(root_node);
|
||||
memset(&key, 0, sizeof(key));
|
||||
} else {
|
||||
memcpy(&key, &root->defrag_progress, sizeof(key));
|
||||
}
|
||||
|
||||
path->keep_locks = 1;
|
||||
if (cache_only)
|
||||
min_trans = root->defrag_trans_start;
|
||||
|
||||
ret = btrfs_search_forward(root, &key, NULL, path,
|
||||
cache_only, min_trans);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
btrfs_release_path(root, path);
|
||||
wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
|
||||
|
||||
if (wret < 0) {
|
||||
ret = wret;
|
||||
goto out;
|
||||
}
|
||||
if (!path->nodes[1]) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
path->slots[1] = btrfs_header_nritems(path->nodes[1]);
|
||||
next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
|
||||
min_trans);
|
||||
ret = btrfs_realloc_node(trans, root,
|
||||
path->nodes[1], 0,
|
||||
cache_only, &last_ret,
|
||||
&root->defrag_progress);
|
||||
WARN_ON(ret && ret != -EAGAIN);
|
||||
if (next_key_ret == 0) {
|
||||
memcpy(&root->defrag_progress, &key, sizeof(key));
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
|
||||
btrfs_release_path(root, path);
|
||||
if (is_extent)
|
||||
btrfs_extent_post_op(trans, root);
|
||||
out:
|
||||
if (path)
|
||||
btrfs_free_path(path);
|
||||
if (ret == -EAGAIN) {
|
||||
if (root->defrag_max.objectid > root->defrag_progress.objectid)
|
||||
goto done;
|
||||
if (root->defrag_max.type > root->defrag_progress.type)
|
||||
goto done;
|
||||
if (root->defrag_max.offset > root->defrag_progress.offset)
|
||||
goto done;
|
||||
ret = 0;
|
||||
}
|
||||
done:
|
||||
if (ret != -EAGAIN) {
|
||||
memset(&root->defrag_progress, 0,
|
||||
sizeof(root->defrag_progress));
|
||||
root->defrag_trans_start = trans->transid;
|
||||
}
|
||||
return ret;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __TREE_LOG_
|
||||
#define __TREE_LOG_
|
||||
|
||||
int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
|
||||
int btrfs_log_dentry(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct dentry *dentry);
|
||||
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
|
||||
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct dentry *dentry);
|
||||
int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct inode *inode,
|
||||
int inode_only);
|
||||
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct inode *dir, u64 index);
|
||||
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct inode *inode, u64 dirid);
|
||||
#endif
|
|
@ -0,0 +1,4 @@
|
|||
#ifndef __BTRFS_VERSION_H
|
||||
#define __BTRFS_VERSION_H
|
||||
#define BTRFS_BUILD_VERSION "Btrfs"
|
||||
#endif
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
#
# determine-version -- report a useful version for releases
#
# Writes version.h defining BTRFS_BUILD_VERSION as "Btrfs <version>".
# <version> is the built-in default below, or the output of
# `git describe --tags` when run inside a git checkout, with "-dirty"
# appended if there are uncommitted changes outside scripts/package.
# version.h is only replaced when its content would actually change.
#
# Copyright 2008, Aron Griffis <agriffis@n01se.net>
# Copyright 2008, Oracle
# Released under the GNU GPLv2

v="v0.16"

# Only consult git when it is installed and we are inside a repository.
if which git &> /dev/null && git branch >& /dev/null; then
	if head=$(git rev-parse --verify HEAD 2>/dev/null); then
		if tag=$(git describe --tags 2>/dev/null); then
			v="$tag"
		fi

		# Are there uncommitted changes?
		git update-index --refresh --unmerged > /dev/null
		if git diff-index --name-only HEAD |
		    grep -v "^scripts/package" |
		    read dummy; then
			v="$v"-dirty
		fi
	fi
fi

# Generate the candidate header next to the real one.
cat > .build-version.h <<EOF
#ifndef __BUILD_VERSION
#define __BUILD_VERSION
#define BTRFS_BUILD_VERSION "Btrfs $v"
#endif
EOF

# Leave version.h untouched when nothing changed.
if diff -q version.h .build-version.h >& /dev/null; then
	rm .build-version.h
	exit 0
fi

mv .build-version.h version.h
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_VOLUMES_
|
||||
#define __BTRFS_VOLUMES_
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include "async-thread.h"
|
||||
|
||||
struct buffer_head;
|
||||
struct btrfs_device {
|
||||
struct list_head dev_list;
|
||||
struct list_head dev_alloc_list;
|
||||
struct btrfs_fs_devices *fs_devices;
|
||||
struct btrfs_root *dev_root;
|
||||
struct bio *pending_bios;
|
||||
struct bio *pending_bio_tail;
|
||||
int running_pending;
|
||||
u64 generation;
|
||||
|
||||
int barriers;
|
||||
int writeable;
|
||||
int in_fs_metadata;
|
||||
|
||||
spinlock_t io_lock;
|
||||
|
||||
struct block_device *bdev;
|
||||
|
||||
/* the mode sent to open_bdev_exclusive */
|
||||
fmode_t mode;
|
||||
|
||||
char *name;
|
||||
|
||||
/* the internal btrfs device id */
|
||||
u64 devid;
|
||||
|
||||
/* size of the device */
|
||||
u64 total_bytes;
|
||||
|
||||
/* bytes used */
|
||||
u64 bytes_used;
|
||||
|
||||
/* optimal io alignment for this device */
|
||||
u32 io_align;
|
||||
|
||||
/* optimal io width for this device */
|
||||
u32 io_width;
|
||||
|
||||
/* minimal io size for this device */
|
||||
u32 sector_size;
|
||||
|
||||
/* type and info about this device */
|
||||
u64 type;
|
||||
|
||||
/* physical drive uuid (or lvm uuid) */
|
||||
u8 uuid[BTRFS_UUID_SIZE];
|
||||
|
||||
struct btrfs_work work;
|
||||
};
|
||||
|
||||
struct btrfs_fs_devices {
|
||||
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
|
||||
|
||||
/* the device with this id has the most recent copy of the super */
|
||||
u64 latest_devid;
|
||||
u64 latest_trans;
|
||||
u64 num_devices;
|
||||
u64 open_devices;
|
||||
u64 rw_devices;
|
||||
u64 total_rw_bytes;
|
||||
struct block_device *latest_bdev;
|
||||
/* all of the devices in the FS */
|
||||
struct list_head devices;
|
||||
|
||||
/* devices not currently being allocated */
|
||||
struct list_head alloc_list;
|
||||
struct list_head list;
|
||||
|
||||
struct btrfs_fs_devices *seed;
|
||||
int seeding;
|
||||
|
||||
int opened;
|
||||
};
|
||||
|
||||
struct btrfs_bio_stripe {
|
||||
struct btrfs_device *dev;
|
||||
u64 physical;
|
||||
};
|
||||
|
||||
struct btrfs_multi_bio {
|
||||
atomic_t stripes_pending;
|
||||
bio_end_io_t *end_io;
|
||||
struct bio *orig_bio;
|
||||
void *private;
|
||||
atomic_t error;
|
||||
int max_errors;
|
||||
int num_stripes;
|
||||
struct btrfs_bio_stripe stripes[];
|
||||
};
|
||||
|
||||
#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
|
||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
||||
|
||||
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device,
|
||||
u64 chunk_tree, u64 chunk_objectid,
|
||||
u64 chunk_offset, u64 start, u64 num_bytes);
|
||||
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
|
||||
u64 logical, u64 *length,
|
||||
struct btrfs_multi_bio **multi_ret, int mirror_num);
|
||||
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
|
||||
u64 chunk_start, u64 physical, u64 devid,
|
||||
u64 **logical, int *naddrs, int *stripe_len);
|
||||
int btrfs_read_sys_array(struct btrfs_root *root);
|
||||
int btrfs_read_chunk_tree(struct btrfs_root *root);
|
||||
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root, u64 type);
|
||||
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
|
||||
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
|
||||
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
|
||||
int mirror_num, int async_submit);
|
||||
int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
|
||||
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
||||
fmode_t flags, void *holder);
|
||||
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
struct btrfs_fs_devices **fs_devices_ret);
|
||||
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
|
||||
int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
|
||||
int btrfs_add_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_device *device);
|
||||
int btrfs_rm_device(struct btrfs_root *root, char *device_path);
|
||||
int btrfs_cleanup_fs_uuids(void);
|
||||
int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
|
||||
int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
|
||||
u64 logical, struct page *page);
|
||||
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 new_size);
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
|
||||
u8 *uuid, u8 *fsid);
|
||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||
int btrfs_init_new_device(struct btrfs_root *root, char *path);
|
||||
int btrfs_balance(struct btrfs_root *dev_root);
|
||||
void btrfs_unlock_volumes(void);
|
||||
void btrfs_lock_volumes(void);
|
||||
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
|
||||
#endif
|
|
@ -0,0 +1,322 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Red Hat. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "ctree.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "transaction.h"
|
||||
#include "xattr.h"
|
||||
#include "disk-io.h"
|
||||
|
||||
|
||||
ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
|
||||
void *buffer, size_t size)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
int ret = 0;
|
||||
unsigned long data_ptr;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
/* lookup the xattr by name */
|
||||
di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
|
||||
strlen(name), 0);
|
||||
if (!di || IS_ERR(di)) {
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
/* if size is 0, that means we want the size of the attr */
|
||||
if (!size) {
|
||||
ret = btrfs_dir_data_len(leaf, di);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* now get the data out of our dir_item */
|
||||
if (btrfs_dir_data_len(leaf, di) > size) {
|
||||
ret = -ERANGE;
|
||||
goto out;
|
||||
}
|
||||
data_ptr = (unsigned long)((char *)(di + 1) +
|
||||
btrfs_dir_name_len(leaf, di));
|
||||
read_extent_buffer(leaf, buffer, data_ptr,
|
||||
btrfs_dir_data_len(leaf, di));
|
||||
ret = btrfs_dir_data_len(leaf, di);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __btrfs_setxattr(struct inode *inode, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_path *path;
|
||||
int ret = 0, mod = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
btrfs_set_trans_block_group(trans, inode);
|
||||
|
||||
/* first lets see if we already have this xattr */
|
||||
di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
|
||||
strlen(name), -1);
|
||||
if (IS_ERR(di)) {
|
||||
ret = PTR_ERR(di);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* ok we already have this xattr, lets remove it */
|
||||
if (di) {
|
||||
/* if we want create only exit */
|
||||
if (flags & XATTR_CREATE) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
||||
if (ret)
|
||||
goto out;
|
||||
btrfs_release_path(root, path);
|
||||
|
||||
/* if we don't have a value then we are removing the xattr */
|
||||
if (!value) {
|
||||
mod = 1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
btrfs_release_path(root, path);
|
||||
|
||||
if (flags & XATTR_REPLACE) {
|
||||
/* we couldn't find the attr to replace */
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* ok we have to create a completely new xattr */
|
||||
ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
|
||||
value, size, inode->i_ino);
|
||||
if (ret)
|
||||
goto out;
|
||||
mod = 1;
|
||||
|
||||
out:
|
||||
if (mod) {
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
}
|
||||
|
||||
btrfs_end_transaction(trans, root);
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
||||
{
|
||||
struct btrfs_key key, found_key;
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_item *item;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_dir_item *di;
|
||||
int ret = 0, slot, advance;
|
||||
size_t total_size = 0, size_left = size;
|
||||
unsigned long name_ptr;
|
||||
size_t name_len;
|
||||
u32 nritems;
|
||||
|
||||
/*
|
||||
* ok we want all objects associated with this id.
|
||||
* NOTE: we set key.offset = 0; because we want to start with the
|
||||
* first xattr that we find and walk forward
|
||||
*/
|
||||
key.objectid = inode->i_ino;
|
||||
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
|
||||
key.offset = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
path->reada = 2;
|
||||
|
||||
/* search for our xattrs */
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
ret = 0;
|
||||
advance = 0;
|
||||
while (1) {
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
slot = path->slots[0];
|
||||
|
||||
/* this is where we start walking through the path */
|
||||
if (advance || slot >= nritems) {
|
||||
/*
|
||||
* if we've reached the last slot in this leaf we need
|
||||
* to go to the next leaf and reset everything
|
||||
*/
|
||||
if (slot >= nritems-1) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret)
|
||||
break;
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
slot = path->slots[0];
|
||||
} else {
|
||||
/*
|
||||
* just walking through the slots on this leaf
|
||||
*/
|
||||
slot++;
|
||||
path->slots[0]++;
|
||||
}
|
||||
}
|
||||
advance = 1;
|
||||
|
||||
item = btrfs_item_nr(leaf, slot);
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
/* check to make sure this item is what we want */
|
||||
if (found_key.objectid != key.objectid)
|
||||
break;
|
||||
if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY)
|
||||
break;
|
||||
|
||||
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
|
||||
|
||||
name_len = btrfs_dir_name_len(leaf, di);
|
||||
total_size += name_len + 1;
|
||||
|
||||
/* we are just looking for how big our buffer needs to be */
|
||||
if (!size)
|
||||
continue;
|
||||
|
||||
if (!buffer || (name_len + 1) > size_left) {
|
||||
ret = -ERANGE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
name_ptr = (unsigned long)(di + 1);
|
||||
read_extent_buffer(leaf, buffer, name_ptr, name_len);
|
||||
buffer[name_len] = '\0';
|
||||
|
||||
size_left -= name_len + 1;
|
||||
buffer += name_len + 1;
|
||||
}
|
||||
ret = total_size;
|
||||
|
||||
err:
|
||||
btrfs_free_path(path);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* List of handlers for synthetic system.* attributes. All real ondisk
|
||||
* attributes are handled directly.
|
||||
*/
|
||||
struct xattr_handler *btrfs_xattr_handlers[] = {
|
||||
#ifdef CONFIG_FS_POSIX_ACL
|
||||
&btrfs_xattr_acl_access_handler,
|
||||
&btrfs_xattr_acl_default_handler,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* Check if the attribute is in a supported namespace.
|
||||
*
|
||||
* This is applied after the check for the synthetic attributes in the system
|
||||
* namespace.
|
||||
*/
|
||||
static bool btrfs_is_valid_xattr(const char *name)
|
||||
{
|
||||
return !strncmp(name, XATTR_SECURITY_PREFIX,
|
||||
XATTR_SECURITY_PREFIX_LEN) ||
|
||||
!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
|
||||
!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
|
||||
!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
|
||||
}
|
||||
|
||||
ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size)
|
||||
{
|
||||
/*
|
||||
* If this is a request for a synthetic attribute in the system.*
|
||||
* namespace use the generic infrastructure to resolve a handler
|
||||
* for it via sb->s_xattr.
|
||||
*/
|
||||
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
|
||||
return generic_getxattr(dentry, name, buffer, size);
|
||||
|
||||
if (!btrfs_is_valid_xattr(name))
|
||||
return -EOPNOTSUPP;
|
||||
return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
|
||||
}
|
||||
|
||||
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
|
||||
size_t size, int flags)
|
||||
{
|
||||
/*
|
||||
* If this is a request for a synthetic attribute in the system.*
|
||||
* namespace use the generic infrastructure to resolve a handler
|
||||
* for it via sb->s_xattr.
|
||||
*/
|
||||
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
|
||||
return generic_setxattr(dentry, name, value, size, flags);
|
||||
|
||||
if (!btrfs_is_valid_xattr(name))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (size == 0)
|
||||
value = ""; /* empty EA, do not remove */
|
||||
return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
|
||||
}
|
||||
|
||||
int btrfs_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
/*
|
||||
* If this is a request for a synthetic attribute in the system.*
|
||||
* namespace use the generic infrastructure to resolve a handler
|
||||
* for it via sb->s_xattr.
|
||||
*/
|
||||
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
|
||||
return generic_removexattr(dentry, name);
|
||||
|
||||
if (!btrfs_is_valid_xattr(name))
|
||||
return -EOPNOTSUPP;
|
||||
return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2007 Red Hat. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __XATTR__
|
||||
#define __XATTR__
|
||||
|
||||
#include <linux/xattr.h>
|
||||
|
||||
extern struct xattr_handler btrfs_xattr_acl_access_handler;
|
||||
extern struct xattr_handler btrfs_xattr_acl_default_handler;
|
||||
extern struct xattr_handler *btrfs_xattr_handlers[];
|
||||
|
||||
extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
|
||||
void *buffer, size_t size);
|
||||
extern int __btrfs_setxattr(struct inode *inode, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
|
||||
extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size);
|
||||
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
|
||||
|
||||
#endif /* __XATTR__ */
|
|
@ -0,0 +1,632 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Based on jffs2 zlib code:
|
||||
* Copyright © 2001-2007 Red Hat, Inc.
|
||||
* Created by David Woodhouse <dwmw2@infradead.org>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/zlib.h>
|
||||
#include <linux/zutil.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/bio.h>
|
||||
#include "compression.h"
|
||||
|
||||
/*
 * Plan behind this constant:
 *   1. call deflate() with avail_in == *sourcelen,
 *      avail_out == *dstlen - 12 and flush == Z_FINISH;
 *   2. if it doesn't manage to finish, call it again with
 *      avail_in == 0 and avail_out set to the remaining 12 bytes
 *      for it to clean up.
 *
 * Q: Is 12 bytes sufficient?
 *
 * NOTE(review): presumably carried over from the jffs2 zlib code this
 * file is based on; no code visible in this file uses the macro --
 * confirm before relying on it.
 */
#define STREAM_END_SPACE 12
|
||||
|
||||
struct workspace {
|
||||
z_stream inf_strm;
|
||||
z_stream def_strm;
|
||||
char *buf;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static LIST_HEAD(idle_workspace);
|
||||
static DEFINE_SPINLOCK(workspace_lock);
|
||||
static unsigned long num_workspace;
|
||||
static atomic_t alloc_workspace = ATOMIC_INIT(0);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
|
||||
|
||||
/*
|
||||
* this finds an available zlib workspace or allocates a new one
|
||||
* NULL or an ERR_PTR is returned if things go bad.
|
||||
*/
|
||||
static struct workspace *find_zlib_workspace(void)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
int ret;
|
||||
int cpus = num_online_cpus();
|
||||
|
||||
again:
|
||||
spin_lock(&workspace_lock);
|
||||
if (!list_empty(&idle_workspace)) {
|
||||
workspace = list_entry(idle_workspace.next, struct workspace,
|
||||
list);
|
||||
list_del(&workspace->list);
|
||||
num_workspace--;
|
||||
spin_unlock(&workspace_lock);
|
||||
return workspace;
|
||||
|
||||
}
|
||||
spin_unlock(&workspace_lock);
|
||||
if (atomic_read(&alloc_workspace) > cpus) {
|
||||
DEFINE_WAIT(wait);
|
||||
prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(&alloc_workspace) > cpus)
|
||||
schedule();
|
||||
finish_wait(&workspace_wait, &wait);
|
||||
goto again;
|
||||
}
|
||||
atomic_inc(&alloc_workspace);
|
||||
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
|
||||
if (!workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
|
||||
if (!workspace->def_strm.workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
|
||||
if (!workspace->inf_strm.workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_inflate;
|
||||
}
|
||||
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
|
||||
if (!workspace->buf) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_kmalloc;
|
||||
}
|
||||
return workspace;
|
||||
|
||||
fail_kmalloc:
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
fail_inflate:
|
||||
vfree(workspace->def_strm.workspace);
|
||||
fail:
|
||||
kfree(workspace);
|
||||
atomic_dec(&alloc_workspace);
|
||||
wake_up(&workspace_wait);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* put a workspace struct back on the list or free it if we have enough
|
||||
* idle ones sitting around
|
||||
*/
|
||||
static int free_workspace(struct workspace *workspace)
|
||||
{
|
||||
spin_lock(&workspace_lock);
|
||||
if (num_workspace < num_online_cpus()) {
|
||||
list_add_tail(&workspace->list, &idle_workspace);
|
||||
num_workspace++;
|
||||
spin_unlock(&workspace_lock);
|
||||
if (waitqueue_active(&workspace_wait))
|
||||
wake_up(&workspace_wait);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&workspace_lock);
|
||||
vfree(workspace->def_strm.workspace);
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
kfree(workspace->buf);
|
||||
kfree(workspace);
|
||||
|
||||
atomic_dec(&alloc_workspace);
|
||||
if (waitqueue_active(&workspace_wait))
|
||||
wake_up(&workspace_wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cleanup function for module exit
|
||||
*/
|
||||
static void free_workspaces(void)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
while (!list_empty(&idle_workspace)) {
|
||||
workspace = list_entry(idle_workspace.next, struct workspace,
|
||||
list);
|
||||
list_del(&workspace->list);
|
||||
vfree(workspace->def_strm.workspace);
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
kfree(workspace->buf);
|
||||
kfree(workspace);
|
||||
atomic_dec(&alloc_workspace);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* given an address space and start/len, compress the bytes.
|
||||
*
|
||||
* pages are allocated to hold the compressed result and stored
|
||||
* in 'pages'
|
||||
*
|
||||
* out_pages is used to return the number of pages allocated. There
|
||||
* may be pages allocated even if we return an error
|
||||
*
|
||||
* total_in is used to return the number of bytes actually read. It
|
||||
* may be smaller then len if we had to exit early because we
|
||||
* ran out of room in the pages array or because we cross the
|
||||
* max_out threshold.
|
||||
*
|
||||
* total_out is used to return the total number of compressed bytes
|
||||
*
|
||||
* max_out tells us the max number of bytes that we're allowed to
|
||||
* stuff into pages
|
||||
*/
|
||||
int btrfs_zlib_compress_pages(struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out)
|
||||
{
|
||||
int ret;
|
||||
struct workspace *workspace;
|
||||
char *data_in;
|
||||
char *cpage_out;
|
||||
int nr_pages = 0;
|
||||
struct page *in_page = NULL;
|
||||
struct page *out_page = NULL;
|
||||
int out_written = 0;
|
||||
int in_read = 0;
|
||||
unsigned long bytes_left;
|
||||
|
||||
*out_pages = 0;
|
||||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (!workspace)
|
||||
return -1;
|
||||
|
||||
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
|
||||
printk(KERN_WARNING "deflateInit failed\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
workspace->def_strm.total_in = 0;
|
||||
workspace->def_strm.total_out = 0;
|
||||
|
||||
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
cpage_out = kmap(out_page);
|
||||
pages[0] = out_page;
|
||||
nr_pages = 1;
|
||||
|
||||
workspace->def_strm.next_in = data_in;
|
||||
workspace->def_strm.next_out = cpage_out;
|
||||
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
|
||||
|
||||
out_written = 0;
|
||||
in_read = 0;
|
||||
|
||||
while (workspace->def_strm.total_in < len) {
|
||||
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
|
||||
if (ret != Z_OK) {
|
||||
printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
|
||||
ret);
|
||||
zlib_deflateEnd(&workspace->def_strm);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* we're making it bigger, give up */
|
||||
if (workspace->def_strm.total_in > 8192 &&
|
||||
workspace->def_strm.total_in <
|
||||
workspace->def_strm.total_out) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
/* we need another page for writing out. Test this
|
||||
* before the total_in so we will pull in a new page for
|
||||
* the stream end if required
|
||||
*/
|
||||
if (workspace->def_strm.avail_out == 0) {
|
||||
kunmap(out_page);
|
||||
if (nr_pages == nr_dest_pages) {
|
||||
out_page = NULL;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
cpage_out = kmap(out_page);
|
||||
pages[nr_pages] = out_page;
|
||||
nr_pages++;
|
||||
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
workspace->def_strm.next_out = cpage_out;
|
||||
}
|
||||
/* we're all done */
|
||||
if (workspace->def_strm.total_in >= len)
|
||||
break;
|
||||
|
||||
/* we've read in a full page, get a new one */
|
||||
if (workspace->def_strm.avail_in == 0) {
|
||||
if (workspace->def_strm.total_out > max_out)
|
||||
break;
|
||||
|
||||
bytes_left = len - workspace->def_strm.total_in;
|
||||
kunmap(in_page);
|
||||
page_cache_release(in_page);
|
||||
|
||||
start += PAGE_CACHE_SIZE;
|
||||
in_page = find_get_page(mapping,
|
||||
start >> PAGE_CACHE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
workspace->def_strm.avail_in = min(bytes_left,
|
||||
PAGE_CACHE_SIZE);
|
||||
workspace->def_strm.next_in = data_in;
|
||||
}
|
||||
}
|
||||
workspace->def_strm.avail_in = 0;
|
||||
ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
|
||||
zlib_deflateEnd(&workspace->def_strm);
|
||||
|
||||
if (ret != Z_STREAM_END) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
*total_out = workspace->def_strm.total_out;
|
||||
*total_in = workspace->def_strm.total_in;
|
||||
out:
|
||||
*out_pages = nr_pages;
|
||||
if (out_page)
|
||||
kunmap(out_page);
|
||||
|
||||
if (in_page) {
|
||||
kunmap(in_page);
|
||||
page_cache_release(in_page);
|
||||
}
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* pages_in is an array of pages with compressed data.
|
||||
*
|
||||
* disk_start is the starting logical offset of this array in the file
|
||||
*
|
||||
* bvec is a bio_vec of pages from the file that we want to decompress into
|
||||
*
|
||||
* vcnt is the count of pages in the biovec
|
||||
*
|
||||
* srclen is the number of bytes in pages_in
|
||||
*
|
||||
* The basic idea is that we have a bio that was created by readpages.
|
||||
* The pages in the bio are for the uncompressed data, and they may not
|
||||
* be contiguous. They all correspond to the range of bytes covered by
|
||||
* the compressed extent.
|
||||
*/
|
||||
int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen)
|
||||
{
|
||||
int ret = 0;
|
||||
int wbits = MAX_WBITS;
|
||||
struct workspace *workspace;
|
||||
char *data_in;
|
||||
size_t total_out = 0;
|
||||
unsigned long page_bytes_left;
|
||||
unsigned long page_in_index = 0;
|
||||
unsigned long page_out_index = 0;
|
||||
struct page *page_out;
|
||||
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
|
||||
PAGE_CACHE_SIZE;
|
||||
unsigned long buf_start;
|
||||
unsigned long buf_offset;
|
||||
unsigned long bytes;
|
||||
unsigned long working_bytes;
|
||||
unsigned long pg_offset;
|
||||
unsigned long start_byte;
|
||||
unsigned long current_buf_start;
|
||||
char *kaddr;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (!workspace)
|
||||
return -ENOMEM;
|
||||
|
||||
data_in = kmap(pages_in[page_in_index]);
|
||||
workspace->inf_strm.next_in = data_in;
|
||||
workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
|
||||
workspace->inf_strm.total_in = 0;
|
||||
|
||||
workspace->inf_strm.total_out = 0;
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
page_out = bvec[page_out_index].bv_page;
|
||||
page_bytes_left = PAGE_CACHE_SIZE;
|
||||
pg_offset = 0;
|
||||
|
||||
/* If it's deflate, and it's got no preset dictionary, then
|
||||
we can tell zlib to skip the adler32 check. */
|
||||
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
|
||||
((data_in[0] & 0x0f) == Z_DEFLATED) &&
|
||||
!(((data_in[0]<<8) + data_in[1]) % 31)) {
|
||||
|
||||
wbits = -((data_in[0] >> 4) + 8);
|
||||
workspace->inf_strm.next_in += 2;
|
||||
workspace->inf_strm.avail_in -= 2;
|
||||
}
|
||||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
||||
printk(KERN_WARNING "inflateInit failed\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
while (workspace->inf_strm.total_in < srclen) {
|
||||
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END)
|
||||
break;
|
||||
/*
|
||||
* buf start is the byte offset we're of the start of
|
||||
* our workspace buffer
|
||||
*/
|
||||
buf_start = total_out;
|
||||
|
||||
/* total_out is the last byte of the workspace buffer */
|
||||
total_out = workspace->inf_strm.total_out;
|
||||
|
||||
working_bytes = total_out - buf_start;
|
||||
|
||||
/*
|
||||
* start byte is the first byte of the page we're currently
|
||||
* copying into relative to the start of the compressed data.
|
||||
*/
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
if (working_bytes == 0) {
|
||||
/* we didn't make progress in this inflate
|
||||
* call, we're done
|
||||
*/
|
||||
if (ret != Z_STREAM_END)
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* we haven't yet hit data corresponding to this page */
|
||||
if (total_out <= start_byte)
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* the start of the data we care about is offset into
|
||||
* the middle of our working buffer
|
||||
*/
|
||||
if (total_out > start_byte && buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes -= buf_offset;
|
||||
} else {
|
||||
buf_offset = 0;
|
||||
}
|
||||
current_buf_start = buf_start;
|
||||
|
||||
/* copy bytes from the working buffer into the pages */
|
||||
while (working_bytes > 0) {
|
||||
bytes = min(PAGE_CACHE_SIZE - pg_offset,
|
||||
PAGE_CACHE_SIZE - buf_offset);
|
||||
bytes = min(bytes, working_bytes);
|
||||
kaddr = kmap_atomic(page_out, KM_USER0);
|
||||
memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
|
||||
bytes);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
flush_dcache_page(page_out);
|
||||
|
||||
pg_offset += bytes;
|
||||
page_bytes_left -= bytes;
|
||||
buf_offset += bytes;
|
||||
working_bytes -= bytes;
|
||||
current_buf_start += bytes;
|
||||
|
||||
/* check if we need to pick another page */
|
||||
if (page_bytes_left == 0) {
|
||||
page_out_index++;
|
||||
if (page_out_index >= vcnt) {
|
||||
ret = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
page_out = bvec[page_out_index].bv_page;
|
||||
pg_offset = 0;
|
||||
page_bytes_left = PAGE_CACHE_SIZE;
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
/*
|
||||
* make sure our new page is covered by this
|
||||
* working buffer
|
||||
*/
|
||||
if (total_out <= start_byte)
|
||||
goto next;
|
||||
|
||||
/* the next page in the biovec might not
|
||||
* be adjacent to the last page, but it
|
||||
* might still be found inside this working
|
||||
* buffer. bump our offset pointer
|
||||
*/
|
||||
if (total_out > start_byte &&
|
||||
current_buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes = total_out - start_byte;
|
||||
current_buf_start = buf_start +
|
||||
buf_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
next:
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
|
||||
if (workspace->inf_strm.avail_in == 0) {
|
||||
unsigned long tmp;
|
||||
kunmap(pages_in[page_in_index]);
|
||||
page_in_index++;
|
||||
if (page_in_index >= total_pages_in) {
|
||||
data_in = NULL;
|
||||
break;
|
||||
}
|
||||
data_in = kmap(pages_in[page_in_index]);
|
||||
workspace->inf_strm.next_in = data_in;
|
||||
tmp = srclen - workspace->inf_strm.total_in;
|
||||
workspace->inf_strm.avail_in = min(tmp,
|
||||
PAGE_CACHE_SIZE);
|
||||
}
|
||||
}
|
||||
if (ret != Z_STREAM_END)
|
||||
ret = -1;
|
||||
else
|
||||
ret = 0;
|
||||
done:
|
||||
zlib_inflateEnd(&workspace->inf_strm);
|
||||
if (data_in)
|
||||
kunmap(pages_in[page_in_index]);
|
||||
out:
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* a less complex decompression routine. Our compressed data fits in a
|
||||
* single page, and we want to read a single page out of it.
|
||||
* start_byte tells us the offset into the compressed data we're interested in
|
||||
*/
|
||||
int btrfs_zlib_decompress(unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen)
|
||||
{
|
||||
int ret = 0;
|
||||
int wbits = MAX_WBITS;
|
||||
struct workspace *workspace;
|
||||
unsigned long bytes_left = destlen;
|
||||
unsigned long total_out = 0;
|
||||
char *kaddr;
|
||||
|
||||
if (destlen > PAGE_CACHE_SIZE)
|
||||
return -ENOMEM;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (!workspace)
|
||||
return -ENOMEM;
|
||||
|
||||
workspace->inf_strm.next_in = data_in;
|
||||
workspace->inf_strm.avail_in = srclen;
|
||||
workspace->inf_strm.total_in = 0;
|
||||
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
workspace->inf_strm.total_out = 0;
|
||||
/* If it's deflate, and it's got no preset dictionary, then
|
||||
we can tell zlib to skip the adler32 check. */
|
||||
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
|
||||
((data_in[0] & 0x0f) == Z_DEFLATED) &&
|
||||
!(((data_in[0]<<8) + data_in[1]) % 31)) {
|
||||
|
||||
wbits = -((data_in[0] >> 4) + 8);
|
||||
workspace->inf_strm.next_in += 2;
|
||||
workspace->inf_strm.avail_in -= 2;
|
||||
}
|
||||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
||||
printk(KERN_WARNING "inflateInit failed\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (bytes_left > 0) {
|
||||
unsigned long buf_start;
|
||||
unsigned long buf_offset;
|
||||
unsigned long bytes;
|
||||
unsigned long pg_offset = 0;
|
||||
|
||||
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END)
|
||||
break;
|
||||
|
||||
buf_start = total_out;
|
||||
total_out = workspace->inf_strm.total_out;
|
||||
|
||||
if (total_out == buf_start) {
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (total_out <= start_byte)
|
||||
goto next;
|
||||
|
||||
if (total_out > start_byte && buf_start < start_byte)
|
||||
buf_offset = start_byte - buf_start;
|
||||
else
|
||||
buf_offset = 0;
|
||||
|
||||
bytes = min(PAGE_CACHE_SIZE - pg_offset,
|
||||
PAGE_CACHE_SIZE - buf_offset);
|
||||
bytes = min(bytes, bytes_left);
|
||||
|
||||
kaddr = kmap_atomic(dest_page, KM_USER0);
|
||||
memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
|
||||
pg_offset += bytes;
|
||||
bytes_left -= bytes;
|
||||
next:
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
}
|
||||
|
||||
if (ret != Z_STREAM_END && bytes_left != 0)
|
||||
ret = -1;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
zlib_inflateEnd(&workspace->inf_strm);
|
||||
out:
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Tear down the zlib support: releases the workspaces that are still
 * sitting on the idle list.
 */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}
|
Loading…
Reference in New Issue