linux/fs/nfsd/nfs4recover.c

422 lines
9.9 KiB
C
Raw Normal View History

/*
* linux/fs/nfsd/nfs4recover.c
*
* Copyright (c) 2004 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@citi.umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/err.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfs4.h>
#include <linux/nfsd/state.h>
#include <linux/nfsd/xdr4.h>
#include <linux/param.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <asm/uaccess.h>
#include <asm/scatterlist.h>
#include <linux/crypto.h>
#define NFSDDBG_FACILITY NFSDDBG_PROC
/* Globals */
static struct nameidata rec_dir;
static int rec_dir_init = 0;
static void
nfs4_save_user(uid_t *saveuid, gid_t *savegid)
{
*saveuid = current->fsuid;
*savegid = current->fsgid;
current->fsuid = 0;
current->fsgid = 0;
}
static void
nfs4_reset_user(uid_t saveuid, gid_t savegid)
{
current->fsuid = saveuid;
current->fsgid = savegid;
}
static void
md5_to_hex(char *out, char *md5)
{
int i;
for (i=0; i<16; i++) {
unsigned char c = md5[i];
*out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
*out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
}
*out = '\0';
}
__be32
nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
struct hash_desc desc;
struct scatterlist sg[1];
__be32 status = nfserr_resource;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
goto out_no_tfm;
cksum.len = crypto_hash_digestsize(desc.tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL)
goto out;
sg[0].page = virt_to_page(clname->data);
sg[0].offset = offset_in_page(clname->data);
sg[0].length = clname->len;
if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
goto out;
md5_to_hex(dname, cksum.data);
kfree(cksum.data);
status = nfs_ok;
out:
crypto_free_hash(desc.tfm);
out_no_tfm:
return status;
}
static void
nfsd4_sync_rec_dir(void)
{
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
nfsd_sync_dir(rec_dir.dentry);
mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
}
int
nfsd4_create_clid_dir(struct nfs4_client *clp)
{
char *dname = clp->cl_recdir;
struct dentry *dentry;
uid_t uid;
gid_t gid;
int status;
dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
if (!rec_dir_init || clp->cl_firststate)
return 0;
nfs4_save_user(&uid, &gid);
/* lock the parent */
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
}
status = -EEXIST;
if (dentry->d_inode) {
dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
goto out_put;
}
status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
out_put:
dput(dentry);
out_unlock:
mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
if (status == 0) {
clp->cl_firststate = 1;
nfsd4_sync_rec_dir();
}
nfs4_reset_user(uid, gid);
dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
return status;
}
typedef int (recdir_func)(struct dentry *, struct dentry *);
struct dentry_list {
struct dentry *dentry;
struct list_head list;
};
struct dentry_list_arg {
struct list_head dentries;
struct dentry *parent;
};
static int
nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
[PATCH] VFS: Make filldir_t and struct kstat deal in 64-bit inode numbers These patches make the kernel pass 64-bit inode numbers internally when communicating to userspace, even on a 32-bit system. They are required because some filesystems have intrinsic 64-bit inode numbers: NFS3+ and XFS for example. The 64-bit inode numbers are then propagated to userspace automatically where the arch supports it. Problems have been seen with userspace (eg: ld.so) using the 64-bit inode number returned by stat64() or getdents64() to differentiate files, and failing because the 64-bit inode number space was compressed to 32-bits, and so overlaps occur. This patch: Make filldir_t take a 64-bit inode number and struct kstat carry a 64-bit inode number so that 64-bit inode numbers can be passed back to userspace. The stat functions then returns the full 64-bit inode number where available and where possible. If it is not possible to represent the inode number supplied by the filesystem in the field provided by userspace, then error EOVERFLOW will be issued. Similarly, the getdents/readdir functions now pass the full 64-bit inode number to userspace where possible, returning EOVERFLOW instead when a directory entry is encountered that can't be properly represented. Note that this means that some inodes will not be stat'able on a 32-bit system with old libraries where they were before - but it does mean that there will be no ambiguity over what a 32-bit inode number refers to. Note similarly that directory scans may be cut short with an error on a 32-bit system with old libraries where the scan would work before for the same reasons. It is judged unlikely that this situation will occur because modern glibc uses 64-bit capable versions of stat and getdents class functions exclusively, and that older systems are unlikely to encounter unrepresentable inode numbers anyway. [akpm: alpha build fix] Signed-off-by: David Howells <dhowells@redhat.com> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-03 16:13:46 +08:00
loff_t offset, u64 ino, unsigned int d_type)
{
struct dentry_list_arg *dla = arg;
struct list_head *dentries = &dla->dentries;
struct dentry *parent = dla->parent;
struct dentry *dentry;
struct dentry_list *child;
if (name && isdotent(name, namlen))
return 0;
dentry = lookup_one_len(name, parent, namlen);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
child = kmalloc(sizeof(*child), GFP_KERNEL);
if (child == NULL)
return -ENOMEM;
child->dentry = dentry;
list_add(&child->list, dentries);
return 0;
}
static int
nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
{
struct file *filp;
struct dentry_list_arg dla = {
.parent = dir,
};
struct list_head *dentries = &dla.dentries;
struct dentry_list *child;
uid_t uid;
gid_t gid;
int status;
if (!rec_dir_init)
return 0;
nfs4_save_user(&uid, &gid);
filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
status = PTR_ERR(filp);
if (IS_ERR(filp))
goto out;
INIT_LIST_HEAD(dentries);
status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
fput(filp);
while (!list_empty(dentries)) {
child = list_entry(dentries->next, struct dentry_list, list);
status = f(dir, child->dentry);
if (status)
goto out;
list_del(&child->list);
dput(child->dentry);
kfree(child);
}
out:
while (!list_empty(dentries)) {
child = list_entry(dentries->next, struct dentry_list, list);
list_del(&child->list);
dput(child->dentry);
kfree(child);
}
nfs4_reset_user(uid, gid);
return status;
}
static int
nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
{
int status;
if (!S_ISREG(dir->d_inode->i_mode)) {
printk("nfsd4: non-file found in client recovery directory\n");
return -EINVAL;
}
[PATCH] lockdep: annotate nfsd4 recover code > ============================================= > [ INFO: possible recursive locking detected ] > 2.6.18-1.2724.lockdepPAE #1 > --------------------------------------------- > nfsd/6884 is trying to acquire lock: > (&inode->i_mutex){--..}, at: [<c04811e5>] vfs_rmdir+0x73/0xf4 > > but task is already holding lock: > (&inode->i_mutex){--..}, at: [<f8dfa621>] > nfsd4_clear_clid_dir+0x1f/0x3d [nfsd] > > other info that might help us debug this: > 3 locks held by nfsd/6884: > #0: (hash_sem){----}, at: [<f8de05eb>] nfsd+0x181/0x2ea [nfsd] > #1: (client_mutex){--..}, at: [<f8df6d19>] > nfsd4_setclientid_confirm+0x3b/0x2cf [nfsd] > #2: (&inode->i_mutex){--..}, at: [<f8dfa621>] > nfsd4_clear_clid_dir+0x1f/0x3d [nfsd] > > stack backtrace: > [<c040524d>] dump_trace+0x69/0x1af > [<c04053ab>] show_trace_log_lvl+0x18/0x2c > [<c040595f>] show_trace+0xf/0x11 > [<c0405a53>] dump_stack+0x15/0x17 > [<c043ca7a>] __lock_acquire+0x110/0x9b6 > [<c043d91e>] lock_acquire+0x5c/0x7a > [<c061a41b>] __mutex_lock_slowpath+0xde/0x234 > [<c04811e5>] vfs_rmdir+0x73/0xf4 > [<f8dfa62b>] nfsd4_clear_clid_dir+0x29/0x3d [nfsd] > [<f8dfa733>] nfsd4_remove_clid_dir+0xb8/0xf8 [nfsd] > [<f8df6e90>] nfsd4_setclientid_confirm+0x1b2/0x2cf [nfsd] > [<f8def19a>] nfsd4_proc_compound+0x137a/0x166c [nfsd] > [<f8de00d5>] nfsd_dispatch+0xc5/0x180 [nfsd] > [<f8d09d83>] svc_process+0x3bd/0x631 [sunrpc] > [<f8de0604>] nfsd+0x19a/0x2ea [nfsd] > [<c0404e27>] kernel_thread_helper+0x7/0x10 > DWARF2 unwinder stuck at kernel_thread_helper+0x7/0x10 > Leftover inexact backtrace: > ======================= Some nesting annotation to the nfsd4 recovery code. The vfs operations called will take dentry->d_inode->i_mutex. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-08 18:39:38 +08:00
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
status = vfs_unlink(dir->d_inode, dentry);
mutex_unlock(&dir->d_inode->i_mutex);
return status;
}
static int
nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
{
int status;
/* For now this directory should already be empty, but we empty it of
* any regular files anyway, just in case the directory was created by
* a kernel from the future.... */
nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
[PATCH] NFS4: fix for recursive locking problem When I was performing some operations on NFS, I got below error on server side. ============================================= [ INFO: possible recursive locking detected ] 2.6.19-prep #1 --------------------------------------------- nfsd4/3525 is trying to acquire lock: (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24 but task is already holding lock: (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24 other info that might help us debug this: 2 locks held by nfsd4/3525: #0: (client_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24 #1: (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24 stack backtrace: [<c04051ed>] show_trace_log_lvl+0x58/0x16a [<c04057fa>] show_trace+0xd/0x10 [<c0405913>] dump_stack+0x19/0x1b [<c043b6f1>] __lock_acquire+0x778/0x99c [<c043be86>] lock_acquire+0x4b/0x6d [<c0611ceb>] __mutex_lock_slowpath+0xbc/0x20a [<c0611e5a>] mutex_lock+0x21/0x24 [<c047fd7e>] vfs_rmdir+0x76/0xf8 [<f94b7ce9>] nfsd4_clear_clid_dir+0x2c/0x41 [nfsd] [<f94b7de9>] nfsd4_remove_clid_dir+0xb1/0xe8 [nfsd] [<f94b307b>] laundromat_main+0x9b/0x1c3 [nfsd] [<c04333d6>] run_workqueue+0x7a/0xbb [<c0433d0b>] worker_thread+0xd2/0x107 [<c0436285>] kthread+0xc3/0xf2 [<c0402005>] kernel_thread_helper+0x5/0xb =================================================================== Cause for this problem was,2 successive mutex_lock calls on 2 diffrent inodes ,as shown below static int nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) { int status; /* For now this directory should already be empty, but we empty it of * any regular files anyway, just in case the directory was created by * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); mutex_lock(&dir->d_inode->i_mutex); status = vfs_rmdir(dir->d_inode, dentry); ... int vfs_rmdir(struct inode *dir, struct dentry *dentry) { int error = may_delete(dir, dentry, 1); if (error) return error; if (!dir->i_op || !dir->i_op->rmdir) return -EPERM; DQUOT_INIT(dir); mutex_lock(&dentry->d_inode->i_mutex); ... So I have developed the patch to overcome this problem. Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com> Cc: Neil Brown <neilb@suse.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-11-03 14:07:12 +08:00
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
status = vfs_rmdir(dir->d_inode, dentry);
mutex_unlock(&dir->d_inode->i_mutex);
return status;
}
static int
nfsd4_unlink_clid_dir(char *name, int namlen)
{
struct dentry *dentry;
int status;
dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
dentry = lookup_one_len(name, rec_dir.dentry, namlen);
mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
return status;
}
status = -ENOENT;
if (!dentry->d_inode)
goto out;
status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
out:
dput(dentry);
return status;
}
void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
uid_t uid;
gid_t gid;
int status;
if (!rec_dir_init || !clp->cl_firststate)
return;
clp->cl_firststate = 0;
nfs4_save_user(&uid, &gid);
status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
nfs4_reset_user(uid, gid);
if (status == 0)
nfsd4_sync_rec_dir();
if (status)
printk("NFSD: Failed to remove expired client state directory"
" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
return;
}
static int
purge_old(struct dentry *parent, struct dentry *child)
{
int status;
if (nfs4_has_reclaimed_state(child->d_name.name))
return 0;
status = nfsd4_clear_clid_dir(parent, child);
if (status)
printk("failed to remove client recovery directory %s\n",
child->d_name.name);
/* Keep trying, success or failure: */
return 0;
}
void
nfsd4_recdir_purge_old(void) {
int status;
if (!rec_dir_init)
return;
status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
if (status == 0)
nfsd4_sync_rec_dir();
if (status)
printk("nfsd4: failed to purge old clients from recovery"
" directory %s\n", rec_dir.dentry->d_name.name);
return;
}
static int
load_recdir(struct dentry *parent, struct dentry *child)
{
if (child->d_name.len != HEXDIR_LEN - 1) {
printk("nfsd4: illegal name %s in recovery directory\n",
child->d_name.name);
/* Keep trying; maybe the others are OK: */
return 0;
}
nfs4_client_to_reclaim(child->d_name.name);
return 0;
}
int
nfsd4_recdir_load(void) {
int status;
status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
if (status)
printk("nfsd4: failed loading clients from recovery"
" directory %s\n", rec_dir.dentry->d_name.name);
return status;
}
/*
* Hold reference to the recovery directory.
*/
void
nfsd4_init_recdir(char *rec_dirname)
{
uid_t uid = 0;
gid_t gid = 0;
int status;
printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
rec_dirname);
BUG_ON(rec_dir_init);
nfs4_save_user(&uid, &gid);
status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
&rec_dir);
if (status)
printk("NFSD: unable to find recovery directory %s\n",
rec_dirname);
if (!status)
rec_dir_init = 1;
nfs4_reset_user(uid, gid);
}
void
nfsd4_shutdown_recdir(void)
{
if (!rec_dir_init)
return;
rec_dir_init = 0;
path_release(&rec_dir);
}