From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon@gmail.com>
Date: Wed, 29 Aug 2012 10:10:10 -0400
Subject: [PATCH 001/111] exportfs: add FILEID_INVALID to indicate invalid
 fid_type

This commit adds FILEID_INVALID = 0xff in fid_type to
indicate invalid fid_type

It avoids using magic number 255

Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Vivek Trivedi <vtrivedi018@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/exportfs/expfs.c      | 4 ++--
 fs/fhandle.c             | 2 +-
 fs/nfsd/nfsfh.c          | 4 ++--
 include/linux/exportfs.h | 5 +++++
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 29ab099e3e08..f1f1c59c2966 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 
 	if (parent && (len < 4)) {
 		*max_len = 4;
-		return 255;
+		return FILEID_INVALID;
 	} else if (len < 2) {
 		*max_len = 2;
-		return 255;
+		return FILEID_INVALID;
 	}
 
 	len = 2;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index f775bfdd6e4a..26f12b95702a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path,
 	handle_bytes = handle_dwords * sizeof(u32);
 	handle->handle_bytes = handle_bytes;
 	if ((handle->handle_bytes > f_handle.handle_bytes) ||
-	    (retval == 255) || (retval == -ENOSPC)) {
+	    (retval == FILEID_INVALID) || (retval == -ENOSPC)) {
 		/* As per old exportfs_encode_fh documentation
 		 * we could return ENOSPC to indicate overflow
 		 * But file system returned 255 always. So handle
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 032af381b3aa..814afaa4458a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255) {
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
 			fh_put(fhp);
 			return nfserr_opnotsupp;
 		}
@@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp)
 			goto out;
 
 		_fh_update(fhp, fhp->fh_export, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
 			return nfserr_opnotsupp;
 	}
 out:
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..0e1452546300 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -83,6 +83,11 @@ enum fid_type {
 	 * 64 bit parent inode number.
 	 */
 	FILEID_NILFS_WITH_PARENT = 0x62,
+
+	/*
+	 * Filesystems must not use 0xff file ID.
+	 */
+	FILEID_INVALID = 0xff,
 };
 
 struct fid {

From 01f6c8fd949f3a25a2617e6e1579a5c974b1cabf Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Thu, 18 Oct 2012 22:44:21 +0800
Subject: [PATCH 002/111] nfsd4: remove unused variable in nfsd4_delegreturn()

The variable inode is initialized but never used
otherwise, so remove the unused variable.

dpatch engine is used to auto generate this patch.
(https://github.com/weiyj/dpatch)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d0237f872cc4..620ff8143751 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3807,12 +3807,10 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfs4_delegation *dp;
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
-	struct inode *inode;
 	__be32 status;
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		return status;
-	inode = cstate->current_fh.fh_dentry->d_inode;
 
 	nfs4_lock_state();
 	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion);

From 3c40794b2dd0f355ef4e6bf8d85af5dcd7da7ece Mon Sep 17 00:00:00 2001
From: Yanchuan Nian <ycnian@gmail.com>
Date: Wed, 24 Oct 2012 14:44:19 +0800
Subject: [PATCH 003/111] nfs: fix wrong object type in lockowner_slab

The object type in the cache of lockowner_slab is wrong, and it is
better to fix it.

Cc: stable@vger.kernel.org
Signed-off-by: Yanchuan Nian <ycnian@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 620ff8143751..fba2996ed511 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2340,7 +2340,7 @@ nfsd4_init_slabs(void)
 	if (openowner_slab == NULL)
 		goto out_nomem;
 	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
-			sizeof(struct nfs4_openowner), 0, 0, NULL);
+			sizeof(struct nfs4_lockowner), 0, 0, NULL);
 	if (lockowner_slab == NULL)
 		goto out_nomem;
 	file_slab = kmem_cache_create("nfsd4_files",

From ae7095a7c44b4cda963e3d4059788ff60e119684 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 1 Oct 2012 17:50:56 -0400
Subject: [PATCH 004/111] nfsd4: helper function for getting mounted_on ino

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fd548d155088..af65fda7685a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2014,6 +2014,22 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
 	return 0;
 }
 
+
+static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
+{
+	struct path path = exp->ex_path;
+	int err;
+
+	path_get(&path);
+	while (follow_up(&path)) {
+		if (path.dentry != path.mnt->mnt_root)
+			break;
+	}
+	err = vfs_getattr(path.mnt, path.dentry, stat);
+	path_put(&path);
+	return err;
+}
+
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
@@ -2430,18 +2446,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 		 * and this is the root of a cross-mounted filesystem.
 		 */
 		if (ignore_crossmnt == 0 &&
-		    dentry == exp->ex_path.mnt->mnt_root) {
-			struct path path = exp->ex_path;
-			path_get(&path);
-			while (follow_up(&path)) {
-				if (path.dentry != path.mnt->mnt_root)
-					break;
-			}
-			err = vfs_getattr(path.mnt, path.dentry, &stat);
-			path_put(&path);
-			if (err)
-				goto out_nfserr;
-		}
+		    dentry == exp->ex_path.mnt->mnt_root)
+			get_parent_attributes(exp, &stat);
 		WRITE64(stat.ino);
 	}
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {

From 7c1f8b65af4bda8eb53cdfe4965cbcfd7fb20c7d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 1 Nov 2012 16:54:01 -0400
Subject: [PATCH 005/111] nfsd4: remove unused init_session return

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fba2996ed511..bc8507c23525 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -945,7 +945,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
 	return new;
 }
 
-static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
+void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
 
@@ -978,7 +978,6 @@ static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_s
 		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
 		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
 	}
-	return new;
 }
 
 /* caller must hold client_lock */

From 7032a3dd923f434132643321ad5faad128611f9e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 9 Oct 2012 18:33:38 -0400
Subject: [PATCH 006/111] svcrpc: demote some printks to a dprintk

In general I'd rather random bad behavior on the network won't trigger a
printk.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 3ee7461926d8..be301e1c1290 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1035,7 +1035,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
 }
 
 /*
- * Printk the given error with the address of the client that caused it.
+ * dprintk the given error with the address of the client that caused it.
  */
 static __printf(2, 3)
 void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
@@ -1049,8 +1049,7 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	net_warn_ratelimited("svc: %s: %pV",
-			     svc_print_addr(rqstp, buf, sizeof(buf)), &vaf);
+	dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf);
 
 	va_end(args);
 }

From 7fa10cd12df3ec0873a5db0d8dc8e978423b87dc Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 16 Oct 2012 12:39:33 -0400
Subject: [PATCH 007/111] nfsd4: don't BUG in delegation break callback

These conditions would indeed indicate bugs in the code, but if we want
to hear about them we're likely better off warning and returning than
immediately dying while holding file_lock_lock.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bc8507c23525..db7258c13423 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2554,9 +2554,14 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
 	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
 	struct nfs4_delegation *dp;
 
-	BUG_ON(!fp);
-	/* We assume break_lease is only called once per lease: */
-	BUG_ON(fp->fi_had_conflict);
+	if (!fp) {
+		WARN(1, "(%p)->fl_owner NULL\n", fl);
+		return;
+	}
+	if (fp->fi_had_conflict) {
+		WARN(1, "duplicate break on %p\n", fp);
+		return;
+	}
 	/*
 	 * We don't want the locks code to timeout the lease for us;
 	 * we'll remove it ourself if a delegation isn't returned

From fae5096ad217db2e3368e980c1d86223f786856b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 26 Oct 2012 16:04:08 -0400
Subject: [PATCH 008/111] nfsd: assume writeable exportabled filesystems have
 f_sync

I don't really see how you could claim to support nfsd and not support
fsync somehow.

And in practice a quick look through the exportable filesystems suggests
the only ones without an ->fsync are read-only (efs, isofs, squashfs) or
in-memory (shmem).

Also, performing a write and then returning an error if the sync fails
(as we would do here in the wgather case) seems unhelpful to clients.

Also remove an incorrect comment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c120b48ec305..ed3eb59b607e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1020,21 +1020,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	inode = dentry->d_inode;
 	exp   = fhp->fh_export;
 
-	/*
-	 * Request sync writes if
-	 *  -	the sync export option has been set, or
-	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
-	 *  -   The file system doesn't support fsync().
-	 * When NFSv2 gathered writes have been configured for this volume,
-	 * flushing the data to disk is handled separately below.
-	 */
 	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
-	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
-	       stable = 2;
-	       *stablep = 2; /* FILE_SYNC */
-	}
-
 	if (!EX_ISSYNC(exp))
 		stable = 0;
 	if (stable && !use_wgather) {

From face15025ffdf664de95e86ae831544154d26c9c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 26 Oct 2012 16:12:31 -0400
Subject: [PATCH 009/111] nfsd: use vfs_fsync_range(), not O_SYNC, for stable
 writes

NFSv4 shares the same struct file across multiple writes.  (And we'd
like NFSv2 and NFSv3 to do that as well some day.)

So setting O_SYNC on the struct file as a way to request a synchronous
write doesn't work.

Instead, do a vfs_fsync_range() in that case.

Reported-by: Peter Staubach <pstaubach@exagrid.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed3eb59b607e..b584205b25b4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1024,11 +1024,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	if (!EX_ISSYNC(exp))
 		stable = 0;
-	if (stable && !use_wgather) {
-		spin_lock(&file->f_lock);
-		file->f_flags |= O_SYNC;
-		spin_unlock(&file->f_lock);
-	}
 
 	/* Write the data. */
 	oldfs = get_fs(); set_fs(KERNEL_DS);
@@ -1044,8 +1039,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	if (inode->i_mode & (S_ISUID | S_ISGID))
 		kill_suid(dentry);
 
-	if (stable && use_wgather)
-		host_err = wait_for_concurrent_writes(file);
+	if (stable) {
+		if (use_wgather)
+			host_err = wait_for_concurrent_writes(file);
+		else
+			host_err = vfs_fsync_range(file, offset, offset+*cnt, 0);
+	}
 
 out_nfserr:
 	dprintk("nfsd: write complete host_err=%d\n", host_err);

From acb2887e04c2140c2c63c8bf94e0b446efcc7001 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 27 Mar 2012 14:50:26 -0400
Subject: [PATCH 010/111] nfsd4: clean up callback security parsing

Move the callback parsing into a separate function.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 118 +++++++++++++++++++++++++---------------------
 fs/nfsd/state.h   |   9 +++-
 2 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index af65fda7685a..511f980b605c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -422,6 +422,67 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
 	DECODE_TAIL;
 }
 
+static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+{
+	DECODE_HEAD;
+	u32 dummy;
+	char *machine_name;
+	int i;
+	int nr_secflavs;
+
+	/* callback_sec_params4 */
+	READ_BUF(4);
+	READ32(nr_secflavs);
+	for (i = 0; i < nr_secflavs; ++i) {
+		READ_BUF(4);
+		READ32(dummy);
+		switch (dummy) {
+		case RPC_AUTH_NULL:
+			/* Nothing to read */
+			break;
+		case RPC_AUTH_UNIX:
+			READ_BUF(8);
+			/* stamp */
+			READ32(dummy);
+
+			/* machine name */
+			READ32(dummy);
+			READ_BUF(dummy);
+			SAVEMEM(machine_name, dummy);
+
+			/* uid, gid */
+			READ_BUF(8);
+			READ32(cbs->uid);
+			READ32(cbs->gid);
+
+			/* more gids */
+			READ_BUF(4);
+			READ32(dummy);
+			READ_BUF(dummy * 4);
+			break;
+		case RPC_AUTH_GSS:
+			dprintk("RPC_AUTH_GSS callback secflavor "
+				"not supported!\n");
+			READ_BUF(8);
+			/* gcbp_service */
+			READ32(dummy);
+			/* gcbp_handle_from_server */
+			READ32(dummy);
+			READ_BUF(dummy);
+			p += XDR_QUADLEN(dummy);
+			/* gcbp_handle_from_client */
+			READ_BUF(4);
+			READ32(dummy);
+			READ_BUF(dummy);
+			break;
+		default:
+			dprintk("Illegal callback secflavor\n");
+			return nfserr_inval;
+		}
+	}
+	DECODE_TAIL;
+}
+
 static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
 {
 	DECODE_HEAD;
@@ -1237,11 +1298,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 			    struct nfsd4_create_session *sess)
 {
 	DECODE_HEAD;
-
 	u32 dummy;
-	char *machine_name;
-	int i;
-	int nr_secflavs;
 
 	READ_BUF(16);
 	COPYMEM(&sess->clientid, 8);
@@ -1282,58 +1339,9 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 		goto xdr_error;
 	}
 
-	READ_BUF(8);
+	READ_BUF(4);
 	READ32(sess->callback_prog);
-
-	/* callback_sec_params4 */
-	READ32(nr_secflavs);
-	for (i = 0; i < nr_secflavs; ++i) {
-		READ_BUF(4);
-		READ32(dummy);
-		switch (dummy) {
-		case RPC_AUTH_NULL:
-			/* Nothing to read */
-			break;
-		case RPC_AUTH_UNIX:
-			READ_BUF(8);
-			/* stamp */
-			READ32(dummy);
-
-			/* machine name */
-			READ32(dummy);
-			READ_BUF(dummy);
-			SAVEMEM(machine_name, dummy);
-
-			/* uid, gid */
-			READ_BUF(8);
-			READ32(sess->uid);
-			READ32(sess->gid);
-
-			/* more gids */
-			READ_BUF(4);
-			READ32(dummy);
-			READ_BUF(dummy * 4);
-			break;
-		case RPC_AUTH_GSS:
-			dprintk("RPC_AUTH_GSS callback secflavor "
-				"not supported!\n");
-			READ_BUF(8);
-			/* gcbp_service */
-			READ32(dummy);
-			/* gcbp_handle_from_server */
-			READ32(dummy);
-			READ_BUF(dummy);
-			p += XDR_QUADLEN(dummy);
-			/* gcbp_handle_from_client */
-			READ_BUF(4);
-			READ32(dummy);
-			READ_BUF(dummy);
-			break;
-		default:
-			dprintk("Illegal callback secflavor\n");
-			return nfserr_inval;
-		}
-	}
+	nfsd4_decode_cb_sec(argp, &sess->cb_sec);
 	DECODE_TAIL;
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e036894bce57..df33e781f36c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -150,6 +150,11 @@ struct nfsd4_channel_attrs {
 	u32		rdma_attrs;
 };
 
+struct nfsd4_cb_sec {
+	u32	uid;
+	u32	gid;
+};
+
 struct nfsd4_create_session {
 	clientid_t			clientid;
 	struct nfs4_sessionid		sessionid;
@@ -158,8 +163,7 @@ struct nfsd4_create_session {
 	struct nfsd4_channel_attrs	fore_channel;
 	struct nfsd4_channel_attrs	back_channel;
 	u32				callback_prog;
-	u32				uid;
-	u32				gid;
+	struct nfsd4_cb_sec		cb_sec;
 };
 
 struct nfsd4_bind_conn_to_session {
@@ -192,6 +196,7 @@ struct nfsd4_session {
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
 	struct nfsd4_channel_attrs se_bchannel;
+	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_prog;
 	u32			se_cb_seq_nr;

From c6bb3ca27d78b902baa143b931a8d9ef53298afa Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 1 Nov 2012 16:31:02 -0400
Subject: [PATCH 011/111] nfsd4: use callback security parameters in
 create_session

We're currently ignoring the callback security parameters specified in
create_session, and just assuming the client wants auth_sys, because
that's all the current linux client happens to care about.  But this
could cause us callbacks to fail to a client that wanted something
different.

For now, all we're doing is no longer ignoring the uid and gid passed in
the auth_sys case.  Further patches will add support for auth_null and
gss (and possibly use more of the auth_sys information; the spec wants
us to use exactly the credential we're passed, though it's hard to
imagine why a client would care).

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c | 49 ++++++++++++++++++++++++++++++------------
 fs/nfsd/nfs4state.c    |  1 +
 fs/nfsd/state.h        |  1 +
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index bdf29c96e4cd..b32639ee0a42 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -630,6 +630,31 @@ static int max_cb_time(void)
 	return max(nfsd4_lease/10, (time_t)1) * HZ;
 }
 
+static struct rpc_cred *callback_cred;
+
+int set_callback_cred(void)
+{
+	if (callback_cred)
+		return 0;
+	callback_cred = rpc_lookup_machine_cred("nfs");
+	if (!callback_cred)
+		return -ENOMEM;
+	return 0;
+}
+
+struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
+{
+	if (clp->cl_minorversion == 0) {
+		return get_rpccred(callback_cred);
+	} else {
+		struct rpc_auth *auth = client->cl_auth;
+		struct auth_cred acred = {};
+
+		acred.uid = ses->se_cb_sec.uid;
+		acred.gid = ses->se_cb_sec.gid;
+		return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0);
+	}
+}
 
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
@@ -648,6 +673,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 	};
 	struct rpc_clnt *client;
+	struct rpc_cred *cred;
 
 	if (clp->cl_minorversion == 0) {
 		if (!clp->cl_cred.cr_principal &&
@@ -675,7 +701,13 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 			PTR_ERR(client));
 		return PTR_ERR(client);
 	}
+	cred = get_backchannel_cred(clp, client, ses);
+	if (IS_ERR(cred)) {
+		rpc_shutdown_client(client);
+		return PTR_ERR(cred);
+	}
 	clp->cl_cb_client = client;
+	clp->cl_cb_cred = cred;
 	return 0;
 
 }
@@ -714,18 +746,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
 	.rpc_call_done = nfsd4_cb_probe_done,
 };
 
-static struct rpc_cred *callback_cred;
-
-int set_callback_cred(void)
-{
-	if (callback_cred)
-		return 0;
-	callback_cred = rpc_lookup_machine_cred("nfs");
-	if (!callback_cred)
-		return -ENOMEM;
-	return 0;
-}
-
 static struct workqueue_struct *callback_wq;
 
 static void run_nfsd4_cb(struct nfsd4_callback *cb)
@@ -743,7 +763,6 @@ static void do_probe_callback(struct nfs4_client *clp)
 	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
 	cb->cb_msg.rpc_argp = NULL;
 	cb->cb_msg.rpc_resp = NULL;
-	cb->cb_msg.rpc_cred = callback_cred;
 
 	cb->cb_ops = &nfsd4_cb_probe_ops;
 
@@ -962,6 +981,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
 	if (clp->cl_cb_client) {
 		rpc_shutdown_client(clp->cl_cb_client);
 		clp->cl_cb_client = NULL;
+		put_rpccred(clp->cl_cb_cred);
+		clp->cl_cb_cred = NULL;
 	}
 	if (clp->cl_cb_conn.cb_xprt) {
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -1010,6 +1031,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
 		nfsd4_release_cb(cb);
 		return;
 	}
+	cb->cb_msg.rpc_cred = clp->cl_cb_cred;
 	rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 			cb->cb_ops, cb);
 }
@@ -1025,7 +1047,6 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp)
 	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
 	cb->cb_msg.rpc_argp = cb;
 	cb->cb_msg.rpc_resp = cb;
-	cb->cb_msg.rpc_cred = callback_cred;
 
 	cb->cb_ops = &nfsd4_cb_recall_ops;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index db7258c13423..dbbbd2fe5236 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -957,6 +957,7 @@ void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4
 	new->se_cb_seq_nr = 1;
 	new->se_flags = cses->flags;
 	new->se_cb_prog = cses->callback_prog;
+	new->se_cb_sec = cses->cb_sec;
 	kref_init(&new->se_ref);
 	idx = hash_sessionid(&new->se_sessionid);
 	spin_lock(&client_lock);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index df33e781f36c..bff856c34a32 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -250,6 +250,7 @@ struct nfs4_client {
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
 	unsigned long		cl_flags;
+	struct rpc_cred		*cl_cb_cred;
 	struct rpc_clnt		*cl_cb_client;
 	u32			cl_cb_ident;
 #define NFSD4_CB_UP		0

From cb73a9f4649bf63c0397e565a15abf8a91ecf56f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 1 Nov 2012 18:09:48 -0400
Subject: [PATCH 012/111] nfsd4: implement backchannel_ctl operation

This operation is mandatory for servers to implement.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 Documentation/filesystems/nfs/nfs41-server.txt |  7 +------
 fs/nfsd/nfs4proc.c                             |  6 ++++++
 fs/nfsd/nfs4state.c                            | 14 ++++++++++++++
 fs/nfsd/nfs4xdr.c                              | 13 ++++++++++++-
 fs/nfsd/state.h                                |  5 +++++
 fs/nfsd/xdr4.h                                 |  2 ++
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 092fad92a3f0..f5ddff2da302 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -39,11 +39,6 @@ interoperability problems with future clients.  Known issues:
 	  from a linux client are possible, but we aren't really
 	  conformant with the spec (for example, we don't use kerberos
 	  on the backchannel correctly).
-	- Incomplete backchannel support: incomplete backchannel gss
-	  support and no support for BACKCHANNEL_CTL mean that
-	  callbacks (hence delegations and layouts) may not be
-	  available and clients confused by the incomplete
-	  implementation may fail.
 	- We do not support SSV, which provides security for shared
 	  client-server state (thus preventing unauthorized tampering
 	  with locks and opens, for example).  It is mandatory for
@@ -89,7 +84,7 @@ Operations
    |                      | MNI        | or OPT)      |                |
    +----------------------+------------+--------------+----------------+
    | ACCESS               | REQ        |              | Section 18.1   |
-NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
+I  | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
 I  | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
    | CLOSE                | REQ        |              | Section 18.2   |
    | COMMIT               | REQ        |              | Section 18.3   |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6c9a4b291dba..f955176f1b6f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1666,6 +1666,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_EXCHANGE_ID",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
 	},
+	[OP_BACKCHANNEL_CTL] = {
+		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
+		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
+		.op_name = "OP_BACKCHANNEL_CTL",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+	},
 	[OP_BIND_CONN_TO_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dbbbd2fe5236..4023e77687ee 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1865,6 +1865,20 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 	return nfserr_inval;
 }
 
+__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
+{
+	struct nfsd4_session *session = cstate->session;
+
+	spin_lock(&client_lock);
+	session->se_cb_prog = bc->bc_cb_program;
+	session->se_cb_sec = bc->bc_cb_sec;
+	spin_unlock(&client_lock);
+
+	nfsd4_probe_callback(session->se_client);
+
+	return nfs_ok;
+}
+
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
 		     struct nfsd4_bind_conn_to_session *bcts)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 511f980b605c..d7e7c110246e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -483,6 +483,17 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
 	DECODE_TAIL;
 }
 
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(bc->bc_cb_program);
+	nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+
+	DECODE_TAIL;
+}
+
 static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
 {
 	DECODE_HEAD;
@@ -1536,7 +1547,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
 	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_notsupp,
 
 	/* new operations for NFSv4.1 */
-	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_backchannel_ctl,
 	[OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
 	[OP_EXCHANGE_ID]	= (nfsd4_dec)nfsd4_decode_exchange_id,
 	[OP_CREATE_SESSION]	= (nfsd4_dec)nfsd4_decode_create_session,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index bff856c34a32..758bc9c2646b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -166,6 +166,11 @@ struct nfsd4_create_session {
 	struct nfsd4_cb_sec		cb_sec;
 };
 
+struct nfsd4_backchannel_ctl {
+	u32	bc_cb_program;
+	struct nfsd4_cb_sec		bc_cb_sec;
+};
+
 struct nfsd4_bind_conn_to_session {
 	struct nfs4_sessionid		sessionid;
 	u32				dir;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index acd127d4ee82..71c5c47f2750 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -462,6 +462,7 @@ struct nfsd4_op {
 
 		/* NFSv4.1 */
 		struct nfsd4_exchange_id	exchange_id;
+		struct nfsd4_backchannel_ctl	backchannel_ctl;
 		struct nfsd4_bind_conn_to_session bind_conn_to_session;
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
@@ -566,6 +567,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 		struct nfsd4_sequence *seq);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
+extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
 extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
 extern __be32 nfsd4_create_session(struct svc_rqst *,
 		struct nfsd4_compound_state *,

From 57725155dc1b8c78b7a96886d5cdc69dc89e9c54 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 5 Nov 2012 15:10:26 -0500
Subject: [PATCH 013/111] nfsd4: common helper to initialize callback work

I've found it confusing having the only references to
nfsd4_do_callback_rpc() in a different file.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c | 7 ++++++-
 fs/nfsd/nfs4state.c    | 4 ++--
 fs/nfsd/state.h        | 2 +-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index b32639ee0a42..a1aa18db08fb 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1016,7 +1016,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
 		run_nfsd4_cb(cb);
 }
 
-void nfsd4_do_callback_rpc(struct work_struct *w)
+static void nfsd4_do_callback_rpc(struct work_struct *w)
 {
 	struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
 	struct nfs4_client *clp = cb->cb_clp;
@@ -1036,6 +1036,11 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
 			cb->cb_ops, cb);
 }
 
+void nfsd4_init_callback(struct nfsd4_callback *cb)
+{
+	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
+}
+
 void nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfsd4_callback *cb = &dp->dl_recall;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4023e77687ee..13f3471b02a2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -340,7 +340,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
 	fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+	nfsd4_init_callback(&dp->dl_recall);
 	return dp;
 }
 
@@ -1313,7 +1313,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	INIT_LIST_HEAD(&clp->cl_lru);
 	INIT_LIST_HEAD(&clp->cl_callbacks);
 	spin_lock_init(&clp->cl_lock);
-	INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
+	nfsd4_init_callback(&clp->cl_cb_null);
 	clp->cl_time = get_seconds();
 	clear_bit(0, &clp->cl_cb_slot_busy);
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 758bc9c2646b..0fd342a2174e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -470,10 +470,10 @@ extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);
 extern void nfs4_free_openowner(struct nfs4_openowner *);
 extern void nfs4_free_lockowner(struct nfs4_lockowner *);
 extern int set_callback_cred(void);
+extern void nfsd4_init_callback(struct nfsd4_callback *);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
-extern void nfsd4_do_callback_rpc(struct work_struct *);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);

From 12fc3e92d4b18b4e99af624586e1696479ff36ce Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 5 Nov 2012 16:01:48 -0500
Subject: [PATCH 014/111] nfsd4: backchannel should use client-provided
 security flavor

For now this only adds support for AUTH_NULL.  (Previously we assumed
AUTH_UNIX.)  We'll also need AUTH_GSS, which is trickier.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c |  3 +--
 fs/nfsd/nfs4xdr.c      | 14 +++++++++++---
 fs/nfsd/state.h        |  1 +
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a1aa18db08fb..7bb187ac1492 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -692,7 +692,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		args.bc_xprt = conn->cb_xprt;
 		args.prognumber = clp->cl_cb_session->se_cb_prog;
 		args.protocol = XPRT_TRANSPORT_BC_TCP;
-		args.authflavor = RPC_AUTH_UNIX;
+		args.authflavor = ses->se_cb_sec.flavor;
 	}
 	/* Create RPC client */
 	client = rpc_create(&args);
@@ -709,7 +709,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 	clp->cl_cb_client = client;
 	clp->cl_cb_cred = cred;
 	return 0;
-
 }
 
 static void warn_no_callback_path(struct nfs4_client *clp, int reason)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d7e7c110246e..406d0c4620f6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -425,7 +425,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
 static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
 {
 	DECODE_HEAD;
-	u32 dummy;
+	u32 dummy, uid, gid;
 	char *machine_name;
 	int i;
 	int nr_secflavs;
@@ -433,12 +433,15 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
 	/* callback_sec_params4 */
 	READ_BUF(4);
 	READ32(nr_secflavs);
+	cbs->flavor = (u32)(-1);
 	for (i = 0; i < nr_secflavs; ++i) {
 		READ_BUF(4);
 		READ32(dummy);
 		switch (dummy) {
 		case RPC_AUTH_NULL:
 			/* Nothing to read */
+			if (cbs->flavor == (u32)(-1))
+				cbs->flavor = RPC_AUTH_NULL;
 			break;
 		case RPC_AUTH_UNIX:
 			READ_BUF(8);
@@ -452,13 +455,18 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
 
 			/* uid, gid */
 			READ_BUF(8);
-			READ32(cbs->uid);
-			READ32(cbs->gid);
+			READ32(uid);
+			READ32(gid);
 
 			/* more gids */
 			READ_BUF(4);
 			READ32(dummy);
 			READ_BUF(dummy * 4);
+			if (cbs->flavor == (u32)(-1)) {
+				cbs->uid = uid;
+				cbs->gid = gid;
+				cbs->flavor = RPC_AUTH_UNIX;
+			}
 			break;
 		case RPC_AUTH_GSS:
 			dprintk("RPC_AUTH_GSS callback secflavor "
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0fd342a2174e..0498053b8f0e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -151,6 +151,7 @@ struct nfsd4_channel_attrs {
 };
 
 struct nfsd4_cb_sec {
+	u32	flavor; /* (u32)(-1) used to mean "no valid flavor" */
 	u32	uid;
 	u32	gid;
 };

From 292a41716a4ad3e93c57155f99786abe8a8d386a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 7 Nov 2012 19:41:51 -0500
Subject: [PATCH 015/111] nfsd4: update documentation on 4.1 progress

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 Documentation/filesystems/nfs/nfs41-server.txt | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index f5ddff2da302..392ef637e101 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -43,12 +43,6 @@ interoperability problems with future clients.  Known issues:
 	  client-server state (thus preventing unauthorized tampering
 	  with locks and opens, for example).  It is mandatory for
 	  servers to support this, though no clients use it yet.
-	- Mandatory operations which we do not support, such as
-	  DESTROY_CLIENTID, are not currently used by clients, but will be
-	  (and the spec recommends their uses in common cases), and
-	  clients should not be expected to know how to recover from the
-	  case where they are not supported.  This will eventually cause
-	  interoperability failures.
 
 In addition, some limitations are inherited from the current NFSv4
 implementation:
@@ -94,7 +88,7 @@ NS*| DELEGPURGE           | OPT        | FDELG (REQ)  | Section 18.5   |
    | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
    |                      |            | DDELG, pNFS  |                |
    |                      |            | (REQ)        |                |
-NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
+I  | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
 I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
 I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
 I  | FREE_STATEID         | REQ        |              | Section 18.38  |
@@ -187,7 +181,6 @@ EXCHANGE_ID:
 
 CREATE_SESSION:
 * backchannel attributes are ignored
-* backchannel security parameters are ignored
 
 SEQUENCE:
 * no support for dynamic slot table renegotiation (optional)

From 698d8d875a0593f65092f6619d97de49bc5caa45 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 9 Nov 2012 15:31:53 -0500
Subject: [PATCH 016/111] nfsd: fix error handling in nfsd4_remove_clid_dir

If the credential save fails, then we'll leak our mnt_want_write_file
reference.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 43295d45cc2b..0f1e2e21f7d9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -301,12 +301,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
-		goto out;
+		goto out_drop_write;
 
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
 	if (status == 0)
 		vfs_fsync(rec_file, 0);
+out_drop_write:
 	mnt_drop_write_file(rec_file);
 out:
 	if (status)

From a0af710a6510213672d28f83681c391d36a7555e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 9 Nov 2012 15:06:38 -0500
Subject: [PATCH 017/111] nfsd: remove unused argument to
 nfs4_has_reclaimed_state

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 2 +-
 fs/nfsd/nfs4state.c   | 2 +-
 fs/nfsd/state.h       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 0f1e2e21f7d9..151921bd164e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -320,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
 {
 	int status;
 
-	if (nfs4_has_reclaimed_state(child->d_name.name, false))
+	if (nfs4_has_reclaimed_state(child->d_name.name))
 		return 0;
 
 	status = vfs_rmdir(parent->d_inode, child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 13f3471b02a2..d6b602a92657 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4484,7 +4484,7 @@ alloc_reclaim(void)
 }
 
 int
-nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
+nfs4_has_reclaimed_state(const char *name)
 {
 	unsigned int strhashval = clientstr_hashval(name);
 	struct nfs4_client *clp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0498053b8f0e..8053b5747960 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -482,7 +482,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
 extern int nfs4_client_to_reclaim(const char *name);
-extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
+extern int nfs4_has_reclaimed_state(const char *name);
 extern void release_session_client(struct nfsd4_session *);
 extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
 

From 2873d2147e1e14b82367bde14354a011ffda0496 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:48 -0500
Subject: [PATCH 018/111] nfsd: add a usermodehelper upcall for NFSv4 client ID
 tracking

Add a new client tracker upcall type that uses call_usermodehelper to
call out to a program. This seems to be the preferred method of
calling out to usermode these days for seldom-called upcalls. It's
simple and doesn't require a running daemon, so it should "just work"
as long as the binary is installed.

The client tracking exit operation is also changed to check for a
NULL pointer before running. The UMH upcall doesn't need to do anything
at module teardown time.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 134 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 133 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 151921bd164e..2fc2f6cb8d95 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -927,6 +927,137 @@ static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
 	.grace_done	= nfsd4_cld_grace_done,
 };
 
+/* upcall via usermodehelper */
+static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
+module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
+			S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
+
+static int
+nfsd4_umh_cltrack_upcall(char *cmd, char *arg)
+{
+	char *envp[] = { NULL };
+	char *argv[4];
+	int ret;
+
+	if (unlikely(!cltrack_prog[0])) {
+		dprintk("%s: cltrack_prog is disabled\n", __func__);
+		return -EACCES;
+	}
+
+	dprintk("%s: cmd: %s\n", __func__, cmd);
+	dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
+
+	argv[0] = (char *)cltrack_prog;
+	argv[1] = cmd;
+	argv[2] = arg;
+	argv[3] = NULL;
+
+	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+	/*
+	 * Disable the upcall mechanism if we're getting an ENOENT or EACCES
+	 * error. The admin can re-enable it on the fly by using sysfs
+	 * once the problem has been fixed.
+	 */
+	if (ret == -ENOENT || ret == -EACCES) {
+		dprintk("NFSD: %s was not found or isn't executable (%d). "
+			"Setting cltrack_prog to blank string!",
+			cltrack_prog, ret);
+		cltrack_prog[0] = '\0';
+	}
+	dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret);
+
+	return ret;
+}
+
+static char *
+bin_to_hex_dup(const unsigned char *src, int srclen)
+{
+	int i;
+	char *buf, *hex;
+
+	/* +1 for terminating NULL */
+	buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
+	if (!buf)
+		return buf;
+
+	hex = buf;
+	for (i = 0; i < srclen; i++) {
+		sprintf(hex, "%2.2x", *src++);
+		hex += 2;
+	}
+	return buf;
+}
+
+static int
+nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net)
+{
+	return nfsd4_umh_cltrack_upcall("init", NULL);
+}
+
+static void
+nfsd4_umh_cltrack_create(struct nfs4_client *clp)
+{
+	char *hexid;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return;
+	}
+	nfsd4_umh_cltrack_upcall("create", hexid);
+	kfree(hexid);
+}
+
+static void
+nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
+{
+	char *hexid;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return;
+	}
+	nfsd4_umh_cltrack_upcall("remove", hexid);
+	kfree(hexid);
+}
+
+static int
+nfsd4_umh_cltrack_check(struct nfs4_client *clp)
+{
+	int ret;
+	char *hexid;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return -ENOMEM;
+	}
+	ret = nfsd4_umh_cltrack_upcall("check", hexid);
+	kfree(hexid);
+	return ret;
+}
+
+static void
+nfsd4_umh_cltrack_grace_done(struct net __attribute__((unused)) *net,
+				time_t boot_time)
+{
+	char timestr[22]; /* FIXME: better way to determine max size? */
+
+	sprintf(timestr, "%ld", boot_time);
+	nfsd4_umh_cltrack_upcall("gracedone", timestr);
+}
+
+static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
+	.init		= nfsd4_umh_cltrack_init,
+	.exit		= NULL,
+	.create		= nfsd4_umh_cltrack_create,
+	.remove		= nfsd4_umh_cltrack_remove,
+	.check		= nfsd4_umh_cltrack_check,
+	.grace_done	= nfsd4_umh_cltrack_grace_done,
+};
+
 int
 nfsd4_client_tracking_init(struct net *net)
 {
@@ -957,7 +1088,8 @@ void
 nfsd4_client_tracking_exit(struct net *net)
 {
 	if (client_tracking_ops) {
-		client_tracking_ops->exit(net);
+		if (client_tracking_ops->exit)
+			client_tracking_ops->exit(net);
 		client_tracking_ops = NULL;
 	}
 }

From 2d77bf0a55d64559adb2d48a37bc7e876d6adc11 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:49 -0500
Subject: [PATCH 019/111] nfsd: change heuristic for selecting the
 client_tracking_ops

First, try to use the new usermodehelper upcall. It should succeed or
fail quickly, so there's little cost to doing so.

If it fails, and the legacy tracking dir exists, use that. If it
doesn't exist then fall back to using nfsdcld.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2fc2f6cb8d95..e71f713bd7c0 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1064,17 +1064,35 @@ nfsd4_client_tracking_init(struct net *net)
 	int status;
 	struct path path;
 
-	if (!client_tracking_ops) {
-		client_tracking_ops = &nfsd4_cld_tracking_ops;
-		status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
-		if (!status) {
-			if (S_ISDIR(path.dentry->d_inode->i_mode))
-				client_tracking_ops =
-						&nfsd4_legacy_tracking_ops;
-			path_put(&path);
-		}
+	/* just run the init if it the method is already decided */
+	if (client_tracking_ops)
+		goto do_init;
+
+	/*
+	 * First, try a UMH upcall. It should succeed or fail quickly, so
+	 * there's little harm in trying that first.
+	 */
+	client_tracking_ops = &nfsd4_umh_tracking_ops;
+	status = client_tracking_ops->init(net);
+	if (!status)
+		return status;
+
+	/*
+	 * See if the recoverydir exists and is a directory. If it is,
+	 * then use the legacy ops.
+	 */
+	client_tracking_ops = &nfsd4_legacy_tracking_ops;
+	status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+	if (!status) {
+		status = S_ISDIR(path.dentry->d_inode->i_mode);
+		path_put(&path);
+		if (status)
+			goto do_init;
 	}
 
+	/* Finally, try to use nfsdcld */
+	client_tracking_ops = &nfsd4_cld_tracking_ops;
+do_init:
 	status = client_tracking_ops->init(net);
 	if (status) {
 		printk(KERN_WARNING "NFSD: Unable to initialize client "

From f3aa7e24c91ee3fd387150c2c5a9934b09f44ec5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:50 -0500
Subject: [PATCH 020/111] nfsd: pass info about the legacy recoverydir in
 environment variables

The usermodehelper upcall program can then decide to use this info as
a (one-way) transition mechanism to the new scheme. When a "check"
upcall occurs and the client doesn't exist in the database, we can
look to see whether the directory exists. If it does, then we'd add
the client to the database, remove the legacy recdir, and return
success to the kernel to allow the recovery to proceed.

For gracedone, we simply pass the v4recovery "topdir" so that the
upcall can clean it out prior to returning to the kernel.

A module parm is also added to disable the legacy conversion if
the admin chooses.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 92 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e71f713bd7c0..38af61556895 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -933,10 +933,75 @@ module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
 			S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
 
-static int
-nfsd4_umh_cltrack_upcall(char *cmd, char *arg)
+static bool cltrack_legacy_disable;
+module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_legacy_disable,
+		"Disable legacy recoverydir conversion. Default: false");
+
+#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
+#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
+
+static char *
+nfsd4_cltrack_legacy_topdir(void)
 {
-	char *envp[] = { NULL };
+	int copied;
+	size_t len;
+	char *result;
+
+	if (cltrack_legacy_disable)
+		return NULL;
+
+	len = strlen(LEGACY_TOPDIR_ENV_PREFIX) +
+		strlen(nfs4_recoverydir()) + 1;
+
+	result = kmalloc(len, GFP_KERNEL);
+	if (!result)
+		return result;
+
+	copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s",
+				nfs4_recoverydir());
+	if (copied >= len) {
+		/* just return nothing if output was truncated */
+		kfree(result);
+		return NULL;
+	}
+
+	return result;
+}
+
+static char *
+nfsd4_cltrack_legacy_recdir(const char *recdir)
+{
+	int copied;
+	size_t len;
+	char *result;
+
+	if (cltrack_legacy_disable)
+		return NULL;
+
+	/* +1 is for '/' between "topdir" and "recdir" */
+	len = strlen(LEGACY_RECDIR_ENV_PREFIX) +
+		strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN;
+
+	result = kmalloc(len, GFP_KERNEL);
+	if (!result)
+		return result;
+
+	copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/%s",
+				nfs4_recoverydir(), recdir);
+	if (copied >= len) {
+		/* just return nothing if output was truncated */
+		kfree(result);
+		return NULL;
+	}
+
+	return result;
+}
+
+static int
+nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
+{
+	char *envp[2];
 	char *argv[4];
 	int ret;
 
@@ -947,6 +1012,10 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg)
 
 	dprintk("%s: cmd: %s\n", __func__, cmd);
 	dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
+	dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)");
+
+	envp[0] = legacy;
+	envp[1] = NULL;
 
 	argv[0] = (char *)cltrack_prog;
 	argv[1] = cmd;
@@ -992,7 +1061,7 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
 static int
 nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net)
 {
-	return nfsd4_umh_cltrack_upcall("init", NULL);
+	return nfsd4_umh_cltrack_upcall("init", NULL, NULL);
 }
 
 static void
@@ -1005,7 +1074,7 @@ nfsd4_umh_cltrack_create(struct nfs4_client *clp)
 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
 		return;
 	}
-	nfsd4_umh_cltrack_upcall("create", hexid);
+	nfsd4_umh_cltrack_upcall("create", hexid, NULL);
 	kfree(hexid);
 }
 
@@ -1019,7 +1088,7 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
 		return;
 	}
-	nfsd4_umh_cltrack_upcall("remove", hexid);
+	nfsd4_umh_cltrack_upcall("remove", hexid, NULL);
 	kfree(hexid);
 }
 
@@ -1027,14 +1096,16 @@ static int
 nfsd4_umh_cltrack_check(struct nfs4_client *clp)
 {
 	int ret;
-	char *hexid;
+	char *hexid, *legacy;
 
 	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
 	if (!hexid) {
 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
 		return -ENOMEM;
 	}
-	ret = nfsd4_umh_cltrack_upcall("check", hexid);
+	legacy = nfsd4_cltrack_legacy_recdir(clp->cl_recdir);
+	ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy);
+	kfree(legacy);
 	kfree(hexid);
 	return ret;
 }
@@ -1043,10 +1114,13 @@ static void
 nfsd4_umh_cltrack_grace_done(struct net __attribute__((unused)) *net,
 				time_t boot_time)
 {
+	char *legacy;
 	char timestr[22]; /* FIXME: better way to determine max size? */
 
 	sprintf(timestr, "%ld", boot_time);
-	nfsd4_umh_cltrack_upcall("gracedone", timestr);
+	legacy = nfsd4_cltrack_legacy_topdir();
+	nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy);
+	kfree(legacy);
 }
 
 static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {

From 8b0554e9a24298c91de89a779a714c87073380a2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:51 -0500
Subject: [PATCH 021/111] nfsd: warn about impending removal of nfsdcld upcall

Let's shoot for removing the nfsdcld upcall in 3.10. Most likely,
no one is actually using it so I don't expect this warning to
fire often (except maybe on misconfigured systems).

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 38af61556895..6aaf5d92a43c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1166,6 +1166,9 @@ nfsd4_client_tracking_init(struct net *net)
 
 	/* Finally, try to use nfsdcld */
 	client_tracking_ops = &nfsd4_cld_tracking_ops;
+	printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
+			"removed in 3.10. Please transition to using "
+			"nfsdcltrack.\n");
 do_init:
 	status = client_tracking_ops->init(net);
 	if (status) {

From 278c931cb05ae624df8c82b6bdfbb0e03392cde7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:52 -0500
Subject: [PATCH 022/111] nfsd: have nfsd4_find_reclaim_client take a char *
 argument

Currently, it takes a client pointer, but later we're going to need to
search for these records without knowing whether a matching client even
exists.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c |  2 +-
 fs/nfsd/nfs4state.c   | 11 ++++-------
 fs/nfsd/state.h       |  2 +-
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 6aaf5d92a43c..4e92fb38cfb2 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -486,7 +486,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
 		return 0;
 
 	/* look for it in the reclaim hashtable otherwise */
-	if (nfsd4_find_reclaim_client(clp)) {
+	if (nfsd4_find_reclaim_client(clp->cl_recdir)) {
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 		return 0;
 	}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d6b602a92657..18e554942da3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4537,19 +4537,16 @@ nfs4_release_reclaim(void)
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
 struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(struct nfs4_client *clp)
+nfsd4_find_reclaim_client(const char *recdir)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
-		            clp->cl_name.len, clp->cl_name.data,
-			    clp->cl_recdir);
+	dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
 
-	/* find clp->cl_name in reclaim_str_hashtbl */
-	strhashval = clientstr_hashval(clp->cl_recdir);
+	strhashval = clientstr_hashval(recdir);
 	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
+		if (same_name(crp->cr_recdir, recdir)) {
 			return crp;
 		}
 	}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 8053b5747960..c41c28020cad 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -466,7 +466,7 @@ extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
 extern int nfs4_in_grace(void);
 extern void nfs4_release_reclaim(void);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp);
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);
 extern void nfs4_free_openowner(struct nfs4_openowner *);
 extern void nfs4_free_lockowner(struct nfs4_lockowner *);

From ce30e5392fcb26b6aa53bb16d06da1d7d8bb0863 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:53 -0500
Subject: [PATCH 023/111] nfsd: break out reclaim record removal into separate
 function

We'll need to be able to call this from nfs4recover.c eventually.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 12 +++++++++---
 fs/nfsd/state.h     |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 18e554942da3..24dcda2b327c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4516,6 +4516,14 @@ nfs4_client_to_reclaim(const char *name)
 	return 1;
 }
 
+void
+nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp)
+{
+	list_del(&crp->cr_strhash);
+	kfree(crp);
+	reclaim_str_hashtbl_size--;
+}
+
 void
 nfs4_release_reclaim(void)
 {
@@ -4526,9 +4534,7 @@ nfs4_release_reclaim(void)
 		while (!list_empty(&reclaim_str_hashtbl[i])) {
 			crp = list_entry(reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
-			list_del(&crp->cr_strhash);
-			kfree(crp);
-			reclaim_str_hashtbl_size--;
+			nfs4_remove_reclaim_record(crp);
 		}
 	}
 	BUG_ON(reclaim_str_hashtbl_size);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c41c28020cad..3528616c955e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -465,6 +465,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net,
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
 extern int nfs4_in_grace(void);
+void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *);
 extern void nfs4_release_reclaim(void);
 extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);

From 772a9bbbb5769c646c74452ef21df538bbe2ebf0 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:54 -0500
Subject: [PATCH 024/111] nfsd: make nfs4_client_to_reclaim return a pointer to
 the reclaim record

Later callers will need to make changes to the record.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 20 ++++++++++----------
 fs/nfsd/state.h     |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 24dcda2b327c..1c6f82e4335e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4498,22 +4498,22 @@ nfs4_has_reclaimed_state(const char *name)
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-int
+struct nfs4_client_reclaim *
 nfs4_client_to_reclaim(const char *name)
 {
 	unsigned int strhashval;
-	struct nfs4_client_reclaim *crp = NULL;
+	struct nfs4_client_reclaim *crp;
 
 	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
 	crp = alloc_reclaim();
-	if (!crp)
-		return 0;
-	strhashval = clientstr_hashval(name);
-	INIT_LIST_HEAD(&crp->cr_strhash);
-	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
-	reclaim_str_hashtbl_size++;
-	return 1;
+	if (crp) {
+		strhashval = clientstr_hashval(name);
+		INIT_LIST_HEAD(&crp->cr_strhash);
+		list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
+		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+		reclaim_str_hashtbl_size++;
+	}
+	return crp;
 }
 
 void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3528616c955e..3f8b26b9b47b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -482,7 +482,7 @@ extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern int nfs4_client_to_reclaim(const char *name);
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name);
 extern int nfs4_has_reclaimed_state(const char *name);
 extern void release_session_client(struct nfsd4_session *);
 extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);

From 0ce0c2b5d23080eec39ccc52354be1eea326ed5f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:55 -0500
Subject: [PATCH 025/111] nfsd: don't search for client by hash on legacy
 reboot recovery gracedone

When nfsd starts, the legacy reboot recovery code creates a tracking
struct for each directory in the v4recoverydir. When the grace period
ends, it basically does a "readdir" on the directory again, and matches
each dentry in there to an existing client id to see if it should be
removed or not. If the matching client doesn't exist, or hasn't
reclaimed its state then it will remove that dentry.

This is pretty inefficient since it involves doing a lot of hash-bucket
searching. It also means that we have to keep relying on being able to
search for a nfs4_client by md5 hashed cl_recdir name.

Instead, add a pointer to the nfs4_client that indicates the association
between the nfs4_client_reclaim and nfs4_client. When a reclaim operation
comes in, we set the pointer to make that association. On gracedone, the
legacy client tracker will keep the recdir around iff:

1/ there is a reclaim record for the directory

...and...

2/ there's an association between the reclaim record and a client record
-- that is, a create or check operation was performed on the client that
matches that directory.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 31 +++++++++++++++++++++++++++----
 fs/nfsd/nfs4state.c   | 12 +++++-------
 fs/nfsd/state.h       |  4 ++--
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 4e92fb38cfb2..3048c012d4bc 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -65,6 +65,7 @@ struct nfsd4_client_tracking_ops {
 static struct file *rec_file;
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 static struct nfsd4_client_tracking_ops *client_tracking_ops;
+static bool in_grace;
 
 static int
 nfs4_save_creds(const struct cred **original_creds)
@@ -142,6 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	const struct cred *original_cred;
 	char *dname = clp->cl_recdir;
 	struct dentry *dir, *dentry;
+	struct nfs4_client_reclaim *crp;
 	int status;
 
 	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
@@ -182,13 +184,19 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	dput(dentry);
 out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
-	if (status == 0)
+	if (status == 0) {
+		if (in_grace) {
+			crp = nfs4_client_to_reclaim(clp->cl_recdir);
+			if (crp)
+				crp->cr_clp = clp;
+		}
 		vfs_fsync(rec_file, 0);
-	else
+	} else {
 		printk(KERN_ERR "NFSD: failed to write recovery record"
 				" (err %d); please check that %s exists"
 				" and is writeable", status,
 				user_recovery_dirname);
+	}
 	mnt_drop_write_file(rec_file);
 	nfs4_reset_creds(original_cred);
 }
@@ -289,6 +297,7 @@ static void
 nfsd4_remove_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
+	struct nfs4_client_reclaim *crp;
 	int status;
 
 	if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -305,8 +314,15 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
-	if (status == 0)
+	if (status == 0) {
 		vfs_fsync(rec_file, 0);
+		if (in_grace) {
+			/* remove reclaim record */
+			crp = nfsd4_find_reclaim_client(clp->cl_recdir);
+			if (crp)
+				nfs4_remove_reclaim_record(crp);
+		}
+	}
 out_drop_write:
 	mnt_drop_write_file(rec_file);
 out:
@@ -336,6 +352,7 @@ nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
 {
 	int status;
 
+	in_grace = false;
 	if (!rec_file)
 		return;
 	status = mnt_want_write_file(rec_file);
@@ -410,6 +427,8 @@ nfsd4_init_recdir(void)
 	}
 
 	nfs4_reset_creds(original_cred);
+	if (!status)
+		in_grace = true;
 	return status;
 }
 
@@ -481,13 +500,17 @@ nfs4_recoverydir(void)
 static int
 nfsd4_check_legacy_client(struct nfs4_client *clp)
 {
+	struct nfs4_client_reclaim *crp;
+
 	/* did we already find that this client is stable? */
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
 
 	/* look for it in the reclaim hashtable otherwise */
-	if (nfsd4_find_reclaim_client(clp->cl_recdir)) {
+	crp = nfsd4_find_reclaim_client(clp->cl_recdir);
+	if (crp) {
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+		crp->cr_clp = clp;
 		return 0;
 	}
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1c6f82e4335e..559ab574d46b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4483,16 +4483,13 @@ alloc_reclaim(void)
 	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
-int
+bool
 nfs4_has_reclaimed_state(const char *name)
 {
-	unsigned int strhashval = clientstr_hashval(name);
-	struct nfs4_client *clp;
+	struct nfs4_client_reclaim *crp;
 
-	clp = find_confirmed_client_by_str(name, strhashval);
-	if (!clp)
-		return 0;
-	return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+	crp = nfsd4_find_reclaim_client(name);
+	return (crp && crp->cr_clp);
 }
 
 /*
@@ -4511,6 +4508,7 @@ nfs4_client_to_reclaim(const char *name)
 		INIT_LIST_HEAD(&crp->cr_strhash);
 		list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
 		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+		crp->cr_clp = NULL;
 		reclaim_str_hashtbl_size++;
 	}
 	return crp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3f8b26b9b47b..cf9f7ba4df8d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -304,6 +304,7 @@ is_client_expired(struct nfs4_client *clp)
  */
 struct nfs4_client_reclaim {
 	struct list_head	cr_strhash;	/* hash by cr_name */
+	struct nfs4_client	*cr_clp;	/* pointer to associated clp */
 	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
 };
 
@@ -464,7 +465,6 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net,
 		stateid_t *stateid, int flags, struct file **filp);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
-extern int nfs4_in_grace(void);
 void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *);
 extern void nfs4_release_reclaim(void);
 extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir);
@@ -483,7 +483,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name);
-extern int nfs4_has_reclaimed_state(const char *name);
+extern bool nfs4_has_reclaimed_state(const char *name);
 extern void release_session_client(struct nfsd4_session *);
 extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
 

From ac55fdc408039b425a2fa3cbcaed7444e5339f9a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:56 -0500
Subject: [PATCH 026/111] nfsd: move the confirmed and unconfirmed hlists to a
 rbtree

The current code requires that we md5 hash the name in order to store
the client in the confirmed and unconfirmed trees. Change it instead
to store the clients in a pair of rbtrees, and simply compare the
cl_names directly instead of hashing them. This also necessitates that
we add a new flag to the clp->cl_flags field to indicate which tree
the client is currently in.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 144 ++++++++++++++++++++++++++++----------------
 fs/nfsd/state.h     |   3 +-
 2 files changed, 95 insertions(+), 52 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 559ab574d46b..99998a1eb426 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -412,10 +412,10 @@ static unsigned int clientstr_hashval(const char *name)
  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
  * used in reboot/reset lease grace period processing
  *
- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+ * conf_id_hashtbl[], and conf_name_tree hold confirmed
  * setclientid_confirmed info. 
  *
- * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+ * unconf_id_hashtbl[] and unconf_name_tree hold unconfirmed
  * setclientid info.
  *
  * client_lru holds client queue ordered by nfs4_client.cl_time
@@ -423,13 +423,15 @@ static unsigned int clientstr_hashval(const char *name)
  *
  * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
  * for last close replay.
+ *
+ * All of the above fields are protected by the client_mutex.
  */
 static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
 static int reclaim_str_hashtbl_size = 0;
 static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct rb_root conf_name_tree;
+static struct rb_root unconf_name_tree;
 static struct list_head client_lru;
 static struct list_head close_lru;
 
@@ -1144,7 +1146,10 @@ destroy_client(struct nfs4_client *clp)
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	list_del(&clp->cl_idhash);
-	list_del(&clp->cl_strhash);
+	if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+		rb_erase(&clp->cl_namenode, &conf_name_tree);
+	else
+		rb_erase(&clp->cl_namenode, &unconf_name_tree);
 	spin_lock(&client_lock);
 	unhash_client_locked(clp);
 	if (atomic_read(&clp->cl_refcount) == 0)
@@ -1187,6 +1192,17 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 	return 0;
 }
 
+static long long
+compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
+{
+	long long res;
+
+	res = o1->len - o2->len;
+	if (res)
+		return res;
+	return (long long)memcmp(o1->data, o2->data, o1->len);
+}
+
 static int same_name(const char *n1, const char *n2)
 {
 	return 0 == memcmp(n1, n2, HEXDIR_LEN);
@@ -1307,7 +1323,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	atomic_set(&clp->cl_refcount, 0);
 	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
 	INIT_LIST_HEAD(&clp->cl_idhash);
-	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
@@ -1325,11 +1340,52 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 }
 
 static void
-add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct nfs4_client *clp;
+
+	while (*new) {
+		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
+		parent = *new;
+
+		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&new_clp->cl_namenode, parent, new);
+	rb_insert_color(&new_clp->cl_namenode, root);
+}
+
+static struct nfs4_client *
+find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
+{
+	long long cmp;
+	struct rb_node *node = root->rb_node;
+	struct nfs4_client *clp;
+
+	while (node) {
+		clp = rb_entry(node, struct nfs4_client, cl_namenode);
+		cmp = compare_blob(&clp->cl_name, name);
+		if (cmp > 0)
+			node = node->rb_left;
+		else if (cmp < 0)
+			node = node->rb_right;
+		else
+			return clp;
+	}
+	return NULL;
+}
+
+static void
+add_to_unconfirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval;
 
-	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
+	add_clp_to_name_tree(clp, &unconf_name_tree);
 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
 	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
 	renew_client(clp);
@@ -1339,12 +1395,12 @@ static void
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
-	unsigned int strhashval;
 
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
 	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-	strhashval = clientstr_hashval(clp->cl_recdir);
-	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	rb_erase(&clp->cl_namenode, &unconf_name_tree);
+	add_clp_to_name_tree(clp, &conf_name_tree);
+	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
 	renew_client(clp);
 }
 
@@ -1387,27 +1443,15 @@ static bool clp_used_exchangeid(struct nfs4_client *clp)
 } 
 
 static struct nfs4_client *
-find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+find_confirmed_client_by_name(struct xdr_netobj *name)
 {
-	struct nfs4_client *clp;
-
-	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
-		if (same_name(clp->cl_recdir, dname))
-			return clp;
-	}
-	return NULL;
+	return find_clp_in_name_tree(name, &conf_name_tree);
 }
 
 static struct nfs4_client *
-find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+find_unconfirmed_client_by_name(struct xdr_netobj *name)
 {
-	struct nfs4_client *clp;
-
-	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
-		if (same_name(clp->cl_recdir, dname))
-			return clp;
-	}
-	return NULL;
+	return find_clp_in_name_tree(name, &unconf_name_tree);
 }
 
 static void
@@ -1572,7 +1616,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 {
 	struct nfs4_client *unconf, *conf, *new;
 	__be32 status;
-	unsigned int		strhashval;
 	char			dname[HEXDIR_LEN];
 	char			addr_str[INET6_ADDRSTRLEN];
 	nfs4_verifier		verf = exid->verifier;
@@ -1605,11 +1648,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	if (status)
 		return status;
 
-	strhashval = clientstr_hashval(dname);
-
 	/* Cases below refer to rfc 5661 section 18.35.4: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_str(dname, strhashval);
+	conf = find_confirmed_client_by_name(&exid->clname);
 	if (conf) {
 		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
 		bool verfs_match = same_verf(&verf, &conf->cl_verifier);
@@ -1654,7 +1695,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 		goto out;
 	}
 
-	unconf  = find_unconfirmed_client_by_str(dname, strhashval);
+	unconf  = find_unconfirmed_client_by_name(&exid->clname);
 	if (unconf) /* case 4, possible retry or client restart */
 		expire_client(unconf);
 
@@ -1668,7 +1709,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	new->cl_minorversion = 1;
 
 	gen_clid(new);
-	add_to_unconfirmed(new, strhashval);
+	add_to_unconfirmed(new);
 out_copy:
 	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
 	exid->clientid.cl_id = new->cl_clientid.cl_id;
@@ -1789,7 +1830,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			goto out_free_conn;
 		}
 	} else if (unconf) {
-		unsigned int hash;
 		struct nfs4_client *old;
 		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
 		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1803,8 +1843,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			status = nfserr_seq_misordered;
 			goto out_free_conn;
 		}
-		hash = clientstr_hashval(unconf->cl_recdir);
-		old = find_confirmed_client_by_str(unconf->cl_recdir, hash);
+		old = find_confirmed_client_by_name(&unconf->cl_name);
 		if (old)
 			expire_client(old);
 		move_to_confirmed(unconf);
@@ -2195,7 +2234,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct xdr_netobj 	clname = setclid->se_name;
 	nfs4_verifier		clverifier = setclid->se_verf;
-	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
 	char                    dname[HEXDIR_LEN];
@@ -2204,11 +2242,9 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		return status;
 
-	strhashval = clientstr_hashval(dname);
-
 	/* Cases below refer to rfc 3530 section 14.2.33: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_str(dname, strhashval);
+	conf = find_confirmed_client_by_name(&clname);
 	if (conf) {
 		/* case 0: */
 		status = nfserr_clid_inuse;
@@ -2223,7 +2259,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out;
 		}
 	}
-	unconf = find_unconfirmed_client_by_str(dname, strhashval);
+	unconf = find_unconfirmed_client_by_name(&clname);
 	if (unconf)
 		expire_client(unconf);
 	status = nfserr_jukebox;
@@ -2237,7 +2273,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		gen_clid(new);
 	new->cl_minorversion = 0;
 	gen_callback(new, setclid, rqstp);
-	add_to_unconfirmed(new, strhashval);
+	add_to_unconfirmed(new);
 	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
 	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
 	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
@@ -2290,9 +2326,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		nfsd4_probe_callback(conf);
 		expire_client(unconf);
 	} else { /* case 3: normal case; new or rebooted client */
-		unsigned int hash = clientstr_hashval(unconf->cl_recdir);
-
-		conf = find_confirmed_client_by_str(unconf->cl_recdir, hash);
+		conf = find_confirmed_client_by_name(&unconf->cl_name);
 		if (conf)
 			expire_client(conf);
 		move_to_confirmed(unconf);
@@ -4706,11 +4740,11 @@ nfs4_state_init(void)
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
-		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
-		INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
 		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	}
+	conf_name_tree = RB_ROOT;
+	unconf_name_tree = RB_ROOT;
 	for (i = 0; i < SESSION_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
@@ -4795,6 +4829,7 @@ nfs4_state_start(void)
 	return ret;
 }
 
+/* should be called with the state lock held */
 static void
 __nfs4_state_shutdown(void)
 {
@@ -4802,17 +4837,24 @@ __nfs4_state_shutdown(void)
 	struct nfs4_client *clp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
+	struct rb_node *node, *tmp;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&conf_id_hashtbl[i])) {
 			clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
 			destroy_client(clp);
 		}
-		while (!list_empty(&unconf_str_hashtbl[i])) {
-			clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
-			destroy_client(clp);
-		}
 	}
+
+	node = rb_first(&unconf_name_tree);
+	while (node != NULL) {
+		tmp = node;
+		node = rb_next(tmp);
+		clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
+		rb_erase(tmp, &unconf_name_tree);
+		destroy_client(clp);
+	}
+
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cf9f7ba4df8d..6c342bd806e5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -232,7 +232,7 @@ struct nfsd4_sessionid {
  */
 struct nfs4_client {
 	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
-	struct list_head	cl_strhash; 	/* hash by cl_name */
+	struct rb_node		cl_namenode;	/* link into by-name trees */
 	struct list_head	cl_openowners;
 	struct idr		cl_stateids;	/* stateid lookup */
 	struct list_head	cl_delegations;
@@ -253,6 +253,7 @@ struct nfs4_client {
 #define NFSD4_CLIENT_CB_KILL		(1)
 #define NFSD4_CLIENT_STABLE		(2)	/* client on stable storage */
 #define NFSD4_CLIENT_RECLAIM_COMPLETE	(3)	/* reclaim_complete done */
+#define NFSD4_CLIENT_CONFIRMED		(4)	/* client is confirmed */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
 	unsigned long		cl_flags;

From 2216d449a97927cc105912e337d169cd4d4db548 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:57 -0500
Subject: [PATCH 027/111] nfsd: get rid of cl_recdir field

Remove the cl_recdir field from the nfs4_client struct. Instead, just
compute it on the fly when and if it's needed, which is now only when
the legacy client tracking code is in effect.

The error handling in the legacy client tracker is also changed to
handle the case where md5 is unavailable. In that case, we'll warn
the admin with a KERN_ERR message and disable the client tracking.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 93 ++++++++++++++++++++++++++++++++++---------
 fs/nfsd/nfs4state.c   | 18 ++-------
 fs/nfsd/state.h       |  2 -
 3 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3048c012d4bc..80e77cc14250 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -103,33 +103,39 @@ md5_to_hex(char *out, char *md5)
 	*out = '\0';
 }
 
-__be32
-nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+static int
+nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
 	struct hash_desc desc;
 	struct scatterlist sg;
-	__be32 status = nfserr_jukebox;
+	int status;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(desc.tfm))
+	if (IS_ERR(desc.tfm)) {
+		status = PTR_ERR(desc.tfm);
 		goto out_no_tfm;
+	}
+
 	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
-	if (cksum.data == NULL)
+	if (cksum.data == NULL) {
+		status = -ENOMEM;
  		goto out;
+	}
 
 	sg_init_one(&sg, clname->data, clname->len);
 
-	if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data))
+	status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data);
+	if (status)
 		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
-	status = nfs_ok;
+	status = 0;
 out:
 	kfree(cksum.data);
 	crypto_free_hash(desc.tfm);
@@ -137,11 +143,36 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 	return status;
 }
 
+/*
+ * If we had an error generating the recdir name for the legacy tracker
+ * then warn the admin. If the error doesn't appear to be transient,
+ * then disable recovery tracking.
+ */
+static void
+legacy_recdir_name_error(int error)
+{
+	printk(KERN_ERR "NFSD: unable to generate recoverydir "
+			"name (%d).\n", error);
+
+	/*
+	 * if the algorithm just doesn't exist, then disable the recovery
+	 * tracker altogether. The crypto libs will generally return this if
+	 * FIPS is enabled as well.
+	 */
+	if (error == -ENOENT) {
+		printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
+			"Reboot recovery will not function correctly!\n");
+
+		/* the argument is ignored by the legacy exit function */
+		nfsd4_client_tracking_exit(NULL);
+	}
+}
+
 static void
 nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
-	char *dname = clp->cl_recdir;
+	char dname[HEXDIR_LEN];
 	struct dentry *dir, *dentry;
 	struct nfs4_client_reclaim *crp;
 	int status;
@@ -152,6 +183,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 		return;
 	if (!rec_file)
 		return;
+
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status)
+		return legacy_recdir_name_error(status);
+
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
 		return;
@@ -186,7 +222,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
 		if (in_grace) {
-			crp = nfs4_client_to_reclaim(clp->cl_recdir);
+			crp = nfs4_client_to_reclaim(dname);
 			if (crp)
 				crp->cr_clp = clp;
 		}
@@ -298,11 +334,16 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
 	struct nfs4_client_reclaim *crp;
+	char dname[HEXDIR_LEN];
 	int status;
 
 	if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status)
+		return legacy_recdir_name_error(status);
+
 	status = mnt_want_write_file(rec_file);
 	if (status)
 		goto out;
@@ -312,13 +353,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	if (status < 0)
 		goto out_drop_write;
 
-	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
 	if (status == 0) {
 		vfs_fsync(rec_file, 0);
 		if (in_grace) {
 			/* remove reclaim record */
-			crp = nfsd4_find_reclaim_client(clp->cl_recdir);
+			crp = nfsd4_find_reclaim_client(dname);
 			if (crp)
 				nfs4_remove_reclaim_record(crp);
 		}
@@ -328,7 +369,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 out:
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
-				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+				" %.*s\n", HEXDIR_LEN, dname);
 }
 
 static int
@@ -500,14 +541,22 @@ nfs4_recoverydir(void)
 static int
 nfsd4_check_legacy_client(struct nfs4_client *clp)
 {
+	int status;
+	char dname[HEXDIR_LEN];
 	struct nfs4_client_reclaim *crp;
 
 	/* did we already find that this client is stable? */
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
 
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status) {
+		legacy_recdir_name_error(status);
+		return status;
+	}
+
 	/* look for it in the reclaim hashtable otherwise */
-	crp = nfsd4_find_reclaim_client(clp->cl_recdir);
+	crp = nfsd4_find_reclaim_client(dname);
 	if (crp) {
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 		crp->cr_clp = clp;
@@ -993,7 +1042,7 @@ nfsd4_cltrack_legacy_topdir(void)
 }
 
 static char *
-nfsd4_cltrack_legacy_recdir(const char *recdir)
+nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
 {
 	int copied;
 	size_t len;
@@ -1010,10 +1059,16 @@ nfsd4_cltrack_legacy_recdir(const char *recdir)
 	if (!result)
 		return result;
 
-	copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/%s",
-				nfs4_recoverydir(), recdir);
-	if (copied >= len) {
-		/* just return nothing if output was truncated */
+	copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/",
+				nfs4_recoverydir());
+	if (copied > (len - HEXDIR_LEN)) {
+		/* just return nothing if output will be truncated */
+		kfree(result);
+		return NULL;
+	}
+
+	copied = nfs4_make_rec_clidname(result + copied, name);
+	if (copied) {
 		kfree(result);
 		return NULL;
 	}
@@ -1126,7 +1181,7 @@ nfsd4_umh_cltrack_check(struct nfs4_client *clp)
 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
 		return -ENOMEM;
 	}
-	legacy = nfsd4_cltrack_legacy_recdir(clp->cl_recdir);
+	legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
 	ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy);
 	kfree(legacy);
 	kfree(hexid);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 99998a1eb426..37b19f7948ed 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1299,7 +1299,7 @@ static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t
 	return NULL;
 }
 
-static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
+static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
 	struct nfs4_client *clp;
@@ -1319,7 +1319,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 		return NULL;
 	}
 	idr_init(&clp->cl_stateids);
-	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_refcount, 0);
 	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
 	INIT_LIST_HEAD(&clp->cl_idhash);
@@ -1616,7 +1615,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 {
 	struct nfs4_client *unconf, *conf, *new;
 	__be32 status;
-	char			dname[HEXDIR_LEN];
 	char			addr_str[INET6_ADDRSTRLEN];
 	nfs4_verifier		verf = exid->verifier;
 	struct sockaddr		*sa = svc_addr(rqstp);
@@ -1643,11 +1641,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 		return nfserr_serverfault;	/* no excuse :-/ */
 	}
 
-	status = nfs4_make_rec_clidname(dname, &exid->clname);
-
-	if (status)
-		return status;
-
 	/* Cases below refer to rfc 5661 section 18.35.4: */
 	nfs4_lock_state();
 	conf = find_confirmed_client_by_name(&exid->clname);
@@ -1701,7 +1694,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 
 	/* case 1 (normal case) */
 out_new:
-	new = create_client(exid->clname, dname, rqstp, &verf);
+	new = create_client(exid->clname, rqstp, &verf);
 	if (new == NULL) {
 		status = nfserr_jukebox;
 		goto out;
@@ -2236,12 +2229,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	nfs4_verifier		clverifier = setclid->se_verf;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
-	char                    dname[HEXDIR_LEN];
 	
-	status = nfs4_make_rec_clidname(dname, &clname);
-	if (status)
-		return status;
-
 	/* Cases below refer to rfc 3530 section 14.2.33: */
 	nfs4_lock_state();
 	conf = find_confirmed_client_by_name(&clname);
@@ -2263,7 +2251,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (unconf)
 		expire_client(unconf);
 	status = nfserr_jukebox;
-	new = create_client(clname, dname, rqstp, &clverifier);
+	new = create_client(clname, rqstp, &clverifier);
 	if (new == NULL)
 		goto out;
 	if (conf && same_verf(&conf->cl_verifier, &clverifier))
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6c342bd806e5..029217ad2cb0 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -238,7 +238,6 @@ struct nfs4_client {
 	struct list_head	cl_delegations;
 	struct list_head        cl_lru;         /* tail queue */
 	struct xdr_netobj	cl_name; 	/* id generated by client */
-	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
@@ -482,7 +481,6 @@ extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name);
 extern bool nfs4_has_reclaimed_state(const char *name);
 extern void release_session_client(struct nfsd4_session *);

From 7e4f015d815d04d888d434889dd3bbb4c210511a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 12 Nov 2012 15:00:58 -0500
Subject: [PATCH 028/111] nfsd: release the legacy reclaimable clients list in
 grace_done

The current code holds on to this list until nfsd is shut down, but it's
never touched once the grace period ends. Release that memory back into
the wild when the grace period ends.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 80e77cc14250..b03b6aa7a6a0 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -404,6 +404,7 @@ nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
 		vfs_fsync(rec_file, 0);
 	mnt_drop_write_file(rec_file);
 out:
+	nfs4_release_reclaim();
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
 			" directory %s\n", rec_file->f_path.dentry->d_name.name);

From 135ae8270d233067c8be426e2a59d0733ba74723 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Sat, 10 Nov 2012 07:20:25 -0500
Subject: [PATCH 029/111] nfsd4: init_session should be declared static

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 37b19f7948ed..7de9ba00a718 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -947,7 +947,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
 	return new;
 }
 
-void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
+static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
 

From 2b4cf668a7b8f84182a35f07152d8b6f012629d2 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Tue, 13 Nov 2012 15:41:27 -0500
Subject: [PATCH 030/111] nfsd4: get_backchannel_cred should be static

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7bb187ac1492..996847023015 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -642,7 +642,7 @@ int set_callback_cred(void)
 	return 0;
 }
 
-struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
+static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
 {
 	if (clp->cl_minorversion == 0) {
 		return get_rpccred(callback_cred);

From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 14 Nov 2012 10:48:05 -0500
Subject: [PATCH 031/111] svcrpc: Revert "sunrpc/cache.h: replace
 simple_strtoul"

Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace
simple_strtoul" introduced new range-checking which could cause get_int
to fail on unsigned integers too large to be represented as an int.

We could parse them as unsigned instead--but it turns out svcgssd is
actually passing down "-1" in some cases.  Which is perhaps stupid, but
there's nothing we can do about it now.

So just revert back to the previous "sloppy" behavior that accepts
either representation.

Cc: stable@vger.kernel.org
Reported-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f792794f6634..5dc9ee4d616e 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize);
 static inline int get_int(char **bpp, int *anint)
 {
 	char buf[50];
+	char *ep;
+	int rv;
 	int len = qword_get(bpp, buf, sizeof(buf));
 
 	if (len < 0)
@@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint)
 	if (len == 0)
 		return -ENOENT;
 
-	if (kstrtoint(buf, 0, anint))
+	rv = simple_strtol(buf, &ep, 0);
+	if (*ep)
 		return -EINVAL;
 
+	*anint = rv;
 	return 0;
 }
 

From 7f2210fa6b791c290e36d8b3c8af7aaf22b2aaf0 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:05 +0300
Subject: [PATCH 032/111] nfsd: use service net instead of hard-coded net where
 possible

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7de9ba00a718..207b9afbbacf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2281,7 +2281,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net	*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (STALE_CLIENTID(clid, nn))
 		return nfserr_stale_clientid;
@@ -3151,7 +3151,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfs4_client *clp;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	nfs4_lock_state();
 	dprintk("process_renew(%08x/%08x): starting\n", 
@@ -4104,7 +4104,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	bool new_state = false;
 	int lkflg;
 	int err;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
 		(long long) lock->lk_offset,
@@ -4277,7 +4277,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct file_lock *file_lock = NULL;
 	struct nfs4_lockowner *lo;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (locks_in_grace(SVC_NET(rqstp)))
 		return nfserr_grace;
@@ -4453,7 +4453,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	struct list_head matches;
 	unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);

From c212cecfa21b3d30cd5cc2389754a46973ad9027 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:10 +0300
Subject: [PATCH 033/111] nfsd: make nfs4_client network namespace dependent

And use it's net where possible.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c |  2 +-
 fs/nfsd/nfs4recover.c  |  9 +++------
 fs/nfsd/nfs4state.c    | 15 +++++++++------
 fs/nfsd/state.h        |  1 +
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 996847023015..826cc269c445 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -663,7 +663,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		.to_retries	= 0,
 	};
 	struct rpc_create_args args = {
-		.net		= &init_net,
+		.net		= clp->net,
 		.address	= (struct sockaddr *) &conn->cb_addr,
 		.addrsize	= conn->cb_addrlen,
 		.saddress	= (struct sockaddr *) &conn->cb_saddr,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b03b6aa7a6a0..9881bcad264b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -859,8 +859,7 @@ nfsd4_cld_create(struct nfs4_client *clp)
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if it's already stored */
@@ -897,8 +896,7 @@ nfsd4_cld_remove(struct nfs4_client *clp)
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if it's already removed */
@@ -935,8 +933,7 @@ nfsd4_cld_check(struct nfs4_client *clp)
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if one was already stored during this grace pd */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 207b9afbbacf..001bbc99d7a4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1263,10 +1263,9 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
 }
 
-static void gen_clid(struct nfs4_client *clp)
+static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
 {
 	static u32 current_clientid = 1;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	clp->cl_clientid.cl_boot = nn->boot_time;
 	clp->cl_clientid.cl_id = current_clientid++; 
@@ -1305,6 +1304,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	struct nfs4_client *clp;
 	struct sockaddr *sa = svc_addr(rqstp);
 	int ret;
+	struct net *net = SVC_NET(rqstp);
 
 	clp = alloc_client(name);
 	if (clp == NULL)
@@ -1335,6 +1335,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
+	clp->net = net;
 	return clp;
 }
 
@@ -1471,7 +1472,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
 	else
 		goto out_err;
 
-	conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val,
+	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
 					    se->se_callback_addr_len,
 					    (struct sockaddr *)&conn->cb_addr,
 					    sizeof(conn->cb_addr));
@@ -1619,6 +1620,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	nfs4_verifier		verf = exid->verifier;
 	struct sockaddr		*sa = svc_addr(rqstp);
 	bool	update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
+	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	rpc_ntop(sa, addr_str, sizeof(addr_str));
 	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
@@ -1701,7 +1703,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	}
 	new->cl_minorversion = 1;
 
-	gen_clid(new);
+	gen_clid(new, nn);
 	add_to_unconfirmed(new);
 out_copy:
 	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -2229,7 +2231,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	nfs4_verifier		clverifier = setclid->se_verf;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
-	
+	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	/* Cases below refer to rfc 3530 section 14.2.33: */
 	nfs4_lock_state();
 	conf = find_confirmed_client_by_name(&clname);
@@ -2258,7 +2261,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		/* case 1: probable callback update */
 		copy_clid(new, conf);
 	else /* case 4 (new client) or cases 2, 3 (client reboot): */
-		gen_clid(new);
+		gen_clid(new, nn);
 	new->cl_minorversion = 0;
 	gen_callback(new, setclid, rqstp);
 	add_to_unconfirmed(new);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 029217ad2cb0..ca8ee8c3ae74 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -283,6 +283,7 @@ struct nfs4_client {
 	unsigned long		cl_cb_slot_busy;
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
+	struct net		*net;
 };
 
 static inline void

From 52e19c09a183d82d99f10c284bc8b27933b1d1fc Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:16 +0300
Subject: [PATCH 034/111] nfsd: make reclaim_str_hashtbl allocated per net

This hash holds nfs4_clients info, which are network namespace aware.
So let's make it allocated per network namespace.

Note: this hash is used only by legacy tracker. So let's allocate hash in
tracker init.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h       |  12 +++++
 fs/nfsd/nfs4recover.c | 100 ++++++++++++++++++++++++++++++++----------
 fs/nfsd/nfs4state.c   |  42 +++++++-----------
 fs/nfsd/state.h       |  12 ++---
 4 files changed, 111 insertions(+), 55 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 65c2431ea32f..49e54790d862 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -24,6 +24,11 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
+/* Hash tables for nfs4_clientid state */
+#define CLIENT_HASH_BITS                 4
+#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
+
 struct cld_net;
 
 struct nfsd_net {
@@ -38,6 +43,13 @@ struct nfsd_net {
 	struct lock_manager nfsd4_manager;
 	bool grace_ended;
 	time_t boot_time;
+
+	/*
+	 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+	 * used in reboot/reset lease grace period processing
+	 */
+	struct list_head *reclaim_str_hashtbl;
+	int reclaim_str_hashtbl_size;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9881bcad264b..376692ab1b3b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -176,6 +176,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	struct dentry *dir, *dentry;
 	struct nfs4_client_reclaim *crp;
 	int status;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 
@@ -222,7 +223,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
 		if (in_grace) {
-			crp = nfs4_client_to_reclaim(dname);
+			crp = nfs4_client_to_reclaim(dname, nn);
 			if (crp)
 				crp->cr_clp = clp;
 		}
@@ -237,7 +238,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	nfs4_reset_creds(original_cred);
 }
 
-typedef int (recdir_func)(struct dentry *, struct dentry *);
+typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
 
 struct name_list {
 	char name[HEXDIR_LEN];
@@ -263,7 +264,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen,
 }
 
 static int
-nfsd4_list_rec_dir(recdir_func *f)
+nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 {
 	const struct cred *original_cred;
 	struct dentry *dir = rec_file->f_path.dentry;
@@ -292,7 +293,7 @@ nfsd4_list_rec_dir(recdir_func *f)
 				status = PTR_ERR(dentry);
 				break;
 			}
-			status = f(dir, dentry);
+			status = f(dir, dentry, nn);
 			dput(dentry);
 		}
 		list_del(&entry->list);
@@ -336,6 +337,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	struct nfs4_client_reclaim *crp;
 	char dname[HEXDIR_LEN];
 	int status;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
@@ -359,9 +361,9 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 		vfs_fsync(rec_file, 0);
 		if (in_grace) {
 			/* remove reclaim record */
-			crp = nfsd4_find_reclaim_client(dname);
+			crp = nfsd4_find_reclaim_client(dname, nn);
 			if (crp)
-				nfs4_remove_reclaim_record(crp);
+				nfs4_remove_reclaim_record(crp, nn);
 		}
 	}
 out_drop_write:
@@ -373,11 +375,11 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 }
 
 static int
-purge_old(struct dentry *parent, struct dentry *child)
+purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	int status;
 
-	if (nfs4_has_reclaimed_state(child->d_name.name))
+	if (nfs4_has_reclaimed_state(child->d_name.name, nn))
 		return 0;
 
 	status = vfs_rmdir(parent->d_inode, child);
@@ -392,6 +394,7 @@ static void
 nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
 {
 	int status;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	in_grace = false;
 	if (!rec_file)
@@ -399,19 +402,19 @@ nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
 	status = mnt_want_write_file(rec_file);
 	if (status)
 		goto out;
-	status = nfsd4_list_rec_dir(purge_old);
+	status = nfsd4_list_rec_dir(purge_old, nn);
 	if (status == 0)
 		vfs_fsync(rec_file, 0);
 	mnt_drop_write_file(rec_file);
 out:
-	nfs4_release_reclaim();
+	nfs4_release_reclaim(nn);
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
 			" directory %s\n", rec_file->f_path.dentry->d_name.name);
 }
 
 static int
-load_recdir(struct dentry *parent, struct dentry *child)
+load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	if (child->d_name.len != HEXDIR_LEN - 1) {
 		printk("nfsd4: illegal name %s in recovery directory\n",
@@ -419,18 +422,19 @@ load_recdir(struct dentry *parent, struct dentry *child)
 		/* Keep trying; maybe the others are OK: */
 		return 0;
 	}
-	nfs4_client_to_reclaim(child->d_name.name);
+	nfs4_client_to_reclaim(child->d_name.name, nn);
 	return 0;
 }
 
 static int
-nfsd4_recdir_load(void) {
+nfsd4_recdir_load(struct net *net) {
 	int status;
+	struct nfsd_net *nn =  net_generic(net, nfsd_net_id);
 
 	if (!rec_file)
 		return 0;
 
-	status = nfsd4_list_rec_dir(load_recdir);
+	status = nfsd4_list_rec_dir(load_recdir, nn);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
 			" directory %s\n", rec_file->f_path.dentry->d_name.name);
@@ -474,11 +478,53 @@ nfsd4_init_recdir(void)
 	return status;
 }
 
+
+static int
+nfs4_legacy_state_init(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int i;
+
+	nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) *
+					  CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->reclaim_str_hashtbl)
+		return -ENOMEM;
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+	nn->reclaim_str_hashtbl_size = 0;
+
+	return 0;
+}
+
+static void
+nfs4_legacy_state_shutdown(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	kfree(nn->reclaim_str_hashtbl);
+}
+
 static int
 nfsd4_load_reboot_recovery_data(struct net *net)
 {
 	int status;
 
+	nfs4_lock_state();
+	status = nfsd4_init_recdir();
+	if (!status)
+		status = nfsd4_recdir_load(net);
+	nfs4_unlock_state();
+	if (status)
+		printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
+	return status;
+}
+
+static int
+nfsd4_legacy_tracking_init(struct net *net)
+{
+	int status;
+
 	/* XXX: The legacy code won't work in a container */
 	if (net != &init_net) {
 		WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
@@ -486,13 +532,17 @@ nfsd4_load_reboot_recovery_data(struct net *net)
 		return -EINVAL;
 	}
 
-	nfs4_lock_state();
-	status = nfsd4_init_recdir();
-	if (!status)
-		status = nfsd4_recdir_load();
-	nfs4_unlock_state();
+	status = nfs4_legacy_state_init(net);
 	if (status)
-		printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
+		return status;
+
+	status = nfsd4_load_reboot_recovery_data(net);
+	if (status)
+		goto err;
+	return 0;
+
+err:
+	nfs4_legacy_state_shutdown(net);
 	return status;
 }
 
@@ -508,8 +558,11 @@ nfsd4_shutdown_recdir(void)
 static void
 nfsd4_legacy_tracking_exit(struct net *net)
 {
-	nfs4_release_reclaim();
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	nfs4_release_reclaim(nn);
 	nfsd4_shutdown_recdir();
+	nfs4_legacy_state_shutdown(net);
 }
 
 /*
@@ -545,6 +598,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
 	int status;
 	char dname[HEXDIR_LEN];
 	struct nfs4_client_reclaim *crp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	/* did we already find that this client is stable? */
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -557,7 +611,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
 	}
 
 	/* look for it in the reclaim hashtable otherwise */
-	crp = nfsd4_find_reclaim_client(dname);
+	crp = nfsd4_find_reclaim_client(dname, nn);
 	if (crp) {
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 		crp->cr_clp = clp;
@@ -568,7 +622,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
 }
 
 static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
-	.init		= nfsd4_load_reboot_recovery_data,
+	.init		= nfsd4_legacy_tracking_init,
 	.exit		= nfsd4_legacy_tracking_exit,
 	.create		= nfsd4_create_clid_dir,
 	.remove		= nfsd4_remove_clid_dir,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 001bbc99d7a4..ba4785559509 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -393,11 +393,6 @@ unhash_delegation(struct nfs4_delegation *dp)
 /* client_lock protects the client lru list and session hash table */
 static DEFINE_SPINLOCK(client_lock);
 
-/* Hash tables for nfs4_clientid state */
-#define CLIENT_HASH_BITS                 4
-#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
-#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
-
 static unsigned int clientid_hashval(u32 id)
 {
 	return id & CLIENT_HASH_MASK;
@@ -409,11 +404,8 @@ static unsigned int clientstr_hashval(const char *name)
 }
 
 /*
- * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
- * used in reboot/reset lease grace period processing
- *
  * conf_id_hashtbl[], and conf_name_tree hold confirmed
- * setclientid_confirmed info. 
+ * setclientid_confirmed info.
  *
  * unconf_id_hashtbl[] and unconf_name_tree hold unconfirmed
  * setclientid info.
@@ -426,8 +418,6 @@ static unsigned int clientstr_hashval(const char *name)
  *
  * All of the above fields are protected by the client_mutex.
  */
-static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
-static int reclaim_str_hashtbl_size = 0;
 static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct rb_root conf_name_tree;
@@ -4509,11 +4499,11 @@ alloc_reclaim(void)
 }
 
 bool
-nfs4_has_reclaimed_state(const char *name)
+nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
 {
 	struct nfs4_client_reclaim *crp;
 
-	crp = nfsd4_find_reclaim_client(name);
+	crp = nfsd4_find_reclaim_client(name, nn);
 	return (crp && crp->cr_clp);
 }
 
@@ -4521,7 +4511,7 @@ nfs4_has_reclaimed_state(const char *name)
  * failure => all reset bets are off, nfserr_no_grace...
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(const char *name)
+nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp;
@@ -4531,42 +4521,42 @@ nfs4_client_to_reclaim(const char *name)
 	if (crp) {
 		strhashval = clientstr_hashval(name);
 		INIT_LIST_HEAD(&crp->cr_strhash);
-		list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
+		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
 		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
 		crp->cr_clp = NULL;
-		reclaim_str_hashtbl_size++;
+		nn->reclaim_str_hashtbl_size++;
 	}
 	return crp;
 }
 
 void
-nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp)
+nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
 	list_del(&crp->cr_strhash);
 	kfree(crp);
-	reclaim_str_hashtbl_size--;
+	nn->reclaim_str_hashtbl_size--;
 }
 
 void
-nfs4_release_reclaim(void)
+nfs4_release_reclaim(struct nfsd_net *nn)
 {
 	struct nfs4_client_reclaim *crp = NULL;
 	int i;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		while (!list_empty(&reclaim_str_hashtbl[i])) {
-			crp = list_entry(reclaim_str_hashtbl[i].next,
+		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
+			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
-			nfs4_remove_reclaim_record(crp);
+			nfs4_remove_reclaim_record(crp, nn);
 		}
 	}
-	BUG_ON(reclaim_str_hashtbl_size);
+	BUG_ON(nn->reclaim_str_hashtbl_size);
 }
 
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
 struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(const char *recdir)
+nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
@@ -4574,7 +4564,7 @@ nfsd4_find_reclaim_client(const char *recdir)
 	dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
 
 	strhashval = clientstr_hashval(recdir);
-	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
+	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
 		if (same_name(crp->cr_recdir, recdir)) {
 			return crp;
 		}
@@ -4732,7 +4722,6 @@ nfs4_state_init(void)
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
-		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	}
 	conf_name_tree = RB_ROOT;
 	unconf_name_tree = RB_ROOT;
@@ -4749,7 +4738,6 @@ nfs4_state_init(void)
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	reclaim_str_hashtbl_size = 0;
 }
 
 /*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ca8ee8c3ae74..26a912cdfe0c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -466,9 +466,10 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net,
 		stateid_t *stateid, int flags, struct file **filp);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
-void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *);
-extern void nfs4_release_reclaim(void);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir);
+void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
+extern void nfs4_release_reclaim(struct nfsd_net *);
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
+							struct nfsd_net *nn);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);
 extern void nfs4_free_openowner(struct nfs4_openowner *);
 extern void nfs4_free_lockowner(struct nfs4_lockowner *);
@@ -482,8 +483,9 @@ extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name);
-extern bool nfs4_has_reclaimed_state(const char *name);
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
+							struct nfsd_net *nn);
+extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
 extern void release_session_client(struct nfsd4_session *);
 extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
 

From 8daae4dc0d09d44d38194f72bc91740b46a6ce53 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:21 +0300
Subject: [PATCH 035/111] nfsd: make conf_id_hashtbl allocated per net

This hash holds nfs4_clients info, which are network namespace aware.
So let's make it allocated per network namespace.

Note: this hash can be allocated in per-net operations. But it looks
better to allocate it on nfsd state start and thus don't waste resources
if server is not running.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4state.c | 75 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 49e54790d862..0cc85e95e8a4 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -50,6 +50,7 @@ struct nfsd_net {
 	 */
 	struct list_head *reclaim_str_hashtbl;
 	int reclaim_str_hashtbl_size;
+	struct list_head *conf_id_hashtbl;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ba4785559509..6df427773d01 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -418,7 +418,6 @@ static unsigned int clientstr_hashval(const char *name)
  *
  * All of the above fields are protected by the client_mutex.
  */
-static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct rb_root conf_name_tree;
 static struct rb_root unconf_name_tree;
@@ -1385,9 +1384,10 @@ static void
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
-	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
 	rb_erase(&clp->cl_namenode, &unconf_name_tree);
 	add_clp_to_name_tree(clp, &conf_name_tree);
 	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
@@ -1395,12 +1395,12 @@ move_to_confirmed(struct nfs4_client *clp)
 }
 
 static struct nfs4_client *
-find_confirmed_client(clientid_t *clid, bool sessions)
+find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
+	list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) {
 		if (same_clid(&clp->cl_clientid, clid)) {
 			if ((bool)clp->cl_minorversion != sessions)
 				return NULL;
@@ -1787,6 +1787,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	struct nfsd4_conn *conn;
 	struct nfsd4_clid_slot *cs_slot = NULL;
 	__be32 status = 0;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
 		return nfserr_inval;
@@ -1802,7 +1803,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
 	nfs4_lock_state();
 	unconf = find_unconfirmed_client(&cr_ses->clientid, true);
-	conf = find_confirmed_client(&cr_ses->clientid, true);
+	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
 
 	if (conf) {
 		cs_slot = &conf->cl_cs_slot;
@@ -2142,10 +2143,11 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 {
 	struct nfs4_client *conf, *unconf, *clp;
 	__be32 status = 0;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	nfs4_lock_state();
 	unconf = find_unconfirmed_client(&dc->clientid, true);
-	conf = find_confirmed_client(&dc->clientid, true);
+	conf = find_confirmed_client(&dc->clientid, true, nn);
 
 	if (conf) {
 		clp = conf;
@@ -2280,7 +2282,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		return nfserr_stale_clientid;
 	nfs4_lock_state();
 
-	conf = find_confirmed_client(clid, false);
+	conf = find_confirmed_client(clid, false, nn);
 	unconf = find_unconfirmed_client(clid, false);
 	/*
 	 * We try hard to give out unique clientid's, so if we get an
@@ -2656,7 +2658,8 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 	oo = find_openstateowner_str(strhashval, open, cstate->minorversion);
 	open->op_openowner = oo;
 	if (!oo) {
-		clp = find_confirmed_client(clientid, cstate->minorversion);
+		clp = find_confirmed_client(clientid, cstate->minorversion,
+					    nn);
 		if (clp == NULL)
 			return nfserr_expired;
 		goto new_owner;
@@ -3152,7 +3155,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfserr_stale_clientid;
 	if (STALE_CLIENTID(clid, nn))
 		goto out;
-	clp = find_confirmed_client(clid, cstate->minorversion);
+	clp = find_confirmed_client(clid, cstate->minorversion, nn);
 	status = nfserr_expired;
 	if (clp == NULL) {
 		/* We assume the client took too long to RENEW. */
@@ -3428,7 +3431,7 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, s
 		return nfserr_bad_stateid;
 	if (STALE_STATEID(stateid, nn))
 		return nfserr_stale_stateid;
-	cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions);
+	cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn);
 	if (!cl)
 		return nfserr_expired;
 	*s = find_stateid_by_type(cl, stateid, typemask);
@@ -4579,9 +4582,10 @@ __be32
 nfs4_check_open_reclaim(clientid_t *clid, bool sessions)
 {
 	struct nfs4_client *clp;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	/* find clientid in conf_id_hashtbl */
-	clp = find_confirmed_client(clid, sessions);
+	clp = find_confirmed_client(clid, sessions, nn);
 	if (clp == NULL)
 		return nfserr_reclaim_bad;
 
@@ -4720,7 +4724,6 @@ nfs4_state_init(void)
 	int i;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
 	}
 	conf_name_tree = RB_ROOT;
@@ -4761,6 +4764,38 @@ set_max_delegations(void)
 	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
 }
 
+static int nfs4_state_start_net(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int i;
+
+	nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) *
+			CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->conf_id_hashtbl)
+		return -ENOMEM;
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
+
+	return 0;
+}
+
+static void
+__nfs4_state_shutdown_net(struct net *net)
+{
+	int i;
+	struct nfs4_client *clp = NULL;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		while (!list_empty(&nn->conf_id_hashtbl[i])) {
+			clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+			destroy_client(clp);
+		}
+	}
+	kfree(nn->conf_id_hashtbl);
+}
+
 /* initialization to perform when the nfsd service is started: */
 
 int
@@ -4778,6 +4813,9 @@ nfs4_state_start(void)
 	 * basis.
 	 */
 	get_net(net);
+	ret = nfs4_state_start_net(net);
+	if (ret)
+		return ret;
 	nfsd4_client_tracking_init(net);
 	nn->boot_time = get_seconds();
 	locks_start_grace(net, &nn->nfsd4_manager);
@@ -4804,26 +4842,21 @@ nfs4_state_start(void)
 	destroy_workqueue(laundry_wq);
 out_recovery:
 	nfsd4_client_tracking_exit(net);
+	__nfs4_state_shutdown_net(net);
 	put_net(net);
 	return ret;
 }
 
 /* should be called with the state lock held */
 static void
-__nfs4_state_shutdown(void)
+__nfs4_state_shutdown(struct net *net)
 {
-	int i;
 	struct nfs4_client *clp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
 	struct rb_node *node, *tmp;
 
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		while (!list_empty(&conf_id_hashtbl[i])) {
-			clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
-			destroy_client(clp);
-		}
-	}
+	__nfs4_state_shutdown_net(net);
 
 	node = rb_first(&unconf_name_tree);
 	while (node != NULL) {
@@ -4860,7 +4893,7 @@ nfs4_state_shutdown(void)
 	destroy_workqueue(laundry_wq);
 	locks_end_grace(&nn->nfsd4_manager);
 	nfs4_lock_state();
-	__nfs4_state_shutdown();
+	__nfs4_state_shutdown(net);
 	nfs4_unlock_state();
 	nfsd4_destroy_callback_queue();
 }

From 382a62e76cbf91fb364a4cd8732761e4ecf62153 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:26 +0300
Subject: [PATCH 036/111] nfsd: make conf_name_tree per net

This tree holds nfs4_clients info, which are network namespace aware.
So let's make it per network namespace.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  4 ++++
 fs/nfsd/nfs4state.c | 31 ++++++++++++++++---------------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 0cc85e95e8a4..afd911638464 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -47,10 +47,14 @@ struct nfsd_net {
 	/*
 	 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
 	 * used in reboot/reset lease grace period processing
+	 *
+	 * conf_id_hashtbl[], and conf_name_tree hold confirmed
+	 * setclientid_confirmed info.
 	 */
 	struct list_head *reclaim_str_hashtbl;
 	int reclaim_str_hashtbl_size;
 	struct list_head *conf_id_hashtbl;
+	struct rb_root conf_name_tree;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6df427773d01..d40e57b9051a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -404,9 +404,6 @@ static unsigned int clientstr_hashval(const char *name)
 }
 
 /*
- * conf_id_hashtbl[], and conf_name_tree hold confirmed
- * setclientid_confirmed info.
- *
  * unconf_id_hashtbl[] and unconf_name_tree hold unconfirmed
  * setclientid info.
  *
@@ -419,7 +416,6 @@ static unsigned int clientstr_hashval(const char *name)
  * All of the above fields are protected by the client_mutex.
  */
 static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
-static struct rb_root conf_name_tree;
 static struct rb_root unconf_name_tree;
 static struct list_head client_lru;
 static struct list_head close_lru;
@@ -1114,6 +1110,7 @@ destroy_client(struct nfs4_client *clp)
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct list_head reaplist;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
@@ -1136,7 +1133,7 @@ destroy_client(struct nfs4_client *clp)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	list_del(&clp->cl_idhash);
 	if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
-		rb_erase(&clp->cl_namenode, &conf_name_tree);
+		rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
 	else
 		rb_erase(&clp->cl_namenode, &unconf_name_tree);
 	spin_lock(&client_lock);
@@ -1389,7 +1386,7 @@ move_to_confirmed(struct nfs4_client *clp)
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
 	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
 	rb_erase(&clp->cl_namenode, &unconf_name_tree);
-	add_clp_to_name_tree(clp, &conf_name_tree);
+	add_clp_to_name_tree(clp, &nn->conf_name_tree);
 	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
 	renew_client(clp);
 }
@@ -1433,9 +1430,9 @@ static bool clp_used_exchangeid(struct nfs4_client *clp)
 } 
 
 static struct nfs4_client *
-find_confirmed_client_by_name(struct xdr_netobj *name)
+find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
 {
-	return find_clp_in_name_tree(name, &conf_name_tree);
+	return find_clp_in_name_tree(name, &nn->conf_name_tree);
 }
 
 static struct nfs4_client *
@@ -1635,7 +1632,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 
 	/* Cases below refer to rfc 5661 section 18.35.4: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_name(&exid->clname);
+	conf = find_confirmed_client_by_name(&exid->clname, nn);
 	if (conf) {
 		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
 		bool verfs_match = same_verf(&verf, &conf->cl_verifier);
@@ -1829,7 +1826,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			status = nfserr_seq_misordered;
 			goto out_free_conn;
 		}
-		old = find_confirmed_client_by_name(&unconf->cl_name);
+		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
 		if (old)
 			expire_client(old);
 		move_to_confirmed(unconf);
@@ -2227,7 +2224,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	/* Cases below refer to rfc 3530 section 14.2.33: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_name(&clname);
+	conf = find_confirmed_client_by_name(&clname, nn);
 	if (conf) {
 		/* case 0: */
 		status = nfserr_clid_inuse;
@@ -2309,7 +2306,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		nfsd4_probe_callback(conf);
 		expire_client(unconf);
 	} else { /* case 3: normal case; new or rebooted client */
-		conf = find_confirmed_client_by_name(&unconf->cl_name);
+		conf = find_confirmed_client_by_name(&unconf->cl_name, nn);
 		if (conf)
 			expire_client(conf);
 		move_to_confirmed(unconf);
@@ -4726,7 +4723,6 @@ nfs4_state_init(void)
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
 	}
-	conf_name_tree = RB_ROOT;
 	unconf_name_tree = RB_ROOT;
 	for (i = 0; i < SESSION_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
@@ -4772,12 +4768,17 @@ static int nfs4_state_start_net(struct net *net)
 	nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) *
 			CLIENT_HASH_SIZE, GFP_KERNEL);
 	if (!nn->conf_id_hashtbl)
-		return -ENOMEM;
+		goto err;
 
-	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
+	}
+	nn->conf_name_tree = RB_ROOT;
 
 	return 0;
+
+err:
+	return -ENOMEM;
 }
 
 static void

From 0a7ec37727dcc3293cd4c9958b25c43f3a797d47 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:31 +0300
Subject: [PATCH 037/111] nfsd: make unconf_id_hashtbl allocated per net

This hash holds nfs4_clients info, which are network namespace aware.
So let's make it allocated per network namespace.

Note: this hash can be allocated in per-net operations. But it looks
better to allocate it on nfsd state start and thus don't waste resources
if server is not running.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4state.c | 25 +++++++++++++++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index afd911638464..1ff781f9c3d0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -55,6 +55,7 @@ struct nfsd_net {
 	int reclaim_str_hashtbl_size;
 	struct list_head *conf_id_hashtbl;
 	struct rb_root conf_name_tree;
+	struct list_head *unconf_id_hashtbl;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d40e57b9051a..f33bbfbdc24f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -415,7 +415,6 @@ static unsigned int clientstr_hashval(const char *name)
  *
  * All of the above fields are protected by the client_mutex.
  */
-static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct rb_root unconf_name_tree;
 static struct list_head client_lru;
 static struct list_head close_lru;
@@ -1369,11 +1368,12 @@ static void
 add_to_unconfirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
 	add_clp_to_name_tree(clp, &unconf_name_tree);
 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
-	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
 	renew_client(clp);
 }
 
@@ -1409,12 +1409,12 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 }
 
 static struct nfs4_client *
-find_unconfirmed_client(clientid_t *clid, bool sessions)
+find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
+	list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) {
 		if (same_clid(&clp->cl_clientid, clid)) {
 			if ((bool)clp->cl_minorversion != sessions)
 				return NULL;
@@ -1799,7 +1799,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		goto out_free_session;
 
 	nfs4_lock_state();
-	unconf = find_unconfirmed_client(&cr_ses->clientid, true);
+	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
 	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
 
 	if (conf) {
@@ -2143,7 +2143,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	nfs4_lock_state();
-	unconf = find_unconfirmed_client(&dc->clientid, true);
+	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
 	conf = find_confirmed_client(&dc->clientid, true, nn);
 
 	if (conf) {
@@ -2280,7 +2280,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 	nfs4_lock_state();
 
 	conf = find_confirmed_client(clid, false, nn);
-	unconf = find_unconfirmed_client(clid, false);
+	unconf = find_unconfirmed_client(clid, false, nn);
 	/*
 	 * We try hard to give out unique clientid's, so if we get an
 	 * attempt to confirm the same clientid with a different cred,
@@ -4720,9 +4720,6 @@ nfs4_state_init(void)
 {
 	int i;
 
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
-	}
 	unconf_name_tree = RB_ROOT;
 	for (i = 0; i < SESSION_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
@@ -4769,14 +4766,21 @@ static int nfs4_state_start_net(struct net *net)
 			CLIENT_HASH_SIZE, GFP_KERNEL);
 	if (!nn->conf_id_hashtbl)
 		goto err;
+	nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) *
+			CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->unconf_id_hashtbl)
+		goto err_unconf_id;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
+		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
 	}
 	nn->conf_name_tree = RB_ROOT;
 
 	return 0;
 
+err_unconf_id:
+	kfree(nn->conf_id_hashtbl);
 err:
 	return -ENOMEM;
 }
@@ -4794,6 +4798,7 @@ __nfs4_state_shutdown_net(struct net *net)
 			destroy_client(clp);
 		}
 	}
+	kfree(nn->unconf_id_hashtbl);
 	kfree(nn->conf_id_hashtbl);
 }
 

From a99454aa4ff1241a19dcb486fa302d3e8cc09e5b Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:36 +0300
Subject: [PATCH 038/111] nfsd: make unconf_name_tree per net

This hash holds nfs4_clients info, which are network namespace aware.
So let's make it allocated per network namespace.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  4 ++++
 fs/nfsd/nfs4state.c | 42 +++++++++++++++++++-----------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 1ff781f9c3d0..1e76030e1d16 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -50,12 +50,16 @@ struct nfsd_net {
 	 *
 	 * conf_id_hashtbl[], and conf_name_tree hold confirmed
 	 * setclientid_confirmed info.
+	 *
+	 * unconf_str_hastbl[] and unconf_name_tree hold unconfirmed
+	 * setclientid info.
 	 */
 	struct list_head *reclaim_str_hashtbl;
 	int reclaim_str_hashtbl_size;
 	struct list_head *conf_id_hashtbl;
 	struct rb_root conf_name_tree;
 	struct list_head *unconf_id_hashtbl;
+	struct rb_root unconf_name_tree;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f33bbfbdc24f..b35329199e35 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -404,9 +404,6 @@ static unsigned int clientstr_hashval(const char *name)
 }
 
 /*
- * unconf_id_hashtbl[] and unconf_name_tree hold unconfirmed
- * setclientid info.
- *
  * client_lru holds client queue ordered by nfs4_client.cl_time
  * for lease renewal.
  *
@@ -415,7 +412,6 @@ static unsigned int clientstr_hashval(const char *name)
  *
  * All of the above fields are protected by the client_mutex.
  */
-static struct rb_root unconf_name_tree;
 static struct list_head client_lru;
 static struct list_head close_lru;
 
@@ -1134,7 +1130,7 @@ destroy_client(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
 		rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
 	else
-		rb_erase(&clp->cl_namenode, &unconf_name_tree);
+		rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
 	spin_lock(&client_lock);
 	unhash_client_locked(clp);
 	if (atomic_read(&clp->cl_refcount) == 0)
@@ -1371,7 +1367,7 @@ add_to_unconfirmed(struct nfs4_client *clp)
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
-	add_clp_to_name_tree(clp, &unconf_name_tree);
+	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
 	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
 	renew_client(clp);
@@ -1385,7 +1381,7 @@ move_to_confirmed(struct nfs4_client *clp)
 
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
 	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
-	rb_erase(&clp->cl_namenode, &unconf_name_tree);
+	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
 	add_clp_to_name_tree(clp, &nn->conf_name_tree);
 	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
 	renew_client(clp);
@@ -1436,9 +1432,9 @@ find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
 }
 
 static struct nfs4_client *
-find_unconfirmed_client_by_name(struct xdr_netobj *name)
+find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
 {
-	return find_clp_in_name_tree(name, &unconf_name_tree);
+	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
 }
 
 static void
@@ -1677,7 +1673,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 		goto out;
 	}
 
-	unconf  = find_unconfirmed_client_by_name(&exid->clname);
+	unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
 	if (unconf) /* case 4, possible retry or client restart */
 		expire_client(unconf);
 
@@ -2239,7 +2235,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out;
 		}
 	}
-	unconf = find_unconfirmed_client_by_name(&clname);
+	unconf = find_unconfirmed_client_by_name(&clname, nn);
 	if (unconf)
 		expire_client(unconf);
 	status = nfserr_jukebox;
@@ -4720,7 +4716,6 @@ nfs4_state_init(void)
 {
 	int i;
 
-	unconf_name_tree = RB_ROOT;
 	for (i = 0; i < SESSION_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
@@ -4776,6 +4771,7 @@ static int nfs4_state_start_net(struct net *net)
 		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
 	}
 	nn->conf_name_tree = RB_ROOT;
+	nn->unconf_name_tree = RB_ROOT;
 
 	return 0;
 
@@ -4791,6 +4787,7 @@ __nfs4_state_shutdown_net(struct net *net)
 	int i;
 	struct nfs4_client *clp = NULL;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct rb_node *node, *tmp;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -4798,6 +4795,16 @@ __nfs4_state_shutdown_net(struct net *net)
 			destroy_client(clp);
 		}
 	}
+
+	node = rb_first(&nn->unconf_name_tree);
+	while (node != NULL) {
+		tmp = node;
+		node = rb_next(tmp);
+		clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
+		rb_erase(tmp, &nn->unconf_name_tree);
+		destroy_client(clp);
+	}
+
 	kfree(nn->unconf_id_hashtbl);
 	kfree(nn->conf_id_hashtbl);
 }
@@ -4857,22 +4864,11 @@ nfs4_state_start(void)
 static void
 __nfs4_state_shutdown(struct net *net)
 {
-	struct nfs4_client *clp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
-	struct rb_node *node, *tmp;
 
 	__nfs4_state_shutdown_net(net);
 
-	node = rb_first(&unconf_name_tree);
-	while (node != NULL) {
-		tmp = node;
-		node = rb_next(tmp);
-		clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
-		rb_erase(tmp, &unconf_name_tree);
-		destroy_client(clp);
-	}
-
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {

From 9b5311374057e5c87017ea3756e566047c9b61e7 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:41 +0300
Subject: [PATCH 039/111] nfsd: make ownerstr_hashtbl allocated per net

This hash holds open owner state and closely associated with nfs4_clients
info, which are network namespace aware. So let's make it allocated per
network namespace too.

Note: this hash can be allocated in per-net operations. But it looks
better to allocate it on nfsd state start and thus don't waste resources
if server is not running.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4state.c | 41 ++++++++++++++++++++++++++---------------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 1e76030e1d16..46cca9494c7a 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -60,6 +60,7 @@ struct nfsd_net {
 	struct rb_root conf_name_tree;
 	struct list_head *unconf_id_hashtbl;
 	struct rb_root unconf_name_tree;
+	struct list_head *ownerstr_hashtbl;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b35329199e35..f68514d8210e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -176,8 +176,6 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
 	return ret & OWNER_HASH_MASK;
 }
 
-static struct list_head	ownerstr_hashtbl[OWNER_HASH_SIZE];
-
 /* hash table for nfs4_file */
 #define FILE_HASH_BITS                   8
 #define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
@@ -2428,7 +2426,9 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
 
 static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
 {
-	list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
 	list_add(&oo->oo_perclient, &clp->cl_openowners);
 }
 
@@ -2486,13 +2486,14 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
 }
 
 static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, bool sessions)
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
+			bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_stateowner *so;
 	struct nfs4_openowner *oo;
 	struct nfs4_client *clp;
 
-	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+	list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) {
 		if (!so->so_is_open_owner)
 			continue;
 		if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
@@ -2648,7 +2649,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 		return nfserr_jukebox;
 
 	strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
-	oo = find_openstateowner_str(strhashval, open, cstate->minorversion);
+	oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn);
 	open->op_openowner = oo;
 	if (!oo) {
 		clp = find_confirmed_client(clientid, cstate->minorversion,
@@ -3976,8 +3977,9 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
 	struct inode *inode = open_stp->st_file->fi_inode;
 	unsigned int inohash = lockowner_ino_hashval(inode,
 			clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+	list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
 	list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
 	list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
@@ -4458,7 +4460,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	status = nfserr_locks_held;
 	INIT_LIST_HEAD(&matches);
 
-	list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+	list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) {
 		if (sop->so_is_open_owner)
 			continue;
 		if (!same_owner_str(sop, owner, clid))
@@ -4614,13 +4616,14 @@ static void release_openowner_sop(struct nfs4_stateowner *sop)
 }
 
 static int nfsd_release_n_owners(u64 num, bool is_open_owner,
-				void (*release_sop)(struct nfs4_stateowner *))
+				void (*release_sop)(struct nfs4_stateowner *),
+				struct nfsd_net *nn)
 {
 	int i, count = 0;
 	struct nfs4_stateowner *sop, *next;
 
 	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+		list_for_each_entry_safe(sop, next, &nn->ownerstr_hashtbl[i], so_strhash) {
 			if (sop->so_is_open_owner != is_open_owner)
 				continue;
 			release_sop(sop);
@@ -4634,9 +4637,10 @@ static int nfsd_release_n_owners(u64 num, bool is_open_owner,
 void nfsd_forget_locks(u64 num)
 {
 	int count;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, false, release_lockowner_sop);
+	count = nfsd_release_n_owners(num, false, release_lockowner_sop, nn);
 	nfs4_unlock_state();
 
 	printk(KERN_INFO "NFSD: Forgot %d locks", count);
@@ -4645,9 +4649,10 @@ void nfsd_forget_locks(u64 num)
 void nfsd_forget_openowners(u64 num)
 {
 	int count;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, true, release_openowner_sop);
+	count = nfsd_release_n_owners(num, true, release_openowner_sop, nn);
 	nfs4_unlock_state();
 
 	printk(KERN_INFO "NFSD: Forgot %d open owners", count);
@@ -4721,9 +4726,6 @@ nfs4_state_init(void)
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
-	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
-	}
 	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
 	INIT_LIST_HEAD(&close_lru);
@@ -4765,16 +4767,24 @@ static int nfs4_state_start_net(struct net *net)
 			CLIENT_HASH_SIZE, GFP_KERNEL);
 	if (!nn->unconf_id_hashtbl)
 		goto err_unconf_id;
+	nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
+			OWNER_HASH_SIZE, GFP_KERNEL);
+	if (!nn->ownerstr_hashtbl)
+		goto err_ownerstr;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
 	}
+	for (i = 0; i < OWNER_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
 
 	return 0;
 
+err_ownerstr:
+	kfree(nn->unconf_id_hashtbl);
 err_unconf_id:
 	kfree(nn->conf_id_hashtbl);
 err:
@@ -4805,6 +4815,7 @@ __nfs4_state_shutdown_net(struct net *net)
 		destroy_client(clp);
 	}
 
+	kfree(nn->ownerstr_hashtbl);
 	kfree(nn->unconf_id_hashtbl);
 	kfree(nn->conf_id_hashtbl);
 }

From 20e9e2bc98b907efe82621797c561f6169d63d96 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:46 +0300
Subject: [PATCH 040/111] nfsd: make lockowner_ino_hashtbl allocated per net

This hash holds file lock owners and closely associated with nfs4_clients info,
which are network namespace aware. So let's make it allocated per network
namespace too.

Note: this hash can be allocated in per-net operations. But it looks
better to allocate it on nfsd state start and thus don't waste resources
if server is not running.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  4 ++++
 fs/nfsd/nfs4state.c | 27 ++++++++++++++++-----------
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 46cca9494c7a..2281f6df5573 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -29,6 +29,9 @@
 #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
 #define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
 
+#define LOCKOWNER_INO_HASH_BITS		8
+#define LOCKOWNER_INO_HASH_SIZE		(1 << LOCKOWNER_INO_HASH_BITS)
+
 struct cld_net;
 
 struct nfsd_net {
@@ -61,6 +64,7 @@ struct nfsd_net {
 	struct list_head *unconf_id_hashtbl;
 	struct rb_root unconf_name_tree;
 	struct list_head *ownerstr_hashtbl;
+	struct list_head *lockowner_ino_hashtbl;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f68514d8210e..1e76d55a3e9a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3862,8 +3862,6 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
 
-#define LOCKOWNER_INO_HASH_BITS 8
-#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
 #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
 
 static inline u64
@@ -3893,8 +3891,6 @@ static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct
 		& LOCKOWNER_INO_HASH_MASK;
 }
 
-static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
-
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
  * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
@@ -3960,12 +3956,12 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c
 
 static struct nfs4_lockowner *
 find_lockowner_str(struct inode *inode, clientid_t *clid,
-		struct xdr_netobj *owner)
+		   struct xdr_netobj *owner, struct nfsd_net *nn)
 {
 	unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
 	struct nfs4_lockowner *lo;
 
-	list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+	list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
 		if (same_lockowner_ino(lo, inode, clid, owner))
 			return lo;
 	}
@@ -3980,7 +3976,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
-	list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
+	list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]);
 	list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
 
@@ -4054,8 +4050,10 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s
 	struct nfs4_client *cl = oo->oo_owner.so_client;
 	struct nfs4_lockowner *lo;
 	unsigned int strhashval;
+	struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id);
 
-	lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+	lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid,
+				&lock->v.new.owner, nn);
 	if (lo) {
 		if (!cstate->minorversion)
 			return nfserr_bad_seqid;
@@ -4308,7 +4306,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
-	lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner);
+	lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn);
 	if (lo)
 		file_lock->fl_owner = (fl_owner_t)lo;
 	file_lock->fl_pid = current->tgid;
@@ -4726,8 +4724,6 @@ nfs4_state_init(void)
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
-	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
@@ -4771,6 +4767,10 @@ static int nfs4_state_start_net(struct net *net)
 			OWNER_HASH_SIZE, GFP_KERNEL);
 	if (!nn->ownerstr_hashtbl)
 		goto err_ownerstr;
+	nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) *
+			LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
+	if (!nn->lockowner_ino_hashtbl)
+		goto err_lockowner_ino;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
@@ -4778,11 +4778,15 @@ static int nfs4_state_start_net(struct net *net)
 	}
 	for (i = 0; i < OWNER_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
+	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
 
 	return 0;
 
+err_lockowner_ino:
+	kfree(nn->ownerstr_hashtbl);
 err_ownerstr:
 	kfree(nn->unconf_id_hashtbl);
 err_unconf_id:
@@ -4815,6 +4819,7 @@ __nfs4_state_shutdown_net(struct net *net)
 		destroy_client(clp);
 	}
 
+	kfree(nn->lockowner_ino_hashtbl);
 	kfree(nn->ownerstr_hashtbl);
 	kfree(nn->unconf_id_hashtbl);
 	kfree(nn->conf_id_hashtbl);

From 1872de0e8171904612ee85de218fa045bc473cad Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:51 +0300
Subject: [PATCH 041/111] nfsd: make sessionid_hashtbl allocated per net

This hash holds established sessions state and closely associated with
nfs4_clients info, which are network namespace aware. So let's make it
allocated per network namespace too.

Note: this hash can be allocated in per-net operations. But it looks
better to allocate it on nfsd state start and thus don't waste resources
if server is not running.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  3 +++
 fs/nfsd/nfs4state.c | 28 +++++++++++++++++-----------
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 2281f6df5573..da33d3f804b0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -32,6 +32,8 @@
 #define LOCKOWNER_INO_HASH_BITS		8
 #define LOCKOWNER_INO_HASH_SIZE		(1 << LOCKOWNER_INO_HASH_BITS)
 
+#define SESSION_HASH_SIZE	512
+
 struct cld_net;
 
 struct nfsd_net {
@@ -65,6 +67,7 @@ struct nfsd_net {
 	struct rb_root unconf_name_tree;
 	struct list_head *ownerstr_hashtbl;
 	struct list_head *lockowner_ino_hashtbl;
+	struct list_head *sessionid_hashtbl;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1e76d55a3e9a..248f217a00bc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -632,9 +632,6 @@ static void release_openowner(struct nfs4_openowner *oo)
 	nfs4_free_openowner(oo);
 }
 
-#define SESSION_HASH_SIZE	512
-static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
-
 static inline int
 hash_sessionid(struct nfs4_sessionid *sessionid)
 {
@@ -928,6 +925,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
 static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	new->se_client = clp;
 	gen_sessionid(new);
@@ -941,7 +939,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 	kref_init(&new->se_ref);
 	idx = hash_sessionid(&new->se_sessionid);
 	spin_lock(&client_lock);
-	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
 	spin_lock(&clp->cl_lock);
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
@@ -963,15 +961,16 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 
 /* caller must hold client_lock */
 static struct nfsd4_session *
-find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
 {
 	struct nfsd4_session *elem;
 	int idx;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dump_sessionid(__func__, sessionid);
 	idx = hash_sessionid(sessionid);
 	/* Search in the appropriate list */
-	list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
+	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
 		if (!memcmp(elem->se_sessionid.data, sessionid->data,
 			    NFS4_MAX_SESSIONID_LEN)) {
 			return elem;
@@ -1905,7 +1904,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 	if (!nfsd4_last_compound_op(rqstp))
 		return nfserr_not_only_op;
 	spin_lock(&client_lock);
-	cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid);
+	cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
 	/* Sorta weird: we only need the refcnt'ing because new_conn acquires
 	 * client_lock iself: */
 	if (cstate->session) {
@@ -1954,7 +1953,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	}
 	dump_sessionid(__func__, &sessionid->sessionid);
 	spin_lock(&client_lock);
-	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+	ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r));
 	if (!ses) {
 		spin_unlock(&client_lock);
 		goto out;
@@ -2050,7 +2049,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 
 	spin_lock(&client_lock);
 	status = nfserr_badsession;
-	session = find_in_sessionid_hashtbl(&seq->sessionid);
+	session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
 	if (!session)
 		goto out;
 
@@ -4719,8 +4718,6 @@ nfs4_state_init(void)
 {
 	int i;
 
-	for (i = 0; i < SESSION_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
@@ -4771,6 +4768,10 @@ static int nfs4_state_start_net(struct net *net)
 			LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
 	if (!nn->lockowner_ino_hashtbl)
 		goto err_lockowner_ino;
+	nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
+			SESSION_HASH_SIZE, GFP_KERNEL);
+	if (!nn->sessionid_hashtbl)
+		goto err_sessionid;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
@@ -4780,11 +4781,15 @@ static int nfs4_state_start_net(struct net *net)
 		INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
 	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
+	for (i = 0; i < SESSION_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
 
 	return 0;
 
+err_sessionid:
+	kfree(nn->lockowner_ino_hashtbl);
 err_lockowner_ino:
 	kfree(nn->ownerstr_hashtbl);
 err_ownerstr:
@@ -4819,6 +4824,7 @@ __nfs4_state_shutdown_net(struct net *net)
 		destroy_client(clp);
 	}
 
+	kfree(nn->sessionid_hashtbl);
 	kfree(nn->lockowner_ino_hashtbl);
 	kfree(nn->ownerstr_hashtbl);
 	kfree(nn->unconf_id_hashtbl);

From 5ed58bb243484e01e82ffca8451907403168e262 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:21:56 +0300
Subject: [PATCH 042/111] nfsd: make client_lru list per net

This list holds nfs4 clients queue for lease renewal, which are network
namespace aware. So let's make this list per network namespace too.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  5 +++++
 fs/nfsd/nfs4state.c | 16 ++++++++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index da33d3f804b0..9a98a0aeee68 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -68,6 +68,11 @@ struct nfsd_net {
 	struct list_head *ownerstr_hashtbl;
 	struct list_head *lockowner_ino_hashtbl;
 	struct list_head *sessionid_hashtbl;
+	/*
+	 * client_lru holds client queue ordered by nfs4_client.cl_time
+	 * for lease renewal.
+	 */
+	struct list_head client_lru;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 248f217a00bc..9cf7e9bf3691 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -402,15 +402,11 @@ static unsigned int clientstr_hashval(const char *name)
 }
 
 /*
- * client_lru holds client queue ordered by nfs4_client.cl_time
- * for lease renewal.
- *
  * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
  * for last close replay.
  *
  * All of the above fields are protected by the client_mutex.
  */
-static struct list_head client_lru;
 static struct list_head close_lru;
 
 /*
@@ -995,6 +991,8 @@ unhash_session(struct nfsd4_session *ses)
 static inline void
 renew_client_locked(struct nfs4_client *clp)
 {
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
 	if (is_client_expired(clp)) {
 		WARN_ON(1);
 		printk("%s: client (clientid %08x/%08x) already expired\n",
@@ -1007,7 +1005,7 @@ renew_client_locked(struct nfs4_client *clp)
 	dprintk("renewing client (clientid %08x/%08x)\n", 
 			clp->cl_clientid.cl_boot, 
 			clp->cl_clientid.cl_id);
-	list_move_tail(&clp->cl_lru, &client_lru);
+	list_move_tail(&clp->cl_lru, &nn->client_lru);
 	clp->cl_time = get_seconds();
 }
 
@@ -3196,6 +3194,7 @@ nfs4_laundromat(void)
 	time_t cutoff = get_seconds() - nfsd4_lease;
 	time_t t, clientid_val = nfsd4_lease;
 	time_t u, test_val = nfsd4_lease;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	nfs4_lock_state();
 
@@ -3203,7 +3202,7 @@ nfs4_laundromat(void)
 	nfsd4_end_grace(&init_net);
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&client_lock);
-	list_for_each_safe(pos, next, &client_lru) {
+	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
 			t = clp->cl_time - cutoff;
@@ -4590,9 +4589,10 @@ void nfsd_forget_clients(u64 num)
 {
 	struct nfs4_client *clp, *next;
 	int count = 0;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	nfs4_lock_state();
-	list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
 		expire_client(clp);
 		if (++count == num)
 			break;
@@ -4722,7 +4722,6 @@ nfs4_state_init(void)
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
 	INIT_LIST_HEAD(&close_lru);
-	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
 }
 
@@ -4785,6 +4784,7 @@ static int nfs4_state_start_net(struct net *net)
 		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
+	INIT_LIST_HEAD(&nn->client_lru);
 
 	return 0;
 

From 73758fed711b847d833b9b0db59137eaeed06485 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:22:01 +0300
Subject: [PATCH 043/111] nfsd: make close_lru list per net

This list holds nfs4 clients (open) stateowner queue for last close replay,
which are network namespace aware. So let's make this list per network
namespace too.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  6 ++++++
 fs/nfsd/nfs4state.c | 20 +++++++-------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9a98a0aeee68..a356ea3dc686 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -71,8 +71,14 @@ struct nfsd_net {
 	/*
 	 * client_lru holds client queue ordered by nfs4_client.cl_time
 	 * for lease renewal.
+	 *
+	 * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+	 * for last close replay.
+	 *
+	 * All of the above fields are protected by the client_mutex.
 	 */
 	struct list_head client_lru;
+	struct list_head close_lru;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9cf7e9bf3691..a8e406449ef6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -401,14 +401,6 @@ static unsigned int clientstr_hashval(const char *name)
 	return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
 }
 
-/*
- * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
- * for last close replay.
- *
- * All of the above fields are protected by the client_mutex.
- */
-static struct list_head close_lru;
-
 /*
  * We store the NONE, READ, WRITE, and BOTH bits separately in the
  * st_{access,deny}_bmap field of the stateid, in order to track not
@@ -2465,11 +2457,13 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
 }
 
 static void
-move_to_close_lru(struct nfs4_openowner *oo)
+move_to_close_lru(struct nfs4_openowner *oo, struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
 
-	list_move_tail(&oo->oo_close_lru, &close_lru);
+	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
 	oo->oo_time = get_seconds();
 }
 
@@ -3242,7 +3236,7 @@ nfs4_laundromat(void)
 		unhash_delegation(dp);
 	}
 	test_val = nfsd4_lease;
-	list_for_each_safe(pos, next, &close_lru) {
+	list_for_each_safe(pos, next, &nn->close_lru) {
 		oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
 		if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
 			u = oo->oo_time - cutoff;
@@ -3820,7 +3814,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 * little while to handle CLOSE replay.
 			 */
 			if (list_empty(&oo->oo_owner.so_stateids))
-				move_to_close_lru(oo);
+				move_to_close_lru(oo, SVC_NET(rqstp));
 		}
 	}
 out:
@@ -4721,7 +4715,6 @@ nfs4_state_init(void)
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
-	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
 }
 
@@ -4785,6 +4778,7 @@ static int nfs4_state_start_net(struct net *net)
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
 	INIT_LIST_HEAD(&nn->client_lru);
+	INIT_LIST_HEAD(&nn->close_lru);
 
 	return 0;
 

From 3320fef19b542b8df9606bd8e63990dc2a3fb330 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:22:07 +0300
Subject: [PATCH 044/111] nfsd: use service net instead of hard-coded init_net

This patch replaces init_net by SVC_NET(), where possible and also passes
proper context to nested functions where required.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c  | 13 +++++++---
 fs/nfsd/nfs4state.c | 63 +++++++++++++++++++++++++++------------------
 fs/nfsd/state.h     |  2 +-
 fs/nfsd/xdr4.h      |  2 +-
 4 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f955176f1b6f..1d2396b79574 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -40,6 +40,7 @@
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
@@ -304,6 +305,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 	struct nfsd4_compoundres *resp;
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
 		(int)open->op_fname.len, open->op_fname.data,
@@ -331,7 +334,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	/* check seqid for replay. set nfs4_owner */
 	resp = rqstp->rq_resp;
-	status = nfsd4_process_open1(&resp->cstate, open);
+	status = nfsd4_process_open1(&resp->cstate, open, nn);
 	if (status == nfserr_replay_me) {
 		struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
 		fh_put(&cstate->current_fh);
@@ -354,10 +357,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* Openowner is now set, so sequence id will get bumped.  Now we need
 	 * these checks before we do any creates: */
 	status = nfserr_grace;
-	if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+	if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+	if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 
 	switch (open->op_claim_type) {
@@ -370,7 +373,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			break;
 		case NFS4_OPEN_CLAIM_PREVIOUS:
 			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-			status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion);
+			status = nfs4_check_open_reclaim(&open->op_clientid,
+							 cstate->minorversion,
+							 nn);
 			if (status)
 				goto out;
 		case NFS4_OPEN_CLAIM_FH:
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a8e406449ef6..996a8a58944d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2620,14 +2620,13 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
 
 __be32
 nfsd4_process_open1(struct nfsd4_compound_state *cstate,
-		    struct nfsd4_open *open)
+		    struct nfsd4_open *open, struct nfsd_net *nn)
 {
 	clientid_t *clientid = &open->op_clientid;
 	struct nfs4_client *clp = NULL;
 	unsigned int strhashval;
 	struct nfs4_openowner *oo = NULL;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	if (STALE_CLIENTID(&open->op_clientid, nn))
 		return nfserr_stale_clientid;
@@ -3408,10 +3407,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 	return nfs_ok;
 }
 
-static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, bool sessions)
+static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
+				   struct nfs4_stid **s, bool sessions,
+				   struct nfsd_net *nn)
 {
 	struct nfs4_client *cl;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return nfserr_bad_stateid;
@@ -3439,6 +3439,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
 	struct nfs4_delegation *dp = NULL;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	__be32 status;
 
 	if (filpp)
@@ -3450,7 +3451,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return check_special_stateids(net, current_fh, stateid, flags);
 
-	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+				      &s, cstate->minorversion, nn);
 	if (status)
 		return status;
 	status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
@@ -3591,7 +3593,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
 static __be32
 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 			 stateid_t *stateid, char typemask,
-			 struct nfs4_ol_stateid **stpp)
+			 struct nfs4_ol_stateid **stpp,
+			 struct nfsd_net *nn)
 {
 	__be32 status;
 	struct nfs4_stid *s;
@@ -3600,7 +3603,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 		seqid, STATEID_VAL(stateid));
 
 	*stpp = NULL;
-	status = nfsd4_lookup_stateid(stateid, typemask, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, typemask, &s,
+				      cstate->minorversion, nn);
 	if (status)
 		return status;
 	*stpp = openlockstateid(s);
@@ -3609,13 +3613,14 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 	return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
 }
 
-static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp)
+static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
+						 stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn)
 {
 	__be32 status;
 	struct nfs4_openowner *oo;
 
 	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
-						NFS4_OPEN_STID, stpp);
+						NFS4_OPEN_STID, stpp, nn);
 	if (status)
 		return status;
 	oo = openowner((*stpp)->st_stateowner);
@@ -3631,6 +3636,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3644,7 +3650,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	status = nfs4_preprocess_seqid_op(cstate,
 					oc->oc_seqid, &oc->oc_req_stateid,
-					NFS4_OPEN_STID, &stp);
+					NFS4_OPEN_STID, &stp, nn);
 	if (status)
 		goto out;
 	oo = openowner(stp->st_stateowner);
@@ -3708,6 +3714,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 {
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3720,7 +3727,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 
 	nfs4_lock_state();
 	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
-					&od->od_stateid, &stp);
+					&od->od_stateid, &stp, nn);
 	if (status)
 		goto out; 
 	status = nfserr_inval;
@@ -3783,6 +3790,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_close on file %.*s\n", 
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3792,7 +3801,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
 					&close->cl_stateid,
 					NFS4_OPEN_STID|NFS4_CLOSED_STID,
-					&stp);
+					&stp, nn);
 	if (status)
 		goto out; 
 	oo = openowner(stp->st_stateowner);
@@ -3831,12 +3840,14 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
 	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		return status;
 
 	nfs4_lock_state();
-	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s,
+				      cstate->minorversion, nn);
 	if (status)
 		goto out;
 	dp = delegstateid(s);
@@ -4085,7 +4096,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	bool new_state = false;
 	int lkflg;
 	int err;
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
 		(long long) lock->lk_offset,
@@ -4119,7 +4131,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfs4_preprocess_confirmed_seqid_op(cstate,
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
-					&open_stp);
+					&open_stp, nn);
 		if (status)
 			goto out;
 		open_sop = openowner(open_stp->st_stateowner);
@@ -4133,7 +4145,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfs4_preprocess_seqid_op(cstate,
 				       lock->lk_old_lock_seqid,
 				       &lock->lk_old_lock_stateid,
-				       NFS4_LOCK_STID, &lock_stp);
+				       NFS4_LOCK_STID, &lock_stp, nn);
 	if (status)
 		goto out;
 	lock_sop = lockowner(lock_stp->st_stateowner);
@@ -4144,10 +4156,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 
 	status = nfserr_grace;
-	if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim)
+	if (locks_in_grace(net) && !lock->lk_reclaim)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim)
+	if (!locks_in_grace(net) && lock->lk_reclaim)
 		goto out;
 
 	file_lock = locks_alloc_lock();
@@ -4333,7 +4345,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct file_lock *file_lock = NULL;
 	__be32 status;
 	int err;
-						        
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
 		(long long) locku->lu_offset,
 		(long long) locku->lu_length);
@@ -4344,7 +4357,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	nfs4_lock_state();
 									        
 	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
-					&locku->lu_stateid, NFS4_LOCK_STID, &stp);
+					&locku->lu_stateid, NFS4_LOCK_STID,
+					&stp, nn);
 	if (status)
 		goto out;
 	filp = find_any_file(stp->st_file);
@@ -4564,10 +4578,9 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
 * Called from OPEN. Look for clientid in reclaim list.
 */
 __be32
-nfs4_check_open_reclaim(clientid_t *clid, bool sessions)
+nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	/* find clientid in conf_id_hashtbl */
 	clp = find_confirmed_client(clid, sessions, nn);
@@ -4583,7 +4596,7 @@ void nfsd_forget_clients(u64 num)
 {
 	struct nfs4_client *clp, *next;
 	int count = 0;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
 
 	nfs4_lock_state();
 	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
@@ -4897,8 +4910,8 @@ __nfs4_state_shutdown(struct net *net)
 		unhash_delegation(dp);
 	}
 
-	nfsd4_client_tracking_exit(&init_net);
-	put_net(&init_net);
+	nfsd4_client_tracking_exit(net);
+	put_net(net);
 }
 
 void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 26a912cdfe0c..bfe0106333cc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -470,7 +470,7 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *)
 extern void nfs4_release_reclaim(struct nfsd_net *);
 extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
 							struct nfsd_net *nn);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
 extern void nfs4_free_openowner(struct nfs4_openowner *);
 extern void nfs4_free_lockowner(struct nfs4_lockowner *);
 extern int set_callback_cred(void);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 71c5c47f2750..3c414c1be295 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -581,7 +581,7 @@ extern __be32 nfsd4_destroy_session(struct svc_rqst *,
 extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
 __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
-		struct nfsd4_open *open);
+		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 		struct svc_fh *current_fh, struct nfsd4_open *open);
 extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status);

From 12760c6685624d65f8de078485c21b6a08e83409 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:22:12 +0300
Subject: [PATCH 045/111] nfsd: pass nfsd_net instead of net to grace enders

Passing net context looks as overkill.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 14 ++++++--------
 fs/nfsd/nfs4state.c   |  8 +++-----
 fs/nfsd/state.h       |  2 +-
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 376692ab1b3b..b657b622bf5d 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops {
 	void (*create)(struct nfs4_client *);
 	void (*remove)(struct nfs4_client *);
 	int (*check)(struct nfs4_client *);
-	void (*grace_done)(struct net *, time_t);
+	void (*grace_done)(struct nfsd_net *, time_t);
 };
 
 /* Globals */
@@ -391,10 +391,9 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 }
 
 static void
-nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
+nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time)
 {
 	int status;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	in_grace = false;
 	if (!rec_file)
@@ -1017,11 +1016,10 @@ nfsd4_cld_check(struct nfs4_client *clp)
 }
 
 static void
-nfsd4_cld_grace_done(struct net *net, time_t boot_time)
+nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
 {
 	int ret;
 	struct cld_upcall *cup;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	cup = alloc_cld_upcall(cn);
@@ -1241,7 +1239,7 @@ nfsd4_umh_cltrack_check(struct nfs4_client *clp)
 }
 
 static void
-nfsd4_umh_cltrack_grace_done(struct net __attribute__((unused)) *net,
+nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn,
 				time_t boot_time)
 {
 	char *legacy;
@@ -1343,10 +1341,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
 }
 
 void
-nfsd4_record_grace_done(struct net *net, time_t boot_time)
+nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time)
 {
 	if (client_tracking_ops)
-		client_tracking_ops->grace_done(net, boot_time);
+		client_tracking_ops->grace_done(nn, boot_time);
 }
 
 static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 996a8a58944d..2e4ed691255a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3157,17 +3157,15 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 }
 
 static void
-nfsd4_end_grace(struct net *net)
+nfsd4_end_grace(struct nfsd_net *nn)
 {
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
 	/* do nothing if grace period already ended */
 	if (nn->grace_ended)
 		return;
 
 	dprintk("NFSD: end of grace period\n");
 	nn->grace_ended = true;
-	nfsd4_record_grace_done(net, nn->boot_time);
+	nfsd4_record_grace_done(nn, nn->boot_time);
 	locks_end_grace(&nn->nfsd4_manager);
 	/*
 	 * Now that every NFSv4 client has had the chance to recover and
@@ -3192,7 +3190,7 @@ nfs4_laundromat(void)
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
-	nfsd4_end_grace(&init_net);
+	nfsd4_end_grace(nn);
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&client_lock);
 	list_for_each_safe(pos, next, &nn->client_lru) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index bfe0106333cc..2deb6a88e58e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -495,5 +495,5 @@ extern void nfsd4_client_tracking_exit(struct net *net);
 extern void nfsd4_client_record_create(struct nfs4_client *clp);
 extern void nfsd4_client_record_remove(struct nfs4_client *clp);
 extern int nfsd4_client_record_check(struct nfs4_client *clp);
-extern void nfsd4_record_grace_done(struct net *net, time_t boot_time);
+extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
 #endif   /* NFSD4_STATE_H */

From 0912128149e86b48ed946371298d7fe61120d627 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 14 Nov 2012 18:22:17 +0300
Subject: [PATCH 046/111] nfsd: make laundromat network namespace aware

This patch moves laundromat_work to nfsd per-net context, thus allowing to run
multiple laundries.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  2 ++
 fs/nfsd/nfs4state.c | 21 +++++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index a356ea3dc686..227b93ebb622 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -79,6 +79,8 @@ struct nfsd_net {
 	 */
 	struct list_head client_lru;
 	struct list_head close_lru;
+
+	struct delayed_work laundromat_work;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e4ed691255a..e75872f81e1c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3176,7 +3176,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
 }
 
 static time_t
-nfs4_laundromat(void)
+nfs4_laundromat(struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	struct nfs4_openowner *oo;
@@ -3185,7 +3185,6 @@ nfs4_laundromat(void)
 	time_t cutoff = get_seconds() - nfsd4_lease;
 	time_t t, clientid_val = nfsd4_lease;
 	time_t u, test_val = nfsd4_lease;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	nfs4_lock_state();
 
@@ -3251,16 +3250,19 @@ nfs4_laundromat(void)
 
 static struct workqueue_struct *laundry_wq;
 static void laundromat_main(struct work_struct *);
-static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
 
 static void
-laundromat_main(struct work_struct *not_used)
+laundromat_main(struct work_struct *laundry)
 {
 	time_t t;
+	struct delayed_work *dwork = container_of(laundry, struct delayed_work,
+						  work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					   laundromat_work);
 
-	t = nfs4_laundromat();
+	t = nfs4_laundromat(nn);
 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
 }
 
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
@@ -4791,6 +4793,8 @@ static int nfs4_state_start_net(struct net *net)
 	INIT_LIST_HEAD(&nn->client_lru);
 	INIT_LIST_HEAD(&nn->close_lru);
 
+	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+
 	return 0;
 
 err_sessionid:
@@ -4875,7 +4879,8 @@ nfs4_state_start(void)
 	ret = nfsd4_create_callback_queue();
 	if (ret)
 		goto out_free_laundry;
-	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, nfsd4_grace * HZ);
 	set_max_delegations();
 	return 0;
 out_free_laundry:
@@ -4918,7 +4923,7 @@ nfs4_state_shutdown(void)
 	struct net *net = &init_net;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	cancel_delayed_work_sync(&laundromat_work);
+	cancel_delayed_work_sync(&nn->laundromat_work);
 	destroy_workqueue(laundry_wq);
 	locks_end_grace(&nn->nfsd4_manager);
 	nfs4_lock_state();

From 57d276d71aef7d8305ff002a070cb98deb2edced Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 15:22:43 -0500
Subject: [PATCH 047/111] nfsd: fix v4 reply caching

Very embarassing: 1091006c5eb15cba56785bd5b498a8d0b9546903 "nfsd: turn
on reply cache for NFSv4" missed a line, effectively leaving the reply
cache off in the v4 case.  I thought I'd tested that, but I guess not.

This time, wrote a pynfs test to confirm it works.

Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2013aa001dab..30d3784d0280 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -640,7 +640,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	}
 
 	/* Store reply in cache. */
-	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
 	return 1;
 }
 

From 447bfcc936ce28636833e89c4b82f424a291dde9 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 21:53:58 -0500
Subject: [PATCH 048/111] nfsd4: no, we're not going to check tags for utf8

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 406d0c4620f6..9dfad585d413 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1595,12 +1595,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	bool cachethis = false;
 	int i;
 
-	/*
-	 * XXX: According to spec, we should check the tag
-	 * for UTF-8 compliance.  I'm postponing this for
-	 * now because it seems that some clients do use
-	 * binary tags.
-	 */
 	READ_BUF(4);
 	READ32(argp->taglen);
 	READ_BUF(argp->taglen + 8);

From 8a61b18c9b13987310d0f3ba13aa04af51f02a1c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 22:28:38 -0500
Subject: [PATCH 049/111] nfsd4: simplify reading of opnum

The comment here is totally bogus:
	- OP_WRITE + 1 is RELEASE_LOCKOWNER.  Maybe there was some older
	  version of the spec in which that served as a sort of
	  OP_ILLEGAL?  No idea, but it's clearly wrong now.
	- In any case, I can't see that the spec says anything about
	  what to do if the client sends us less ops than promised.
	  It's clearly nutty client behavior, and we should do
	  whatever's easiest: returning an xdr error (even though it
	  won't be consistent with the error on the last op returned)
	  seems fine to me.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 34 ++--------------------------------
 1 file changed, 2 insertions(+), 32 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9dfad585d413..cfebc9c4f4c9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1624,38 +1624,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 		op = &argp->ops[i];
 		op->replay = NULL;
 
-		/*
-		 * We can't use READ_BUF() here because we need to handle
-		 * a missing opcode as an OP_WRITE + 1. So we need to check
-		 * to see if we're truly at the end of our buffer or if there
-		 * is another page we need to flip to.
-		 */
-
-		if (argp->p == argp->end) {
-			if (argp->pagelen < 4) {
-				/* There isn't an opcode still on the wire */
-				op->opnum = OP_WRITE + 1;
-				op->status = nfserr_bad_xdr;
-				argp->opcnt = i+1;
-				break;
-			}
-
-			/*
-			 * False alarm. We just hit a page boundary, but there
-			 * is still data available.  Move pointer across page
-			 * boundary.  *snip from READ_BUF*
-			 */
-			argp->p = page_address(argp->pagelist[0]);
-			argp->pagelist++;
-			if (argp->pagelen < PAGE_SIZE) {
-				argp->end = argp->p + (argp->pagelen>>2);
-				argp->pagelen = 0;
-			} else {
-				argp->end = argp->p + (PAGE_SIZE>>2);
-				argp->pagelen -= PAGE_SIZE;
-			}
-		}
-		op->opnum = ntohl(*argp->p++);
+		READ_BUF(4);
+		READ32(op->opnum);
 
 		if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP)
 			op->status = ops->decoders[op->opnum](argp, &op->u);

From 5a80a54d21c96590d013378d8c5f65f879451ab4 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 10:01:30 -0500
Subject: [PATCH 050/111] nfsd4: reorganize write decoding

In preparation for moving some of it elsewhere.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 62 +++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index cfebc9c4f4c9..579dc707bad9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1139,12 +1139,30 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
 	DECODE_TAIL;
 }
 
+static int fill_in_write_vector(struct kvec *vec, struct kvec *head, struct page **pagelist, int buflen)
+{
+	int i = 1;
+
+	vec[0].iov_base = head->iov_base;
+	vec[0].iov_len = min_t(int, buflen, head->iov_len);
+	buflen -= vec[0].iov_len;
+
+	while (buflen) {
+		vec[i].iov_base = page_address(pagelist[i - 1]);
+		vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
+		buflen -= vec[i].iov_len;
+		i++;
+	}
+	return i;
+}
+
 static __be32
 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
 	int avail;
-	int v;
 	int len;
+	struct page **pagelist;
+	struct kvec head;
 	DECODE_HEAD;
 
 	status = nfsd4_decode_stateid(argp, &write->wr_stateid);
@@ -1167,27 +1185,29 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 				__FILE__, __LINE__);
 		goto xdr_error;
 	}
-	argp->rqstp->rq_vec[0].iov_base = p;
-	argp->rqstp->rq_vec[0].iov_len = avail;
-	v = 0;
-	len = write->wr_buflen;
-	while (len > argp->rqstp->rq_vec[v].iov_len) {
-		len -= argp->rqstp->rq_vec[v].iov_len;
-		v++;
-		argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
-		argp->pagelist++;
-		if (argp->pagelen >= PAGE_SIZE) {
-			argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-			argp->pagelen -= PAGE_SIZE;
-		} else {
-			argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
-			argp->pagelen -= len;
-		}
+	head.iov_base = p;
+	head.iov_len = avail;
+	WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
+	pagelist = argp->pagelist;
+
+	len = XDR_QUADLEN(write->wr_buflen) << 2;
+	if (len >= avail) {
+		int pages;
+
+		len -= avail;
+
+		pages = len >> PAGE_SHIFT;
+		argp->pagelist += pages;
+		argp->pagelen -= pages * PAGE_SIZE;
+		len -= pages * PAGE_SIZE;
+
+		argp->p = (__be32 *)page_address(argp->pagelist[0]);
+		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
 	}
-	argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
-	argp->p = (__be32*)  (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
-	argp->rqstp->rq_vec[v].iov_len = len;
-	write->wr_vlen = v+1;
+	argp->p += XDR_QUADLEN(len);
+	write->wr_vlen = fill_in_write_vector(argp->rqstp->rq_vec,
+		&head, pagelist, write->wr_buflen);
+	WARN_ON_ONCE(write->wr_vlen > ARRAY_SIZE(argp->rqstp->rq_vec));
 
 	DECODE_TAIL;
 }

From 70cc7f75b1ee4161dfdea1012223db25712ab1a5 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 14:16:46 -0500
Subject: [PATCH 051/111] nfsd4: move more write parameters into xdr argument

In preparation for moving some of this elsewhere.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 20 +++++++++-----------
 fs/nfsd/xdr4.h    |  2 ++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 579dc707bad9..cb9f9017af8f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1139,16 +1139,17 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
 	DECODE_TAIL;
 }
 
-static int fill_in_write_vector(struct kvec *vec, struct kvec *head, struct page **pagelist, int buflen)
+static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 {
 	int i = 1;
+	int buflen = write->wr_buflen;
 
-	vec[0].iov_base = head->iov_base;
-	vec[0].iov_len = min_t(int, buflen, head->iov_len);
+	vec[0].iov_base = write->wr_head.iov_base;
+	vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
 	buflen -= vec[0].iov_len;
 
 	while (buflen) {
-		vec[i].iov_base = page_address(pagelist[i - 1]);
+		vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
 		vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
 		buflen -= vec[i].iov_len;
 		i++;
@@ -1161,8 +1162,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
 	int avail;
 	int len;
-	struct page **pagelist;
-	struct kvec head;
 	DECODE_HEAD;
 
 	status = nfsd4_decode_stateid(argp, &write->wr_stateid);
@@ -1185,10 +1184,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 				__FILE__, __LINE__);
 		goto xdr_error;
 	}
-	head.iov_base = p;
-	head.iov_len = avail;
+	write->wr_head.iov_base = p;
+	write->wr_head.iov_len = avail;
 	WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
-	pagelist = argp->pagelist;
+	write->wr_pagelist = argp->pagelist;
 
 	len = XDR_QUADLEN(write->wr_buflen) << 2;
 	if (len >= avail) {
@@ -1205,8 +1204,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
 	}
 	argp->p += XDR_QUADLEN(len);
-	write->wr_vlen = fill_in_write_vector(argp->rqstp->rq_vec,
-		&head, pagelist, write->wr_buflen);
+	write->wr_vlen = fill_in_write_vector(argp->rqstp->rq_vec, write);
 	WARN_ON_ONCE(write->wr_vlen > ARRAY_SIZE(argp->rqstp->rq_vec));
 
 	DECODE_TAIL;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 3c414c1be295..152867b8125d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -386,6 +386,8 @@ struct nfsd4_write {
 	u32		wr_stable_how;      /* request */
 	u32		wr_buflen;          /* request */
 	int		wr_vlen;
+	struct kvec	wr_head;
+	struct page **	wr_pagelist;        /* request */
 
 	u32		wr_bytes_written;   /* response */
 	u32		wr_how_written;     /* response */

From ffe1137ba743cdf1c2414d5a89690aec1daa6bba Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 15 Nov 2012 14:52:19 -0500
Subject: [PATCH 052/111] nfsd4: delay filling in write iovec array till after
 xdr decoding

Our server rejects compounds containing more than one write operation.
It's unclear whether this is really permitted by the spec; with 4.0,
it's possibly OK, with 4.1 (which has clearer limits on compound
parameters), it's probably not OK.  No client that we're aware of has
ever done this, but in theory it could be useful.

The source of the limitation: we need an array of iovecs to pass to the
write operation.  In the worst case that array of iovecs could have
hundreds of elements (the maximum rwsize divided by the page size), so
it's too big to put on the stack, or in each compound op.  So we instead
keep a single such array in the compound argument.

We fill in that array at the time we decode the xdr operation.

But we decode every op in the compound before executing any of them.  So
once we've used that array we can't decode another write.

If we instead delay filling in that array till the time we actually
perform the write, we can reuse it.

Another option might be to switch to decoding compound ops one at a
time.  I considered doing that, but it has a number of other side
effects, and I'd rather fix just this one problem for now.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 .../filesystems/nfs/nfs41-server.txt          |  4 ++--
 fs/nfsd/nfs4proc.c                            | 24 ++++++++++++++++++-
 fs/nfsd/nfs4xdr.c                             | 20 ----------------
 fs/nfsd/xdr4.h                                |  1 -
 4 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 392ef637e101..01c2db769791 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -190,7 +190,7 @@ Nonstandard compound limitations:
   ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
   fail to live up to the promise we made in CREATE_SESSION fore channel
   negotiation.
-* No more than one IO operation (read, write, readdir) allowed per
-  compound.
+* No more than one read-like operation allowed per compound; encoding
+  replies that cross page boundaries (except for read data) not handled.
 
 See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1d2396b79574..87d24e5f3ca4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -881,6 +881,24 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
+static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
+{
+        int i = 1;
+        int buflen = write->wr_buflen;
+
+        vec[0].iov_base = write->wr_head.iov_base;
+        vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
+        buflen -= vec[0].iov_len;
+
+        while (buflen) {
+                vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
+                vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
+                buflen -= vec[i].iov_len;
+                i++;
+        }
+        return i;
+}
+
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    struct nfsd4_write *write)
@@ -889,6 +907,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
 	unsigned long cnt;
+	int nvecs;
 
 	/* no need to check permission - this will be done in nfsd_write() */
 
@@ -911,8 +930,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	write->wr_how_written = write->wr_stable_how;
 	gen_boot_verifier(&write->wr_verifier);
 
+	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
+	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
+
 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
-			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+			     write->wr_offset, rqstp->rq_vec, nvecs,
 			     &cnt, &write->wr_how_written);
 	if (filp)
 		fput(filp);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index cb9f9017af8f..09204f590355 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1139,24 +1139,6 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
 	DECODE_TAIL;
 }
 
-static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
-{
-	int i = 1;
-	int buflen = write->wr_buflen;
-
-	vec[0].iov_base = write->wr_head.iov_base;
-	vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
-	buflen -= vec[0].iov_len;
-
-	while (buflen) {
-		vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
-		vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
-		buflen -= vec[i].iov_len;
-		i++;
-	}
-	return i;
-}
-
 static __be32
 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
@@ -1204,8 +1186,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
 	}
 	argp->p += XDR_QUADLEN(len);
-	write->wr_vlen = fill_in_write_vector(argp->rqstp->rq_vec, write);
-	WARN_ON_ONCE(write->wr_vlen > ARRAY_SIZE(argp->rqstp->rq_vec));
 
 	DECODE_TAIL;
 }
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 152867b8125d..331f8a3277ab 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -385,7 +385,6 @@ struct nfsd4_write {
 	u64		wr_offset;          /* request */
 	u32		wr_stable_how;      /* request */
 	u32		wr_buflen;          /* request */
-	int		wr_vlen;
 	struct kvec	wr_head;
 	struct page **	wr_pagelist;        /* request */
 

From 063b0fb9fadadc0caaea6c8f31e3f6bc978a4904 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 25 Nov 2012 14:48:10 -0500
Subject: [PATCH 053/111] nfsd4: downgrade some fs/nfsd/nfs4state.c BUG's

Linus has pointed out that indiscriminate use of BUG's can make it
harder to diagnose bugs because they can bring a machine down, often
before we manage to get any useful debugging information to the logs.
(Consider, for example, a BUG() that fires in a workqueue, or while
holding a spinlock).

Most of these BUG's won't do much more than kill an nfsd thread, but it
would still probably be safer to get out the warning without dying.

There's still more of this to do in nfsd/.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e75872f81e1c..41d2aed8ed06 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -190,7 +190,7 @@ static struct list_head file_hashtbl[FILE_HASH_SIZE];
 
 static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
 {
-	BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
+	WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
 	atomic_inc(&fp->fi_access[oflag]);
 }
 
@@ -249,7 +249,7 @@ static inline int get_new_stid(struct nfs4_stid *stid)
 	 * preallocations that can exist at a time, but the state lock
 	 * prevents anyone from using ours before we get here:
 	 */
-	BUG_ON(error);
+	WARN_ON_ONCE(error);
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
 	 * I don't think it is for 4.1.  But with 4.0 I worry that, for
@@ -494,7 +494,8 @@ static int nfs4_access_to_omode(u32 access)
 	case NFS4_SHARE_ACCESS_BOTH:
 		return O_RDWR;
 	}
-	BUG();
+	WARN_ON_ONCE(1);
+	return O_RDONLY;
 }
 
 /* release all access and file references for a given stateid */
@@ -1605,10 +1606,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	switch (exid->spa_how) {
 	case SP4_NONE:
 		break;
+	default:				/* checked by xdr code */
+		WARN_ON_ONCE(1);
 	case SP4_SSV:
-		return nfserr_serverfault;
-	default:
-		BUG();				/* checked by xdr code */
 	case SP4_MACH_CRED:
 		return nfserr_serverfault;	/* no excuse :-/ */
 	}
@@ -2912,7 +2912,7 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
 			open->op_why_no_deleg = WND4_CANCELLED;
 			break;
 		case NFS4_SHARE_WANT_NO_DELEG:
-			BUG();	/* not supposed to get here */
+			WARN_ON_ONCE(1);
 		}
 	}
 }
@@ -3466,7 +3466,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
 			goto out;
 		if (filpp) {
 			*filpp = dp->dl_file->fi_deleg_file;
-			BUG_ON(!*filpp);
+			if (!*filpp) {
+				WARN_ON_ONCE(1);
+				status = nfserr_serverfault;
+				goto out;
+			}
 		}
 		break;
 	case NFS4_OPEN_STID:
@@ -3693,7 +3697,7 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
 	case NFS4_SHARE_ACCESS_BOTH:
 		break;
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
 	}
 }
 
@@ -3882,7 +3886,7 @@ last_byte_offset(u64 start, u64 len)
 {
 	u64 end;
 
-	BUG_ON(!len);
+	WARN_ON_ONCE(!len);
 	end = start + len;
 	return end > start ? end - 1: NFS4_MAX_UINT64;
 }
@@ -4552,7 +4556,7 @@ nfs4_release_reclaim(struct nfsd_net *nn)
 			nfs4_remove_reclaim_record(crp, nn);
 		}
 	}
-	BUG_ON(nn->reclaim_str_hashtbl_size);
+	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
 }
 
 /*

From a36b1725b342c8131a86a0238789d8e7bcb490dd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 25 Nov 2012 16:31:00 -0500
Subject: [PATCH 054/111] nfsd4: return badname, not inval, on "." or "..", or
 "/"

The spec requires badname, not inval, in these cases.

Some callers want us to return enoent, but I can see no justification
for that.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 09204f590355..250171c5c311 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -65,17 +65,17 @@
 #define NFS4_REFERRAL_FSID_MINOR	0x8000000ULL
 
 static __be32
-check_filename(char *str, int len, __be32 err)
+check_filename(char *str, int len)
 {
 	int i;
 
 	if (len == 0)
 		return nfserr_inval;
 	if (isdotent(str, len))
-		return err;
+		return nfserr_badname;
 	for (i = 0; i < len; i++)
 		if (str[i] == '/')
-			return err;
+			return nfserr_badname;
 	return 0;
 }
 
@@ -570,7 +570,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 	READ32(create->cr_namelen);
 	READ_BUF(create->cr_namelen);
 	SAVEMEM(create->cr_name, create->cr_namelen);
-	if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+	if ((status = check_filename(create->cr_name, create->cr_namelen)))
 		return status;
 
 	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
@@ -602,7 +602,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
 	READ32(link->li_namelen);
 	READ_BUF(link->li_namelen);
 	SAVEMEM(link->li_name, link->li_namelen);
-	if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval)))
+	if ((status = check_filename(link->li_name, link->li_namelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -696,7 +696,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
 	READ32(lookup->lo_len);
 	READ_BUF(lookup->lo_len);
 	SAVEMEM(lookup->lo_name, lookup->lo_len);
-	if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent)))
+	if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
 		return status;
 
 	DECODE_TAIL;
@@ -860,7 +860,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 		READ32(open->op_fname.len);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
-		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_PREVIOUS:
@@ -875,7 +875,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 		READ32(open->op_fname.len);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
-		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_FH:
@@ -987,7 +987,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove
 	READ32(remove->rm_namelen);
 	READ_BUF(remove->rm_namelen);
 	SAVEMEM(remove->rm_name, remove->rm_namelen);
-	if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent)))
+	if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -1005,9 +1005,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
 	READ32(rename->rn_tnamelen);
 	READ_BUF(rename->rn_tnamelen);
 	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
-	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent)))
+	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
 		return status;
-	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval)))
+	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -1034,8 +1034,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
 	READ32(secinfo->si_namelen);
 	READ_BUF(secinfo->si_namelen);
 	SAVEMEM(secinfo->si_name, secinfo->si_namelen);
-	status = check_filename(secinfo->si_name, secinfo->si_namelen,
-								nfserr_noent);
+	status = check_filename(secinfo->si_name, secinfo->si_namelen);
 	if (status)
 		return status;
 	DECODE_TAIL;

From dba88ba55a06ff8bef467f2ca3f7904aeab8762a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 16 Nov 2012 11:45:12 -0500
Subject: [PATCH 055/111] nfsd4: remove state lock from
 nfsd4_load_reboot_recovery_data

That function is only called under nfsd_mutex: we know that because the
only caller is nfsd_svc, via

        nfsd_svc
          nfsd_startup
            nfs4_state_start
              nfsd4_client_tracking_init
                client_tracking_ops->init == nfsd4_load_reboot_recovery_data

The shared state accessed here includes:

        - user_recovery_dirname: used here, modified only by
          nfs4_reset_recoverydir, which can be verified to only be
          called under nfsd_mutex.
        - filesystem state, protected by i_mutex (handwaving slightly
	  here)
        - rec_file, reclaim_str_hashtbl, reclaim_str_hashtbl_size: other
          than here, used only from code called from nfsd or laundromat
          threads, both of which should be started only after this runs
          (see nfsd_svc) and stopped before this could run again (see
          nfsd_shutdown, called from nfsd_last_thread).

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b657b622bf5d..651d5134e74c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -509,11 +509,9 @@ nfsd4_load_reboot_recovery_data(struct net *net)
 {
 	int status;
 
-	nfs4_lock_state();
 	status = nfsd4_init_recdir();
 	if (!status)
 		status = nfsd4_recdir_load(net);
-	nfs4_unlock_state();
 	if (status)
 		printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
 	return status;

From ec28e02ca5f2a4287c19c585f8be2d9b3ba123ea Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 21 Nov 2012 18:07:38 +0300
Subject: [PATCH 056/111] nfsd4: remove state lock from nfs4_state_shutdown

Protection of __nfs4_state_shutdown() with nfs4_lock_state() looks redundant.

This function is called by the last NFSd thread on it's exit and state lock
protects actually two functions (del_recall_lru is protected by recall_lock):
1) nfsd4_client_tracking_exit
2) __nfs4_state_shutdown_net

"nfsd4_client_tracking_exit" doesn't require state lock protection, because it's
state can be modified only by tracker callbacks.
Here a re they:
1) create: is called only from nfsd4_proc_compound.
2) remove: is called from either nfsd4_proc_compound or nfs4_laundromat.
3) check: is called only from nfsd4_proc_compound.
4) grace_done; called only from nfs4_laundromat.

nfsd4_proc_compound is called onll by NFSd kthread, which is exiting right
now.
nfs4_laundromat is called by laundry_wq. But laundromat_work was canceled
already.

"__nfs4_state_shutdown_net" also doesn't require state lock protection,
because all NFSd kthreads are dead, and no race can happen with NFSd start,
because "nfsd_up" flag is still set.
Moreover, all Nfsd shutdown is protected with global nfsd_mutex.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 41d2aed8ed06..ffec73cdfaca 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4930,9 +4930,7 @@ nfs4_state_shutdown(void)
 	cancel_delayed_work_sync(&nn->laundromat_work);
 	destroy_workqueue(laundry_wq);
 	locks_end_grace(&nn->nfsd4_manager);
-	nfs4_lock_state();
 	__nfs4_state_shutdown(net);
-	nfs4_unlock_state();
 	nfsd4_destroy_callback_queue();
 }
 

From c9a4962881929df7f1ef6e63e1b9da304faca4dd Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 15:21:58 +0300
Subject: [PATCH 057/111] nfsd: make client_lock per net

This lock protects the client lru list and session hash table, which are
allocated per network namespace already.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  3 ++
 fs/nfsd/nfs4state.c | 73 ++++++++++++++++++++++++++-------------------
 2 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 227b93ebb622..08d5fa1ce82a 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -81,6 +81,9 @@ struct nfsd_net {
 	struct list_head close_lru;
 
 	struct delayed_work laundromat_work;
+
+	/* client_lock protects the client lru list and session hash table */
+	spinlock_t client_lock;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ffec73cdfaca..0e7e174de209 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -388,9 +388,6 @@ unhash_delegation(struct nfs4_delegation *dp)
  * SETCLIENTID state 
  */
 
-/* client_lock protects the client lru list and session hash table */
-static DEFINE_SPINLOCK(client_lock);
-
 static unsigned int clientid_hashval(u32 id)
 {
 	return id & CLIENT_HASH_MASK;
@@ -872,18 +869,23 @@ static void __free_session(struct nfsd4_session *ses)
 static void free_session(struct kref *kref)
 {
 	struct nfsd4_session *ses;
+	struct nfsd_net *nn;
 
-	lockdep_assert_held(&client_lock);
 	ses = container_of(kref, struct nfsd4_session, se_ref);
+	nn = net_generic(ses->se_client->net, nfsd_net_id);
+
+	lockdep_assert_held(&nn->client_lock);
 	nfsd4_del_conns(ses);
 	__free_session(ses);
 }
 
 void nfsd4_put_session(struct nfsd4_session *ses)
 {
-	spin_lock(&client_lock);
+	struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
 	nfsd4_put_session_locked(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
@@ -927,12 +929,12 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 	new->se_cb_sec = cses->cb_sec;
 	kref_init(&new->se_ref);
 	idx = hash_sessionid(&new->se_sessionid);
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
 	spin_lock(&clp->cl_lock);
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 
 	if (cses->flags & SESSION4_BACK_CHAN) {
 		struct sockaddr *sa = svc_addr(rqstp);
@@ -1005,9 +1007,11 @@ renew_client_locked(struct nfs4_client *clp)
 static inline void
 renew_client(struct nfs4_client *clp)
 {
-	spin_lock(&client_lock);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
 	renew_client_locked(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -1045,7 +1049,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static inline void
 free_client(struct nfs4_client *clp)
 {
-	lockdep_assert_held(&client_lock);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	lockdep_assert_held(&nn->client_lock);
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session *ses;
 		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -1062,15 +1068,16 @@ void
 release_session_client(struct nfsd4_session *session)
 {
 	struct nfs4_client *clp = session->se_client;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+	if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
 		return;
 	if (is_client_expired(clp)) {
 		free_client(clp);
 		session->se_client = NULL;
 	} else
 		renew_client_locked(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 /* must be called under the client_lock */
@@ -1119,11 +1126,11 @@ destroy_client(struct nfs4_client *clp)
 		rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
 	else
 		rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	unhash_client_locked(clp);
 	if (atomic_read(&clp->cl_refcount) == 0)
 		free_client(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 static void expire_client(struct nfs4_client *clp)
@@ -1274,6 +1281,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	struct sockaddr *sa = svc_addr(rqstp);
 	int ret;
 	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	clp = alloc_client(name);
 	if (clp == NULL)
@@ -1282,9 +1290,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	INIT_LIST_HEAD(&clp->cl_sessions);
 	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
 	if (ret) {
-		spin_lock(&client_lock);
+		spin_lock(&nn->client_lock);
 		free_client(clp);
-		spin_unlock(&client_lock);
+		spin_unlock(&nn->client_lock);
 		return NULL;
 	}
 	idr_init(&clp->cl_stateids);
@@ -1873,11 +1881,12 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
 {
 	struct nfsd4_session *session = cstate->session;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	session->se_cb_prog = bc->bc_cb_program;
 	session->se_cb_sec = bc->bc_cb_sec;
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 
 	nfsd4_probe_callback(session->se_client);
 
@@ -1890,10 +1899,11 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 {
 	__be32 status;
 	struct nfsd4_conn *conn;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (!nfsd4_last_compound_op(rqstp))
 		return nfserr_not_only_op;
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
 	/* Sorta weird: we only need the refcnt'ing because new_conn acquires
 	 * client_lock iself: */
@@ -1901,7 +1911,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		nfsd4_get_session(cstate->session);
 		atomic_inc(&cstate->session->se_client->cl_refcount);
 	}
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	if (!cstate->session)
 		return nfserr_badsession;
 
@@ -1929,6 +1939,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 {
 	struct nfsd4_session *ses;
 	__be32 status = nfserr_badsession;
+	struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id);
 
 	/* Notes:
 	 * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
@@ -1942,24 +1953,24 @@ nfsd4_destroy_session(struct svc_rqst *r,
 			return nfserr_not_only_op;
 	}
 	dump_sessionid(__func__, &sessionid->sessionid);
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r));
 	if (!ses) {
-		spin_unlock(&client_lock);
+		spin_unlock(&nn->client_lock);
 		goto out;
 	}
 
 	unhash_session(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 
 	nfs4_lock_state();
 	nfsd4_probe_callback_sync(ses->se_client);
 	nfs4_unlock_state();
 
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	nfsd4_del_conns(ses);
 	nfsd4_put_session_locked(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	status = nfs_ok;
 out:
 	dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -2025,6 +2036,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	struct nfsd4_slot *slot;
 	struct nfsd4_conn *conn;
 	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (resp->opcnt != 1)
 		return nfserr_sequence_pos;
@@ -2037,7 +2049,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	if (!conn)
 		return nfserr_jukebox;
 
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	status = nfserr_badsession;
 	session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
 	if (!session)
@@ -2113,7 +2125,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 		}
 	}
 	kfree(conn);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	dprintk("%s: return %d\n", __func__, ntohl(status));
 	return status;
 }
@@ -3191,7 +3203,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 	dprintk("NFSD: laundromat service - starting\n");
 	nfsd4_end_grace(nn);
 	INIT_LIST_HEAD(&reaplist);
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -3208,7 +3220,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		unhash_client_locked(clp);
 		list_add(&clp->cl_lru, &reaplist);
 	}
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	list_for_each_safe(pos, next, &reaplist) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
@@ -4796,6 +4808,7 @@ static int nfs4_state_start_net(struct net *net)
 	nn->unconf_name_tree = RB_ROOT;
 	INIT_LIST_HEAD(&nn->client_lru);
 	INIT_LIST_HEAD(&nn->close_lru);
+	spin_lock_init(&nn->client_lock);
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
 

From 4e37a7c2075baa2a15a2ab90fcc44173888016ed Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 15:22:03 +0300
Subject: [PATCH 058/111] nfsd: make delegations shutdown network namespace
 aware

NFSv4 delegations are stored in global list. But they are nfs4_client
dependent, which is network namespace aware already.
State shutdown and laundromat are done per network namespace as well.
So, delegations unhash have to be done in network namespace context.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0e7e174de209..bc2fc9f076fc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3230,6 +3230,8 @@ nfs4_laundromat(struct nfsd_net *nn)
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
+			continue;
 		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
 			u = dp->dl_time - cutoff;
 			if (test_val > u)
@@ -4922,6 +4924,8 @@ __nfs4_state_shutdown(struct net *net)
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (dp->dl_stid.sc_client->net != net)
+			continue;
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);

From 4dce0ac9069bbebfd34f890f599ccdb92fa76e9f Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 15:22:08 +0300
Subject: [PATCH 059/111] nfsd: cleanup NFSd state shutdown a bit

This patch renames __nfs4_state_shutdown_net() into nfs4_state_shutdown_net(),
__nfs4_state_shutdown() into nfs4_state_shutdown_net() and moves all network
related shutdown operations to nfs4_state_shutdown_net().

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bc2fc9f076fc..84a27a24b806 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4829,7 +4829,7 @@ static int nfs4_state_start_net(struct net *net)
 }
 
 static void
-__nfs4_state_shutdown_net(struct net *net)
+nfs4_state_destroy_net(struct net *net)
 {
 	int i;
 	struct nfs4_client *clp = NULL;
@@ -4857,6 +4857,7 @@ __nfs4_state_shutdown_net(struct net *net)
 	kfree(nn->ownerstr_hashtbl);
 	kfree(nn->unconf_id_hashtbl);
 	kfree(nn->conf_id_hashtbl);
+	put_net(net);
 }
 
 /* initialization to perform when the nfsd service is started: */
@@ -4906,19 +4907,20 @@ nfs4_state_start(void)
 	destroy_workqueue(laundry_wq);
 out_recovery:
 	nfsd4_client_tracking_exit(net);
-	__nfs4_state_shutdown_net(net);
-	put_net(net);
+	nfs4_state_destroy_net(net);
 	return ret;
 }
 
 /* should be called with the state lock held */
 static void
-__nfs4_state_shutdown(struct net *net)
+nfs4_state_shutdown_net(struct net *net)
 {
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	__nfs4_state_shutdown_net(net);
+	cancel_delayed_work_sync(&nn->laundromat_work);
+	locks_end_grace(&nn->nfsd4_manager);
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
@@ -4935,19 +4937,16 @@ __nfs4_state_shutdown(struct net *net)
 	}
 
 	nfsd4_client_tracking_exit(net);
-	put_net(net);
+	nfs4_state_destroy_net(net);
 }
 
 void
 nfs4_state_shutdown(void)
 {
 	struct net *net = &init_net;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	cancel_delayed_work_sync(&nn->laundromat_work);
+	nfs4_state_shutdown_net(net);
 	destroy_workqueue(laundry_wq);
-	locks_end_grace(&nn->nfsd4_manager);
-	__nfs4_state_shutdown(net);
 	nfsd4_destroy_callback_queue();
 }
 

From d85ed443052570b25ea4b5f5fa70c57e0129fbc4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 15:22:13 +0300
Subject: [PATCH 060/111] nfsd: cleanup NFSd state start a bit

This patch renames nfs4_state_start_net() into nfs4_state_create_net(), where
get_net() now performed.
Also it introduces new nfs4_state_start_net(), which is now responsible for
state creation and initializing all per-net data and which is now called from
nfs4_state_start().

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 59 +++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 84a27a24b806..6f5798623eb1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4770,7 +4770,7 @@ set_max_delegations(void)
 	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
 }
 
-static int nfs4_state_start_net(struct net *net)
+static int nfs4_state_create_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int i;
@@ -4813,6 +4813,7 @@ static int nfs4_state_start_net(struct net *net)
 	spin_lock_init(&nn->client_lock);
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+	get_net(net);
 
 	return 0;
 
@@ -4860,37 +4861,35 @@ nfs4_state_destroy_net(struct net *net)
 	put_net(net);
 }
 
-/* initialization to perform when the nfsd service is started: */
-
-int
-nfs4_state_start(void)
+static int
+nfs4_state_start_net(struct net *net)
 {
-	struct net *net = &init_net;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
-	/*
-	 * FIXME: For now, we hang most of the pernet global stuff off of
-	 * init_net until nfsd is fully containerized. Eventually, we'll
-	 * need to pass a net pointer into this function, take a reference
-	 * to that instead and then do most of the rest of this on a per-net
-	 * basis.
-	 */
-	get_net(net);
-	ret = nfs4_state_start_net(net);
+	ret = nfs4_state_create_net(net);
 	if (ret)
 		return ret;
 	nfsd4_client_tracking_init(net);
 	nn->boot_time = get_seconds();
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nn->grace_ended = false;
-	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
-	       nfsd4_grace);
+	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
+	       nfsd4_grace, net);
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, nfsd4_grace * HZ);
+	return 0;
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+int
+nfs4_state_start(void)
+{
+	int ret;
+
 	ret = set_callback_cred();
-	if (ret) {
-		ret = -ENOMEM;
-		goto out_recovery;
-	}
+	if (ret)
+		return -ENOMEM;
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	if (laundry_wq == NULL) {
 		ret = -ENOMEM;
@@ -4900,14 +4899,26 @@ nfs4_state_start(void)
 	if (ret)
 		goto out_free_laundry;
 
-	queue_delayed_work(laundry_wq, &nn->laundromat_work, nfsd4_grace * HZ);
 	set_max_delegations();
+
+	/*
+	 * FIXME: For now, we hang most of the pernet global stuff off of
+	 * init_net until nfsd is fully containerized. Eventually, we'll
+	 * need to pass a net pointer into this function, take a reference
+	 * to that instead and then do most of the rest of this on a per-net
+	 * basis.
+	 */
+	ret = nfs4_state_start_net(&init_net);
+	if (ret)
+		goto out_free_callback;
+
 	return 0;
+
+out_free_callback:
+	nfsd4_destroy_callback_queue();
 out_free_laundry:
 	destroy_workqueue(laundry_wq);
 out_recovery:
-	nfsd4_client_tracking_exit(net);
-	nfs4_state_destroy_net(net);
 	return ret;
 }
 

From f252bc6806a9428f2e3a429e4cdffbd012de9839 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 15:22:18 +0300
Subject: [PATCH 061/111] nfsd: call state init and shutdown twice

Split NFSv4 state init and shutdown into two different calls: per-net one and
generic one.
Per-net cwinit/shutdown pair have to be called for any namespace, generic pair
- only once on NSFd kthreads start and shutdown respectively.

Refresh of diff-nfsd-call-state-init-twice

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 30 ++++++++++++------------------
 fs/nfsd/nfsd.h      |  4 ++++
 fs/nfsd/nfssvc.c    | 15 +++++++++++++--
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6f5798623eb1..fb98f291aac2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4861,12 +4861,22 @@ nfs4_state_destroy_net(struct net *net)
 	put_net(net);
 }
 
-static int
+int
 nfs4_state_start_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
+	/*
+	 * FIXME: For now, we hang most of the pernet global stuff off of
+	 * init_net until nfsd is fully containerized. Eventually, we'll
+	 * need to pass a net pointer into this function, take a reference
+	 * to that instead and then do most of the rest of this on a per-net
+	 * basis.
+	 */
+	if (net != &init_net)
+		return -EINVAL;
+
 	ret = nfs4_state_create_net(net);
 	if (ret)
 		return ret;
@@ -4901,21 +4911,8 @@ nfs4_state_start(void)
 
 	set_max_delegations();
 
-	/*
-	 * FIXME: For now, we hang most of the pernet global stuff off of
-	 * init_net until nfsd is fully containerized. Eventually, we'll
-	 * need to pass a net pointer into this function, take a reference
-	 * to that instead and then do most of the rest of this on a per-net
-	 * basis.
-	 */
-	ret = nfs4_state_start_net(&init_net);
-	if (ret)
-		goto out_free_callback;
-
 	return 0;
 
-out_free_callback:
-	nfsd4_destroy_callback_queue();
 out_free_laundry:
 	destroy_workqueue(laundry_wq);
 out_recovery:
@@ -4923,7 +4920,7 @@ nfs4_state_start(void)
 }
 
 /* should be called with the state lock held */
-static void
+void
 nfs4_state_shutdown_net(struct net *net)
 {
 	struct nfs4_delegation *dp = NULL;
@@ -4954,9 +4951,6 @@ nfs4_state_shutdown_net(struct net *net)
 void
 nfs4_state_shutdown(void)
 {
-	struct net *net = &init_net;
-
-	nfs4_state_shutdown_net(net);
 	destroy_workqueue(laundry_wq);
 	nfsd4_destroy_callback_queue();
 }
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 80d5ce40aadb..d7b210b735e1 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -121,7 +121,9 @@ void nfs4_state_init(void);
 int nfsd4_init_slabs(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
+int nfs4_state_start_net(struct net *net);
 void nfs4_state_shutdown(void);
+void nfs4_state_shutdown_net(struct net *net);
 void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 char * nfs4_recoverydir(void);
@@ -130,7 +132,9 @@ static inline void nfs4_state_init(void) { }
 static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }
+static inline int nfs4_state_start_net(struct net *net) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
+static inline void nfs4_state_shutdown_net(struct net *net) { }
 static inline void nfs4_reset_lease(time_t leasetime) { }
 static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
 static inline char * nfs4_recoverydir(void) {return NULL; }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 30d3784d0280..b34a67d8ec44 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -207,6 +207,7 @@ static bool nfsd_up = false;
 static int nfsd_startup(int nrservs)
 {
 	int ret;
+	struct net *net = &init_net;
 
 	if (nfsd_up)
 		return 0;
@@ -221,14 +222,21 @@ static int nfsd_startup(int nrservs)
 	ret = nfsd_init_socks();
 	if (ret)
 		goto out_racache;
-	ret = lockd_up(&init_net);
+	ret = lockd_up(net);
 	if (ret)
 		goto out_racache;
 	ret = nfs4_state_start();
 	if (ret)
 		goto out_lockd;
+
+	ret = nfs4_state_start_net(net);
+	if (ret)
+		goto out_net_state;
+
 	nfsd_up = true;
 	return 0;
+out_net_state:
+	nfs4_state_shutdown();
 out_lockd:
 	lockd_down(&init_net);
 out_racache:
@@ -238,6 +246,8 @@ static int nfsd_startup(int nrservs)
 
 static void nfsd_shutdown(void)
 {
+	struct net *net = &init_net;
+
 	/*
 	 * write_ports can create the server without actually starting
 	 * any threads--if we get shut down before any threads are
@@ -246,8 +256,9 @@ static void nfsd_shutdown(void)
 	 */
 	if (!nfsd_up)
 		return;
+	nfs4_state_shutdown_net(net);
 	nfs4_state_shutdown();
-	lockd_down(&init_net);
+	lockd_down(net);
 	nfsd_racache_shutdown();
 	nfsd_up = false;
 }

From 3a0733692f6665a28c50ebadb6d9db2b183bcb91 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 16:16:25 +0300
Subject: [PATCH 062/111] nfsd: recovery - make rec_file per net

Opening and closing of this file is done in client tracking init and exit
operations.
Client tracking is done in network namespace context already. So let's make
this file opened and closed per network context - this will simlify it's
management.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h       |  2 ++
 fs/nfsd/nfs4recover.c | 70 +++++++++++++++++++++----------------------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 08d5fa1ce82a..130563210c68 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -84,6 +84,8 @@ struct nfsd_net {
 
 	/* client_lock protects the client lru list and session hash table */
 	spinlock_t client_lock;
+
+	struct file *rec_file;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 651d5134e74c..3e76d281bba8 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -62,7 +62,6 @@ struct nfsd4_client_tracking_ops {
 };
 
 /* Globals */
-static struct file *rec_file;
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 static struct nfsd4_client_tracking_ops *client_tracking_ops;
 static bool in_grace;
@@ -182,7 +181,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
 	if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
-	if (!rec_file)
+	if (!nn->rec_file)
 		return;
 
 	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
@@ -193,11 +192,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	if (status < 0)
 		return;
 
-	status = mnt_want_write_file(rec_file);
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		return;
 
-	dir = rec_file->f_path.dentry;
+	dir = nn->rec_file->f_path.dentry;
 	/* lock the parent */
 	mutex_lock(&dir->d_inode->i_mutex);
 
@@ -227,14 +226,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 			if (crp)
 				crp->cr_clp = clp;
 		}
-		vfs_fsync(rec_file, 0);
+		vfs_fsync(nn->rec_file, 0);
 	} else {
 		printk(KERN_ERR "NFSD: failed to write recovery record"
 				" (err %d); please check that %s exists"
 				" and is writeable", status,
 				user_recovery_dirname);
 	}
-	mnt_drop_write_file(rec_file);
+	mnt_drop_write_file(nn->rec_file);
 	nfs4_reset_creds(original_cred);
 }
 
@@ -267,7 +266,7 @@ static int
 nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 {
 	const struct cred *original_cred;
-	struct dentry *dir = rec_file->f_path.dentry;
+	struct dentry *dir = nn->rec_file->f_path.dentry;
 	LIST_HEAD(names);
 	int status;
 
@@ -275,13 +274,13 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 	if (status < 0)
 		return status;
 
-	status = vfs_llseek(rec_file, 0, SEEK_SET);
+	status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
 	if (status < 0) {
 		nfs4_reset_creds(original_cred);
 		return status;
 	}
 
-	status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
+	status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names);
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	while (!list_empty(&names)) {
 		struct name_list *entry;
@@ -305,14 +304,14 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 }
 
 static int
-nfsd4_unlink_clid_dir(char *name, int namlen)
+nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 {
 	struct dentry *dir, *dentry;
 	int status;
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	dir = rec_file->f_path.dentry;
+	dir = nn->rec_file->f_path.dentry;
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(name, dir, namlen);
 	if (IS_ERR(dentry)) {
@@ -339,14 +338,14 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	int status;
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+	if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
 	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
 	if (status)
 		return legacy_recdir_name_error(status);
 
-	status = mnt_want_write_file(rec_file);
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		goto out;
 	clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
@@ -355,10 +354,10 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	if (status < 0)
 		goto out_drop_write;
 
-	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1);
+	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
 	nfs4_reset_creds(original_cred);
 	if (status == 0) {
-		vfs_fsync(rec_file, 0);
+		vfs_fsync(nn->rec_file, 0);
 		if (in_grace) {
 			/* remove reclaim record */
 			crp = nfsd4_find_reclaim_client(dname, nn);
@@ -367,7 +366,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 		}
 	}
 out_drop_write:
-	mnt_drop_write_file(rec_file);
+	mnt_drop_write_file(nn->rec_file);
 out:
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
@@ -396,20 +395,20 @@ nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time)
 	int status;
 
 	in_grace = false;
-	if (!rec_file)
+	if (!nn->rec_file)
 		return;
-	status = mnt_want_write_file(rec_file);
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		goto out;
 	status = nfsd4_list_rec_dir(purge_old, nn);
 	if (status == 0)
-		vfs_fsync(rec_file, 0);
-	mnt_drop_write_file(rec_file);
+		vfs_fsync(nn->rec_file, 0);
+	mnt_drop_write_file(nn->rec_file);
 out:
 	nfs4_release_reclaim(nn);
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
-			" directory %s\n", rec_file->f_path.dentry->d_name.name);
+			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
 }
 
 static int
@@ -430,13 +429,13 @@ nfsd4_recdir_load(struct net *net) {
 	int status;
 	struct nfsd_net *nn =  net_generic(net, nfsd_net_id);
 
-	if (!rec_file)
+	if (!nn->rec_file)
 		return 0;
 
 	status = nfsd4_list_rec_dir(load_recdir, nn);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
-			" directory %s\n", rec_file->f_path.dentry->d_name.name);
+			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
 	return status;
 }
 
@@ -445,15 +444,16 @@ nfsd4_recdir_load(struct net *net) {
  */
 
 static int
-nfsd4_init_recdir(void)
+nfsd4_init_recdir(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	const struct cred *original_cred;
 	int status;
 
 	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
 			user_recovery_dirname);
 
-	BUG_ON(rec_file);
+	BUG_ON(nn->rec_file);
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0) {
@@ -463,12 +463,12 @@ nfsd4_init_recdir(void)
 		return status;
 	}
 
-	rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
-	if (IS_ERR(rec_file)) {
+	nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
+	if (IS_ERR(nn->rec_file)) {
 		printk("NFSD: unable to find recovery directory %s\n",
 				user_recovery_dirname);
-		status = PTR_ERR(rec_file);
-		rec_file = NULL;
+		status = PTR_ERR(nn->rec_file);
+		nn->rec_file = NULL;
 	}
 
 	nfs4_reset_creds(original_cred);
@@ -509,7 +509,7 @@ nfsd4_load_reboot_recovery_data(struct net *net)
 {
 	int status;
 
-	status = nfsd4_init_recdir();
+	status = nfsd4_init_recdir(net);
 	if (!status)
 		status = nfsd4_recdir_load(net);
 	if (status)
@@ -544,12 +544,12 @@ nfsd4_legacy_tracking_init(struct net *net)
 }
 
 static void
-nfsd4_shutdown_recdir(void)
+nfsd4_shutdown_recdir(struct nfsd_net *nn)
 {
-	if (!rec_file)
+	if (!nn->rec_file)
 		return;
-	fput(rec_file);
-	rec_file = NULL;
+	fput(nn->rec_file);
+	nn->rec_file = NULL;
 }
 
 static void
@@ -558,7 +558,7 @@ nfsd4_legacy_tracking_exit(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	nfs4_release_reclaim(nn);
-	nfsd4_shutdown_recdir();
+	nfsd4_shutdown_recdir(nn);
 	nfs4_legacy_state_shutdown(net);
 }
 

From f141f79d709de447c8c92ba54821740ae53a5d07 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 26 Nov 2012 16:16:30 +0300
Subject: [PATCH 063/111] nfsd: recovery - make in_grace per net

Flag in_grace is a part of client tracking state, which is network namesapce
aware. So let'a replace global static variable with per-net one.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h       | 1 +
 fs/nfsd/nfs4recover.c | 9 ++++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 130563210c68..9047706b3e10 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -86,6 +86,7 @@ struct nfsd_net {
 	spinlock_t client_lock;
 
 	struct file *rec_file;
+	bool in_grace;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3e76d281bba8..359793f89493 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -64,7 +64,6 @@ struct nfsd4_client_tracking_ops {
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 static struct nfsd4_client_tracking_ops *client_tracking_ops;
-static bool in_grace;
 
 static int
 nfs4_save_creds(const struct cred **original_creds)
@@ -221,7 +220,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
-		if (in_grace) {
+		if (nn->in_grace) {
 			crp = nfs4_client_to_reclaim(dname, nn);
 			if (crp)
 				crp->cr_clp = clp;
@@ -358,7 +357,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	nfs4_reset_creds(original_cred);
 	if (status == 0) {
 		vfs_fsync(nn->rec_file, 0);
-		if (in_grace) {
+		if (nn->in_grace) {
 			/* remove reclaim record */
 			crp = nfsd4_find_reclaim_client(dname, nn);
 			if (crp)
@@ -394,7 +393,7 @@ nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time)
 {
 	int status;
 
-	in_grace = false;
+	nn->in_grace = false;
 	if (!nn->rec_file)
 		return;
 	status = mnt_want_write_file(nn->rec_file);
@@ -473,7 +472,7 @@ nfsd4_init_recdir(struct net *net)
 
 	nfs4_reset_creds(original_cred);
 	if (!status)
-		in_grace = true;
+		nn->in_grace = true;
 	return status;
 }
 

From 864aee5c6f90533984c356494e6b0a8070e5d5f2 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 27 Nov 2012 14:42:20 +0300
Subject: [PATCH 064/111] nfsd: remove redundant declarations

This is a cleanup patch. Functions nfsd_pool_stats_open() and
nfsd_pool_stats_release() are declared in fs/nfsd/nfsd.h.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsctl.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index dab350dfc376..f5ab74af6ce2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -186,9 +186,6 @@ static struct file_operations supported_enctypes_ops = {
 };
 #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 
-extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
-extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
-
 static const struct file_operations pool_stats_operations = {
 	.open		= nfsd_pool_stats_open,
 	.read		= seq_read,

From 3d7337115d06f21970e23684f4d2e62e3a44c572 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 27 Nov 2012 14:11:44 +0300
Subject: [PATCH 065/111] nfsd: make NFSv4 lease time per net

Lease time is a part of NFSv4 state engine, which is constructed per network
namespace.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h        |  2 ++
 fs/nfsd/nfs4callback.c |  8 +++++---
 fs/nfsd/nfs4state.c    | 11 +++++------
 fs/nfsd/nfs4xdr.c      |  4 +++-
 fs/nfsd/nfsctl.c       |  5 ++++-
 fs/nfsd/nfsd.h         |  1 -
 6 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9047706b3e10..0c20be82cb01 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -87,6 +87,8 @@ struct nfsd_net {
 
 	struct file *rec_file;
 	bool in_grace;
+
+	time_t nfsd4_lease;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 826cc269c445..99bc85ff0217 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include "nfsd.h"
 #include "state.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -625,9 +626,10 @@ static const struct rpc_program cb_program = {
 	.pipe_dir_name		= "nfsd4_cb",
 };
 
-static int max_cb_time(void)
+static int max_cb_time(struct net *net)
 {
-	return max(nfsd4_lease/10, (time_t)1) * HZ;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
 }
 
 static struct rpc_cred *callback_cred;
@@ -659,7 +661,7 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
 	struct rpc_timeout	timeparms = {
-		.to_initval	= max_cb_time(),
+		.to_initval	= max_cb_time(clp->net),
 		.to_retries	= 0,
 	};
 	struct rpc_create_args args = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb98f291aac2..932b2ca6f203 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -51,7 +51,6 @@
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
-time_t nfsd4_lease = 90;     /* default lease time */
 time_t nfsd4_grace = 90;
 
 #define all_ones {{~0,~0},~0}
@@ -3184,7 +3183,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
 	 * to see the (possibly new, possibly shorter) lease time, we
 	 * can safely set the next grace time to the current lease time:
 	 */
-	nfsd4_grace = nfsd4_lease;
+	nfsd4_grace = nn->nfsd4_lease;
 }
 
 static time_t
@@ -3194,9 +3193,9 @@ nfs4_laundromat(struct nfsd_net *nn)
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct list_head *pos, *next, reaplist;
-	time_t cutoff = get_seconds() - nfsd4_lease;
-	time_t t, clientid_val = nfsd4_lease;
-	time_t u, test_val = nfsd4_lease;
+	time_t cutoff = get_seconds() - nn->nfsd4_lease;
+	time_t t, clientid_val = nn->nfsd4_lease;
+	time_t u, test_val = nn->nfsd4_lease;
 
 	nfs4_lock_state();
 
@@ -3245,7 +3244,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		unhash_delegation(dp);
 	}
-	test_val = nfsd4_lease;
+	test_val = nn->nfsd4_lease;
 	list_for_each_safe(pos, next, &nn->close_lru) {
 		oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
 		if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 250171c5c311..b775366a0a68 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -53,6 +53,7 @@
 #include "vfs.h"
 #include "state.h"
 #include "cache.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
@@ -2052,6 +2053,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 		.mnt	= exp->ex_path.mnt,
 		.dentry	= dentry,
 	};
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
 	BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -2212,7 +2214,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32(nfsd4_lease);
+		WRITE32(nn->nfsd4_lease);
 	}
 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
 		if ((buflen -= 4) < 0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f5ab74af6ce2..09d909a42ece 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -909,7 +909,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_
  */
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
-	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease);
 }
 
 /**
@@ -1060,6 +1061,7 @@ int nfsd_net_id;
 static __net_init int nfsd_init_net(struct net *net)
 {
 	int retval;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	retval = nfsd_export_init(net);
 	if (retval)
@@ -1067,6 +1069,7 @@ static __net_init int nfsd_init_net(struct net *net)
 	retval = nfsd_idmap_init(net);
 	if (retval)
 		goto out_idmap_error;
+	nn->nfsd4_lease = 90;	/* default lease time */
 	return 0;
 
 out_idmap_error:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d7b210b735e1..a8f7325a9124 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -276,7 +276,6 @@ extern struct timeval	nfssvc_boot;
 
 #ifdef CONFIG_NFSD_V4
 
-extern time_t nfsd4_lease;
 extern time_t nfsd4_grace;
 
 /* before processing a COMPOUND operation, we have to check that there

From 5284b44e438580a50e8cc5189297a73a48a45ecb Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 27 Nov 2012 14:11:49 +0300
Subject: [PATCH 066/111] nfsd: make NFSv4 grace time per net

Grace time is a part of NFSv4 state engine, which is constructed per network
namespace.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     | 1 +
 fs/nfsd/nfs4state.c | 9 +++------
 fs/nfsd/nfsctl.c    | 4 +++-
 fs/nfsd/nfsd.h      | 2 --
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 0c20be82cb01..2c4b2e2896dd 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -89,6 +89,7 @@ struct nfsd_net {
 	bool in_grace;
 
 	time_t nfsd4_lease;
+	time_t nfsd4_grace;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 932b2ca6f203..3db7617e6d39 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -50,9 +50,6 @@
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
-/* Globals */
-time_t nfsd4_grace = 90;
-
 #define all_ones {{~0,~0},~0}
 static const stateid_t one_stateid = {
 	.si_generation = ~0,
@@ -3183,7 +3180,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
 	 * to see the (possibly new, possibly shorter) lease time, we
 	 * can safely set the next grace time to the current lease time:
 	 */
-	nfsd4_grace = nn->nfsd4_lease;
+	nn->nfsd4_grace = nn->nfsd4_lease;
 }
 
 static time_t
@@ -4884,8 +4881,8 @@ nfs4_state_start_net(struct net *net)
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nn->grace_ended = false;
 	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
-	       nfsd4_grace, net);
-	queue_delayed_work(laundry_wq, &nn->laundromat_work, nfsd4_grace * HZ);
+	       nn->nfsd4_grace, net);
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
 	return 0;
 }
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 09d909a42ece..d902f83681e7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -925,7 +925,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
  */
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
 {
-	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace);
 }
 
 static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
@@ -1070,6 +1071,7 @@ static __net_init int nfsd_init_net(struct net *net)
 	if (retval)
 		goto out_idmap_error;
 	nn->nfsd4_lease = 90;	/* default lease time */
+	nn->nfsd4_grace = 90;
 	return 0;
 
 out_idmap_error:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index a8f7325a9124..5eea0f5021fd 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -276,8 +276,6 @@ extern struct timeval	nfssvc_boot;
 
 #ifdef CONFIG_NFSD_V4
 
-extern time_t nfsd4_grace;
-
 /* before processing a COMPOUND operation, we have to check that there
  * is enough space in the buffer for XDR encode to succeed.  otherwise,
  * we might process an operation with side effects, and be unable to

From f3c7521fe53a7892d8c8c4715f7c0f4add7b2e19 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Tue, 27 Nov 2012 09:35:10 -0500
Subject: [PATCH 067/111] NFSD: Fold fault_inject.h into state.h

There were only a small number of functions in this file and since they
all affect stored state I think it makes sense to put them in state.h
instead.  I also dropped most static inline declarations since there are
no callers when fault injection is not enabled.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c |  1 -
 fs/nfsd/fault_inject.h | 28 ----------------------------
 fs/nfsd/nfs4state.c    |  1 -
 fs/nfsd/nfsctl.c       |  2 +-
 fs/nfsd/state.h        | 15 +++++++++++++++
 5 files changed, 16 insertions(+), 31 deletions(-)
 delete mode 100644 fs/nfsd/fault_inject.h

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index e6c38159622f..02781121c6b0 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 
 #include "state.h"
-#include "fault_inject.h"
 
 struct nfsd_fault_inject_op {
 	char *file;
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
deleted file mode 100644
index 90bd0570956c..000000000000
--- a/fs/nfsd/fault_inject.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
- *
- * Function definitions for fault injection
- */
-
-#ifndef LINUX_NFSD_FAULT_INJECT_H
-#define LINUX_NFSD_FAULT_INJECT_H
-
-#ifdef CONFIG_NFSD_FAULT_INJECTION
-int nfsd_fault_inject_init(void);
-void nfsd_fault_inject_cleanup(void);
-void nfsd_forget_clients(u64);
-void nfsd_forget_locks(u64);
-void nfsd_forget_openowners(u64);
-void nfsd_forget_delegations(u64);
-void nfsd_recall_delegations(u64);
-#else /* CONFIG_NFSD_FAULT_INJECTION */
-static inline int nfsd_fault_inject_init(void) { return 0; }
-static inline void nfsd_fault_inject_cleanup(void) {}
-static inline void nfsd_forget_clients(u64 num) {}
-static inline void nfsd_forget_locks(u64 num) {}
-static inline void nfsd_forget_openowners(u64 num) {}
-static inline void nfsd_forget_delegations(u64 num) {}
-static inline void nfsd_recall_delegations(u64 num) {}
-#endif /* CONFIG_NFSD_FAULT_INJECTION */
-
-#endif /* LINUX_NFSD_FAULT_INJECT_H */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3db7617e6d39..b1aa577dd869 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,7 +44,6 @@
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
-#include "fault_inject.h"
 
 #include "netns.h"
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d902f83681e7..e13cbddcdbd0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -19,7 +19,7 @@
 #include "idmap.h"
 #include "nfsd.h"
 #include "cache.h"
-#include "fault_inject.h"
+#include "state.h"
 #include "netns.h"
 
 /*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2deb6a88e58e..b542bf2c0fe7 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -496,4 +496,19 @@ extern void nfsd4_client_record_create(struct nfs4_client *clp);
 extern void nfsd4_client_record_remove(struct nfs4_client *clp);
 extern int nfsd4_client_record_check(struct nfs4_client *clp);
 extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
+
+/* nfs fault injection functions */
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+void nfsd_forget_clients(u64);
+void nfsd_forget_locks(u64);
+void nfsd_forget_openowners(u64);
+void nfsd_forget_delegations(u64);
+void nfsd_recall_delegations(u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
 #endif   /* NFSD4_STATE_H */

From e5f9570319771bb0a5afc792b34fbd5564b935c8 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 30 Nov 2012 17:24:18 -0500
Subject: [PATCH 068/111] nfsd4: discard some unused nfsd4_verify xdr code

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b775366a0a68..3bf8a9d7f217 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1106,31 +1106,14 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
 static __be32
 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
 {
-#if 0
-	struct nfsd4_compoundargs save = {
-		.p = argp->p,
-		.end = argp->end,
-		.rqstp = argp->rqstp,
-	};
-	u32             ve_bmval[2];
-	struct iattr    ve_iattr;           /* request */
-	struct nfs4_acl *ve_acl;            /* request */
-#endif
 	DECODE_HEAD;
 
 	if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
 		goto out;
 
 	/* For convenience's sake, we compare raw xdr'd attributes in
-	 * nfsd4_proc_verify; however we still decode here just to return
-	 * correct error in case of bad xdr. */
-#if 0
-	status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl);
-	if (status == nfserr_inval) {
-		status = nfserrno(status);
-		goto out;
-	}
-#endif
+	 * nfsd4_proc_verify */
+
 	READ_BUF(4);
 	READ32(verify->ve_attrlen);
 	READ_BUF(verify->ve_attrlen);

From 043958395a6b91863046b0cd7cae9c67fa845144 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:38 -0500
Subject: [PATCH 069/111] NFSD: Lock state before calling fault injection
 function

Each function touches state in some way, so getting the lock earlier
can help simplify code.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c |  2 ++
 fs/nfsd/nfs4state.c    | 18 ++----------------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 02781121c6b0..4b385a14cf96 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -51,7 +51,9 @@ static int nfsd_inject_set(void *op_ptr, u64 val)
 	else
 		printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
 
+	nfs4_lock_state();
 	op->func(val);
+	nfs4_unlock_state();
 	return 0;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b1aa577dd869..8e19c692649c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4611,13 +4611,11 @@ void nfsd_forget_clients(u64 num)
 	int count = 0;
 	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
 
-	nfs4_lock_state();
 	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
 		expire_client(clp);
 		if (++count == num)
 			break;
 	}
-	nfs4_unlock_state();
 
 	printk(KERN_INFO "NFSD: Forgot %d clients", count);
 }
@@ -4653,25 +4651,15 @@ static int nfsd_release_n_owners(u64 num, bool is_open_owner,
 
 void nfsd_forget_locks(u64 num)
 {
-	int count;
 	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-
-	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, false, release_lockowner_sop, nn);
-	nfs4_unlock_state();
-
+	int count = nfsd_release_n_owners(num, false, release_lockowner_sop, nn);
 	printk(KERN_INFO "NFSD: Forgot %d locks", count);
 }
 
 void nfsd_forget_openowners(u64 num)
 {
-	int count;
 	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-
-	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, true, release_openowner_sop, nn);
-	nfs4_unlock_state();
-
+	int count = nfsd_release_n_owners(num, true, release_openowner_sop, nn);
 	printk(KERN_INFO "NFSD: Forgot %d open owners", count);
 }
 
@@ -4704,10 +4692,8 @@ void nfsd_forget_delegations(u64 num)
 	count = nfsd_process_n_delegations(num, &victims);
 	spin_unlock(&recall_lock);
 
-	nfs4_lock_state();
 	list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru)
 		unhash_delegation(dp);
-	nfs4_unlock_state();
 
 	printk(KERN_INFO "NFSD: Forgot %d delegations", count);
 }

From 44e34da60b24ca14666534b61cc9579aa4e1eac5 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:39 -0500
Subject: [PATCH 070/111] NFSD: Clean up forgetting clients

I added in a generic for-each loop that takes a pass over the client_lru
list for the current net namespace and calls some function.  The next few
patches will update other operations to use this function as well.  A value
of 0 still means "forget everything that is found".

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  3 +++
 fs/nfsd/nfs4state.c | 25 ++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 2c4b2e2896dd..964b5542f027 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -92,5 +92,8 @@ struct nfsd_net {
 	time_t nfsd4_grace;
 };
 
+/* Simple check to find out if a given net was properly initialized */
+#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+
 extern int nfsd_net_id;
 #endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8e19c692649c..2478c8996bda 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4605,19 +4605,34 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 
 #ifdef CONFIG_NFSD_FAULT_INJECTION
 
-void nfsd_forget_clients(u64 num)
+u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
+{
+	expire_client(clp);
+	return 1;
+}
+
+u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 {
 	struct nfs4_client *clp, *next;
-	int count = 0;
+	u64 count = 0;
 	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
 
+	if (!nfsd_netns_ready(nn))
+		return 0;
+
 	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
-		expire_client(clp);
-		if (++count == num)
+		count += func(clp, max - count);
+		if ((max != 0) && (count >= max))
 			break;
 	}
 
-	printk(KERN_INFO "NFSD: Forgot %d clients", count);
+	return count;
+}
+
+void nfsd_forget_clients(u64 num)
+{
+	u64 count = nfsd_for_n_state(num, nfsd_forget_client);
+	printk(KERN_INFO "NFSD: Forgot %llu clients", count);
 }
 
 static void release_lockowner_sop(struct nfs4_stateowner *sop)

From fc29171f5b3257694bf508cf4ae51970c97af78c Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:40 -0500
Subject: [PATCH 071/111] NFSD: Clean up forgetting locks

I use the new "forget_n_state()" function to iterate through each client
first when searching for locks.  This may slow down forgetting locks a
little bit, but it implements most of the code needed to forget a
specified client's locks.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2478c8996bda..46bece40b1ce 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4611,6 +4611,32 @@ u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
 	return 1;
 }
 
+static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *))
+{
+	struct nfs4_openowner *oop;
+	struct nfs4_lockowner *lop, *lo_next;
+	struct nfs4_ol_stateid *stp, *st_next;
+	u64 count = 0;
+
+	list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
+		list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) {
+			list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) {
+				if (func)
+					func(lop);
+				if (++count == max)
+					return count;
+			}
+		}
+	}
+
+	return count;
+}
+
+u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max)
+{
+	return nfsd_foreach_client_lock(clp, max, release_lockowner);
+}
+
 u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 {
 	struct nfs4_client *clp, *next;
@@ -4635,11 +4661,6 @@ void nfsd_forget_clients(u64 num)
 	printk(KERN_INFO "NFSD: Forgot %llu clients", count);
 }
 
-static void release_lockowner_sop(struct nfs4_stateowner *sop)
-{
-	release_lockowner(lockowner(sop));
-}
-
 static void release_openowner_sop(struct nfs4_stateowner *sop)
 {
 	release_openowner(openowner(sop));
@@ -4666,9 +4687,8 @@ static int nfsd_release_n_owners(u64 num, bool is_open_owner,
 
 void nfsd_forget_locks(u64 num)
 {
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-	int count = nfsd_release_n_owners(num, false, release_lockowner_sop, nn);
-	printk(KERN_INFO "NFSD: Forgot %d locks", count);
+	u64 count = nfsd_for_n_state(num, nfsd_forget_client_locks);
+	printk(KERN_INFO "NFSD: Forgot %llu locks", count);
 }
 
 void nfsd_forget_openowners(u64 num)

From 4dbdbda84f963312e0b5dfdf2dfbf64de047dd44 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:41 -0500
Subject: [PATCH 072/111] NFSD: Clean up forgetting openowners

Using "forget_n_state()" forces me to implement the code needed to
forget a specific client's openowners.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 49 ++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 46bece40b1ce..00d4398e2324 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4637,6 +4637,26 @@ u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max)
 	return nfsd_foreach_client_lock(clp, max, release_lockowner);
 }
 
+static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *))
+{
+	struct nfs4_openowner *oop, *next;
+	u64 count = 0;
+
+	list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
+		if (func)
+			func(oop);
+		if (++count == max)
+			break;
+	}
+
+	return count;
+}
+
+u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max)
+{
+	return nfsd_foreach_client_open(clp, max, release_openowner);
+}
+
 u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 {
 	struct nfs4_client *clp, *next;
@@ -4661,30 +4681,6 @@ void nfsd_forget_clients(u64 num)
 	printk(KERN_INFO "NFSD: Forgot %llu clients", count);
 }
 
-static void release_openowner_sop(struct nfs4_stateowner *sop)
-{
-	release_openowner(openowner(sop));
-}
-
-static int nfsd_release_n_owners(u64 num, bool is_open_owner,
-				void (*release_sop)(struct nfs4_stateowner *),
-				struct nfsd_net *nn)
-{
-	int i, count = 0;
-	struct nfs4_stateowner *sop, *next;
-
-	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(sop, next, &nn->ownerstr_hashtbl[i], so_strhash) {
-			if (sop->so_is_open_owner != is_open_owner)
-				continue;
-			release_sop(sop);
-			if (++count == num)
-				return count;
-		}
-	}
-	return count;
-}
-
 void nfsd_forget_locks(u64 num)
 {
 	u64 count = nfsd_for_n_state(num, nfsd_forget_client_locks);
@@ -4693,9 +4689,8 @@ void nfsd_forget_locks(u64 num)
 
 void nfsd_forget_openowners(u64 num)
 {
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-	int count = nfsd_release_n_owners(num, true, release_openowner_sop, nn);
-	printk(KERN_INFO "NFSD: Forgot %d open owners", count);
+	u64 count = nfsd_for_n_state(num, nfsd_forget_client_openowners);
+	printk(KERN_INFO "NFSD: Forgot %llu open owners", count);
 }
 
 static int nfsd_process_n_delegations(u64 num, struct list_head *list)

From 269de30f10604710dde8d544748b5b6c748b7de8 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:42 -0500
Subject: [PATCH 073/111] NFSD: Clean up forgetting and recalling delegations

Once I have a client, I can easily use its delegation list rather than
searching the file hash table for delegations to remove.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 44 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 00d4398e2324..dc7c22f14fef 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4657,6 +4657,52 @@ u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max)
 	return nfsd_foreach_client_open(clp, max, release_openowner);
 }
 
+static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
+				     struct list_head *victims)
+{
+	struct nfs4_delegation *dp, *next;
+	u64 count = 0;
+
+	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
+		if (victims)
+			list_move(&dp->dl_recall_lru, victims);
+		if (++count == max)
+			break;
+	}
+	return count;
+}
+
+u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	struct nfs4_delegation *dp, *next;
+	LIST_HEAD(victims);
+	u64 count;
+
+	spin_lock(&recall_lock);
+	count = nfsd_find_all_delegations(clp, max, &victims);
+	spin_unlock(&recall_lock);
+
+	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
+		unhash_delegation(dp);
+
+	return count;
+}
+
+u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	struct nfs4_delegation *dp, *next;
+	LIST_HEAD(victims);
+	u64 count;
+
+	spin_lock(&recall_lock);
+	count = nfsd_find_all_delegations(clp, max, &victims);
+	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
+		nfsd_break_one_deleg(dp);
+	spin_unlock(&recall_lock);
+
+	return count;
+}
+
 u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 {
 	struct nfs4_client *clp, *next;
@@ -4693,56 +4739,16 @@ void nfsd_forget_openowners(u64 num)
 	printk(KERN_INFO "NFSD: Forgot %llu open owners", count);
 }
 
-static int nfsd_process_n_delegations(u64 num, struct list_head *list)
-{
-	int i, count = 0;
-	struct nfs4_file *fp, *fnext;
-	struct nfs4_delegation *dp, *dnext;
-
-	for (i = 0; i < FILE_HASH_SIZE; i++) {
-		list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
-			list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
-				list_move(&dp->dl_recall_lru, list);
-				if (++count == num)
-					return count;
-			}
-		}
-	}
-
-	return count;
-}
-
 void nfsd_forget_delegations(u64 num)
 {
-	unsigned int count;
-	LIST_HEAD(victims);
-	struct nfs4_delegation *dp, *dnext;
-
-	spin_lock(&recall_lock);
-	count = nfsd_process_n_delegations(num, &victims);
-	spin_unlock(&recall_lock);
-
-	list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru)
-		unhash_delegation(dp);
-
-	printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+	u64 count = nfsd_for_n_state(num, nfsd_forget_client_delegations);
+	printk(KERN_INFO "NFSD: Forgot %llu delegations", count);
 }
 
 void nfsd_recall_delegations(u64 num)
 {
-	unsigned int count;
-	LIST_HEAD(victims);
-	struct nfs4_delegation *dp, *dnext;
-
-	spin_lock(&recall_lock);
-	count = nfsd_process_n_delegations(num, &victims);
-	list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) {
-		list_del(&dp->dl_recall_lru);
-		nfsd_break_one_deleg(dp);
-	}
-	spin_unlock(&recall_lock);
-
-	printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+	u64 count = nfsd_for_n_state(num, nfsd_recall_client_delegations);
+	printk(KERN_INFO "NFSD: Recalled %llu delegations", count);
 }
 
 #endif /* CONFIG_NFSD_FAULT_INJECTION */

From 8ce54e0d82730ece61737c9fd7b61b28ab8c3390 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:43 -0500
Subject: [PATCH 074/111] NFSD: Fault injection operations take a per-client
 forget function

The eventual goal is to forget state based on ip address, so it makes
sense to call this function in a for-each-client loop until the correct
amount of state is forgotten.  I also use this patch as an opportunity
to rename the forget function from "func()" to "forget()".

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 16 +++++++++-------
 fs/nfsd/nfs4state.c    | 30 ------------------------------
 fs/nfsd/state.h        | 12 +++++++-----
 3 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 4b385a14cf96..bf6161adf663 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -13,29 +13,29 @@
 
 struct nfsd_fault_inject_op {
 	char *file;
-	void (*func)(u64);
+	u64 (*forget)(struct nfs4_client *, u64);
 };
 
 static struct nfsd_fault_inject_op inject_ops[] = {
 	{
 		.file   = "forget_clients",
-		.func   = nfsd_forget_clients,
+		.forget = nfsd_forget_client,
 	},
 	{
 		.file   = "forget_locks",
-		.func   = nfsd_forget_locks,
+		.forget = nfsd_forget_client_locks,
 	},
 	{
 		.file   = "forget_openowners",
-		.func   = nfsd_forget_openowners,
+		.forget = nfsd_forget_client_openowners,
 	},
 	{
 		.file   = "forget_delegations",
-		.func   = nfsd_forget_delegations,
+		.forget = nfsd_forget_client_delegations,
 	},
 	{
 		.file   = "recall_delegations",
-		.func   = nfsd_recall_delegations,
+		.forget = nfsd_recall_client_delegations,
 	},
 };
 
@@ -44,6 +44,7 @@ static struct dentry *debug_dir;
 
 static int nfsd_inject_set(void *op_ptr, u64 val)
 {
+	u64 count = 0;
 	struct nfsd_fault_inject_op *op = op_ptr;
 
 	if (val == 0)
@@ -52,8 +53,9 @@ static int nfsd_inject_set(void *op_ptr, u64 val)
 		printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
 
 	nfs4_lock_state();
-	op->func(val);
+	count = nfsd_for_n_state(val, op->forget);
 	nfs4_unlock_state();
+	printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
 	return 0;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dc7c22f14fef..ab45cdd7b3da 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4721,36 +4721,6 @@ u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 	return count;
 }
 
-void nfsd_forget_clients(u64 num)
-{
-	u64 count = nfsd_for_n_state(num, nfsd_forget_client);
-	printk(KERN_INFO "NFSD: Forgot %llu clients", count);
-}
-
-void nfsd_forget_locks(u64 num)
-{
-	u64 count = nfsd_for_n_state(num, nfsd_forget_client_locks);
-	printk(KERN_INFO "NFSD: Forgot %llu locks", count);
-}
-
-void nfsd_forget_openowners(u64 num)
-{
-	u64 count = nfsd_for_n_state(num, nfsd_forget_client_openowners);
-	printk(KERN_INFO "NFSD: Forgot %llu open owners", count);
-}
-
-void nfsd_forget_delegations(u64 num)
-{
-	u64 count = nfsd_for_n_state(num, nfsd_forget_client_delegations);
-	printk(KERN_INFO "NFSD: Forgot %llu delegations", count);
-}
-
-void nfsd_recall_delegations(u64 num)
-{
-	u64 count = nfsd_for_n_state(num, nfsd_recall_client_delegations);
-	printk(KERN_INFO "NFSD: Recalled %llu delegations", count);
-}
-
 #endif /* CONFIG_NFSD_FAULT_INJECTION */
 
 /* initialization to perform at module load time: */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b542bf2c0fe7..423ac64ceb74 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -501,11 +501,13 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
 #ifdef CONFIG_NFSD_FAULT_INJECTION
 int nfsd_fault_inject_init(void);
 void nfsd_fault_inject_cleanup(void);
-void nfsd_forget_clients(u64);
-void nfsd_forget_locks(u64);
-void nfsd_forget_openowners(u64);
-void nfsd_forget_delegations(u64);
-void nfsd_recall_delegations(u64);
+u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64));
+
+u64 nfsd_forget_client(struct nfs4_client *, u64);
+u64 nfsd_forget_client_locks(struct nfs4_client*, u64);
+u64 nfsd_forget_client_openowners(struct nfs4_client *, u64);
+u64 nfsd_forget_client_delegations(struct nfs4_client *, u64);
+u64 nfsd_recall_client_delegations(struct nfs4_client *, u64);
 #else /* CONFIG_NFSD_FAULT_INJECTION */
 static inline int nfsd_fault_inject_init(void) { return 0; }
 static inline void nfsd_fault_inject_cleanup(void) {}

From 184c18471f7d0963ad5752692c4b441a546d88f1 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:44 -0500
Subject: [PATCH 075/111] NFSD: Reading a fault injection file prints a state
 count

I also log basic information that I can figure out about the type of
state (such as number of locks for each client IP address).  This can be
useful for checking that state was actually dropped and later for
checking if the client was able to recover.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 13 +++++++++++--
 fs/nfsd/nfs4state.c    | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/state.h        |  5 +++++
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index bf6161adf663..545f8e4ed101 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -14,28 +14,34 @@
 struct nfsd_fault_inject_op {
 	char *file;
 	u64 (*forget)(struct nfs4_client *, u64);
+	u64 (*print)(struct nfs4_client *, u64);
 };
 
 static struct nfsd_fault_inject_op inject_ops[] = {
 	{
 		.file   = "forget_clients",
 		.forget = nfsd_forget_client,
+		.print  = nfsd_print_client,
 	},
 	{
 		.file   = "forget_locks",
 		.forget = nfsd_forget_client_locks,
+		.print  = nfsd_print_client_locks,
 	},
 	{
 		.file   = "forget_openowners",
 		.forget = nfsd_forget_client_openowners,
+		.print  = nfsd_print_client_openowners,
 	},
 	{
 		.file   = "forget_delegations",
 		.forget = nfsd_forget_client_delegations,
+		.print  = nfsd_print_client_delegations,
 	},
 	{
 		.file   = "recall_delegations",
 		.forget = nfsd_recall_client_delegations,
+		.print  = nfsd_print_client_delegations,
 	},
 };
 
@@ -59,9 +65,12 @@ static int nfsd_inject_set(void *op_ptr, u64 val)
 	return 0;
 }
 
-static int nfsd_inject_get(void *data, u64 *val)
+static int nfsd_inject_get(void *op_ptr, u64 *val)
 {
-	*val = 0;
+	struct nfsd_fault_inject_op *op = op_ptr;
+	nfs4_lock_state();
+	*val = nfsd_for_n_state(0, op->print);
+	nfs4_unlock_state();
 	return 0;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ab45cdd7b3da..9fb8e52580f3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4611,6 +4611,22 @@ u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
 	return 1;
 }
 
+u64 nfsd_print_client(struct nfs4_client *clp, u64 num)
+{
+	char buf[INET6_ADDRSTRLEN];
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+	printk(KERN_INFO "NFS Client: %s\n", buf);
+	return 1;
+}
+
+static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
+			     const char *type)
+{
+	char buf[INET6_ADDRSTRLEN];
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+	printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
+}
+
 static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *))
 {
 	struct nfs4_openowner *oop;
@@ -4637,6 +4653,13 @@ u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max)
 	return nfsd_foreach_client_lock(clp, max, release_lockowner);
 }
 
+u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max)
+{
+	u64 count = nfsd_foreach_client_lock(clp, max, NULL);
+	nfsd_print_count(clp, count, "locked files");
+	return count;
+}
+
 static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *))
 {
 	struct nfs4_openowner *oop, *next;
@@ -4657,6 +4680,13 @@ u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max)
 	return nfsd_foreach_client_open(clp, max, release_openowner);
 }
 
+u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max)
+{
+	u64 count = nfsd_foreach_client_open(clp, max, NULL);
+	nfsd_print_count(clp, count, "open files");
+	return count;
+}
+
 static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
 				     struct list_head *victims)
 {
@@ -4703,6 +4733,18 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
 	return count;
 }
 
+u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	u64 count = 0;
+
+	spin_lock(&recall_lock);
+	count = nfsd_find_all_delegations(clp, max, NULL);
+	spin_unlock(&recall_lock);
+
+	nfsd_print_count(clp, count, "delegations");
+	return count;
+}
+
 u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 {
 	struct nfs4_client *clp, *next;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 423ac64ceb74..4017f3553a63 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -508,6 +508,11 @@ u64 nfsd_forget_client_locks(struct nfs4_client*, u64);
 u64 nfsd_forget_client_openowners(struct nfs4_client *, u64);
 u64 nfsd_forget_client_delegations(struct nfs4_client *, u64);
 u64 nfsd_recall_client_delegations(struct nfs4_client *, u64);
+
+u64 nfsd_print_client(struct nfs4_client *, u64);
+u64 nfsd_print_client_locks(struct nfs4_client *, u64);
+u64 nfsd_print_client_openowners(struct nfs4_client *, u64);
+u64 nfsd_print_client_delegations(struct nfs4_client *, u64);
 #else /* CONFIG_NFSD_FAULT_INJECTION */
 static inline int nfsd_fault_inject_init(void) { return 0; }
 static inline void nfsd_fault_inject_cleanup(void) {}

From d7cc431edd0a6c69a88b5ff1e304af50bfb2270e Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:45 -0500
Subject: [PATCH 076/111] NFSD: Add a custom file operations structure for
 fault injection

Controlling the read and write functions allows me to add in "forget
client w.x.y.z", since we won't be limited to reading and writing only
u64 values.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 56 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 545f8e4ed101..19f9094bbb07 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
+#include <asm/uaccess.h>
 
 #include "state.h"
 
@@ -48,10 +49,9 @@ static struct nfsd_fault_inject_op inject_ops[] = {
 static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
 static struct dentry *debug_dir;
 
-static int nfsd_inject_set(void *op_ptr, u64 val)
+static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
 {
 	u64 count = 0;
-	struct nfsd_fault_inject_op *op = op_ptr;
 
 	if (val == 0)
 		printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
@@ -62,19 +62,61 @@ static int nfsd_inject_set(void *op_ptr, u64 val)
 	count = nfsd_for_n_state(val, op->forget);
 	nfs4_unlock_state();
 	printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
-	return 0;
 }
 
-static int nfsd_inject_get(void *op_ptr, u64 *val)
+static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
 {
-	struct nfsd_fault_inject_op *op = op_ptr;
 	nfs4_lock_state();
 	*val = nfsd_for_n_state(0, op->print);
 	nfs4_unlock_state();
-	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+static ssize_t fault_inject_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	static u64 val;
+	char read_buf[25];
+	size_t size, ret;
+	loff_t pos = *ppos;
+
+	if (!pos)
+		nfsd_inject_get(file->f_dentry->d_inode->i_private, &val);
+	size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= size || !len)
+		return 0;
+	if (len > size - pos)
+		len = size - pos;
+	ret = copy_to_user(buf, read_buf + pos, len);
+	if (ret == len)
+		return -EFAULT;
+	len -= ret;
+	*ppos = pos + len;
+	return len;
+}
+
+static ssize_t fault_inject_write(struct file *file, const char __user *buf,
+				  size_t len, loff_t *ppos)
+{
+	char write_buf[24];
+	size_t size = min(sizeof(write_buf), len) - 1;
+	u64 val;
+
+	if (copy_from_user(write_buf, buf, size))
+		return -EFAULT;
+
+	val = simple_strtoll(write_buf, NULL, 0);
+	nfsd_inject_set(file->f_dentry->d_inode->i_private, val);
+	return len; /* on success, claim we got the whole input */
+}
+
+static const struct file_operations fops_nfsd = {
+	.owner   = THIS_MODULE,
+	.read    = fault_inject_read,
+	.write   = fault_inject_write,
+};
 
 void nfsd_fault_inject_cleanup(void)
 {

From 6c1e82a4b74ad0c8b45c833a4409f153199d9be4 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 29 Nov 2012 11:40:46 -0500
Subject: [PATCH 077/111] NFSD: Forget state for a specific client

Write the client's ip address to any state file and all appropriate
state for that client will be forgotten.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 35 ++++++++++++++++++++++++++++++++---
 fs/nfsd/nfs4state.c    | 15 +++++++++++++++
 fs/nfsd/state.h        |  1 +
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 19f9094bbb07..96ffdf55dcec 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -8,9 +8,12 @@
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
+#include <linux/sunrpc/clnt.h>
 #include <asm/uaccess.h>
 
 #include "state.h"
+#include "netns.h"
 
 struct nfsd_fault_inject_op {
 	char *file;
@@ -64,6 +67,24 @@ static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
 	printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
 }
 
+static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
+				   struct sockaddr_storage *addr,
+				   size_t addr_size)
+{
+	char buf[INET6_ADDRSTRLEN];
+	struct nfs4_client *clp;
+	u64 count;
+
+	nfs4_lock_state();
+	clp = nfsd_find_client(addr, addr_size);
+	if (clp) {
+		count = op->forget(clp, 0);
+		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+		printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
+	}
+	nfs4_unlock_state();
+}
+
 static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
 {
 	nfs4_lock_state();
@@ -100,15 +121,23 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
 static ssize_t fault_inject_write(struct file *file, const char __user *buf,
 				  size_t len, loff_t *ppos)
 {
-	char write_buf[24];
+	char write_buf[INET6_ADDRSTRLEN];
 	size_t size = min(sizeof(write_buf), len) - 1;
+	struct net *net = current->nsproxy->net_ns;
+	struct sockaddr_storage sa;
 	u64 val;
 
 	if (copy_from_user(write_buf, buf, size))
 		return -EFAULT;
+	write_buf[size] = '\0';
 
-	val = simple_strtoll(write_buf, NULL, 0);
-	nfsd_inject_set(file->f_dentry->d_inode->i_private, val);
+	size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
+	if (size > 0)
+		nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size);
+	else {
+		val = simple_strtoll(write_buf, NULL, 0);
+		nfsd_inject_set(file->f_dentry->d_inode->i_private, val);
+	}
 	return len; /* on success, claim we got the whole input */
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9fb8e52580f3..eff734033437 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4763,6 +4763,21 @@ u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
 	return count;
 }
 
+struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
+{
+	struct nfs4_client *clp;
+	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
+
+	if (!nfsd_netns_ready(nn))
+		return NULL;
+
+	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+		if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
+			return clp;
+	}
+	return NULL;
+}
+
 #endif /* CONFIG_NFSD_FAULT_INJECTION */
 
 /* initialization to perform at module load time: */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4017f3553a63..d1c229feed52 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -502,6 +502,7 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
 int nfsd_fault_inject_init(void);
 void nfsd_fault_inject_cleanup(void);
 u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64));
+struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t);
 
 u64 nfsd_forget_client(struct nfs4_client *, u64);
 u64 nfsd_forget_client_locks(struct nfs4_client*, u64);

From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:11:13 -0500
Subject: [PATCH 078/111] svcrpc: don't byte-swap sk_reclen in place

Byte-swapping in place is always a little dubious.

Let's instead define this field to always be big-endian, and do the
swapping on demand where we need it.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 12 +++++++++++-
 net/sunrpc/svcsock.c           | 26 +++++++++++---------------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92ad02f0dcc0..613cf42227aa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,11 +26,21 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	u32			sk_reclen;	/* length of record */
+	__be32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
+static inline u32 svc_sock_reclen(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+}
+
+static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+}
+
 /*
  * Function prototypes.
  */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03827cef1fa7..d50de2b95036 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -950,8 +950,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			return -EAGAIN;
 		}
 
-		svsk->sk_reclen = ntohl(svsk->sk_reclen);
-		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
+		if (!(svc_sock_final_rec(svsk))) {
 			/* FIXME: technically, a record can be fragmented,
 			 *  and non-terminal fragments will not have the top
 			 *  bit set in the fragment length header.
@@ -961,21 +960,18 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			goto err_delete;
 		}
 
-		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
-		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-		if (svsk->sk_reclen > serv->sv_max_mesg) {
+		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+		if (svc_sock_reclen(svsk) > serv->sv_max_mesg) {
 			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					       (unsigned long)svsk->sk_reclen);
+					(unsigned long)svc_sock_reclen(svsk));
 			goto err_delete;
 		}
 	}
 
-	if (svsk->sk_reclen < 8)
+	if (svc_sock_reclen(svsk) < 8)
 		goto err_delete; /* client is nuts. */
 
-	len = svsk->sk_reclen;
-
-	return len;
+	return svc_sock_reclen(svsk);
 error:
 	dprintk("RPC: TCP recv_record got %d\n", len);
 	return len;
@@ -1019,7 +1015,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	if (dst->iov_len < src->iov_len)
 		return -EAGAIN; /* whatever; just giving up. */
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
-	xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
 	return 0;
 }
@@ -1064,12 +1060,12 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto error;
 
 	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = svsk->sk_reclen - base;
+	want = svc_sock_reclen(svsk) - base;
 
 	vec = rqstp->rq_vec;
 
 	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svsk->sk_reclen);
+						svc_sock_reclen(svsk));
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
@@ -1082,11 +1078,11 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len < 0 && len != -EAGAIN)
 			goto err_other;
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen, svsk->sk_reclen);
+			svsk->sk_tcplen, svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
-	rqstp->rq_arg.len = svsk->sk_reclen;
+	rqstp->rq_arg.len = svc_sock_reclen(svsk);
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;

From ad46ccf09440975618e8fc1ead53d0a27b9bcf4c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:30:42 -0500
Subject: [PATCH 079/111] svcrpc: delay minimum-rpc-size check till later

Soon we want to support multiple fragments, in which case it may be
legal for a single fragment to be smaller than 8 bytes, so we'll want to
delay this check till we've reached the last fragment.

Also fix an outdated comment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d50de2b95036..15571790dc9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -921,10 +921,8 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 }
 
 /*
- * Receive data.
+ * Receive fragment record header.
  * If we haven't gotten the record length yet, get the next four bytes.
- * Otherwise try to gobble up as much as possible up to the complete
- * record length.
  */
 static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
@@ -968,9 +966,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 		}
 	}
 
-	if (svc_sock_reclen(svsk) < 8)
-		goto err_delete; /* client is nuts. */
-
 	return svc_sock_reclen(svsk);
 error:
 	dprintk("RPC: TCP recv_record got %d\n", len);
@@ -1076,12 +1071,15 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	if (len != want) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
-			goto err_other;
+			goto err_delete;
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			svsk->sk_tcplen, svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
+	if (svc_sock_reclen(svsk) < 8)
+		goto err_delete; /* client is nuts. */
+
 	rqstp->rq_arg.len = svc_sock_reclen(svsk);
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
@@ -1117,10 +1115,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 error:
 	if (len != -EAGAIN)
-		goto err_other;
+		goto err_delete;
 	dprintk("RPC: TCP recvfrom got EAGAIN\n");
 	return 0;
-err_other:
+err_delete:
 	printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 	       svsk->sk_xprt.xpt_server->sv_name, -len);
 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);

From 6a72ae2e23922bc96e8f3de24a5203be6edc2539 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:35:35 -0500
Subject: [PATCH 080/111] svcrpc: fix off-by-4 error in "incomplete TCP record"
 dprintk

The full reclen doesn't include the fragment header, but sk_tcplen does.
Fix this to make it an apples-to-apples comparison.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 15571790dc9e..1db42b1ffe28 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1073,7 +1073,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len < 0 && len != -EAGAIN)
 			goto err_delete;
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen, svc_sock_reclen(svsk));
+			svsk->sk_tcplen - sizeof(rpc_fraghdr),
+			svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 

From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:45:35 -0500
Subject: [PATCH 081/111] svcrpc: track rpc data length separately from
 sk_tcplen

Keep a separate field, sk_datalen, that tracks only the data contained
in a fragment, not including the fragment header.

For now, this is always just max(0, sk_tcplen - 4), but after we allow
multiple fragments sk_datalen will accumulate the total rpc data size
while sk_tcplen only tracks progress receiving the current fragment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 11 +++++++++--
 net/sunrpc/svcsock.c           | 19 ++++++++++++-------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 613cf42227aa..62fd1b756e99 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,8 +26,15 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	__be32			sk_reclen;	/* length of record */
-	u32			sk_tcplen;	/* current read length */
+	/* On-the-wire fragment header: */
+	__be32			sk_reclen;
+	/* As we receive a record, this includes the length received so
+	 * far (including the fragment header): */
+	u32			sk_tcplen;
+	/* Total length of the data (not including fragment headers)
+	 * received so far in the fragments making up this rpc: */
+	u32			sk_datalen;
+
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 1db42b1ffe28..2b09e2306bfa 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -874,9 +874,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return 0;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		if (rqstp->rq_pages[i] != NULL)
@@ -893,9 +893,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		svsk->sk_pages[i] = rqstp->rq_pages[i];
@@ -907,9 +907,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		goto out;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		BUG_ON(svsk->sk_pages[i] == NULL);
@@ -918,6 +918,7 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 	}
 out:
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 }
 
 /*
@@ -1066,8 +1067,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
-	if (len >= 0)
+	if (len >= 0) {
 		svsk->sk_tcplen += len;
+		svsk->sk_datalen += len;
+	}
 	if (len != want) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
@@ -1100,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	/* Reset TCP read info */
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 	/* If we have more data, signal svc_xprt_enqueue() to try again */
 	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1296,6 +1300,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
+		svsk->sk_datalen = 0;
 		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;

From 836fbadb96a8308e6283eee5c7b3bdae818b58ca Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 15:50:38 -0500
Subject: [PATCH 082/111] svcrpc: support multiple-fragment rpc's

Over TCP, RPC's are preceded by a single 4-byte field telling you how
long the rpc is (in bytes).  The spec also allows you to send an RPC in
multiple such records (the high bit of the length field is used to tell
you whether this is the final record).

We've survived for years without supporting this because in practice the
clients we care about don't use it.  But the userland rpc libraries do,
and every now and then an experimental client will run into this.  (Most
recently I noticed it while trying to write a pynfs check.)  And we're
really on the wrong side of the spec here--let's fix this.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 50 ++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b09e2306bfa..38ec968ca1c6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -949,20 +949,11 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			return -EAGAIN;
 		}
 
-		if (!(svc_sock_final_rec(svsk))) {
-			/* FIXME: technically, a record can be fragmented,
-			 *  and non-terminal fragments will not have the top
-			 *  bit set in the fragment length header.
-			 *  But apparently no known nfs clients send fragmented
-			 *  records. */
-			net_notice_ratelimited("RPC: multiple fragments per record not supported\n");
-			goto err_delete;
-		}
-
 		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
-		if (svc_sock_reclen(svsk) > serv->sv_max_mesg) {
+		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
+							serv->sv_max_mesg) {
 			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					(unsigned long)svc_sock_reclen(svsk));
+					       (unsigned long)svsk->sk_reclen);
 			goto err_delete;
 		}
 	}
@@ -1030,6 +1021,17 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
 	return i;
 }
 
+static void svc_tcp_fragment_received(struct svc_sock *svsk)
+{
+	/* If we have more data, signal svc_xprt_enqueue() to try again */
+	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+	dprintk("svc: TCP %s record (%d bytes)\n",
+		svc_sock_final_rec(svsk) ? "final" : "nonfinal",
+		svc_sock_reclen(svsk));
+	svsk->sk_tcplen = 0;
+	svsk->sk_reclen = 0;
+}
 
 /*
  * Receive data from a TCP socket.
@@ -1056,12 +1058,12 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto error;
 
 	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = svc_sock_reclen(svsk) - base;
+	want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
 
 	vec = rqstp->rq_vec;
 
 	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svc_sock_reclen(svsk));
+						svsk->sk_datalen + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
@@ -1071,20 +1073,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		svsk->sk_tcplen += len;
 		svsk->sk_datalen += len;
 	}
-	if (len != want) {
+	if (len != want || !svc_sock_final_rec(svsk)) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
 			goto err_delete;
-		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen - sizeof(rpc_fraghdr),
-			svc_sock_reclen(svsk));
+		if (len == want)
+			svc_tcp_fragment_received(svsk);
+		else
+			dprintk("svc: incomplete TCP record (%ld of %d)\n",
+				svsk->sk_tcplen - sizeof(rpc_fraghdr),
+				svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
 	if (svc_sock_reclen(svsk) < 8)
 		goto err_delete; /* client is nuts. */
 
-	rqstp->rq_arg.len = svc_sock_reclen(svsk);
+	rqstp->rq_arg.len = svsk->sk_datalen;
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
@@ -1101,12 +1106,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		len = receive_cb_reply(svsk, rqstp);
 
 	/* Reset TCP read info */
-	svsk->sk_reclen = 0;
-	svsk->sk_tcplen = 0;
 	svsk->sk_datalen = 0;
-	/* If we have more data, signal svc_xprt_enqueue() to try again */
-	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
-		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+	svc_tcp_fragment_received(svsk);
 
 	if (len < 0)
 		goto error;
@@ -1115,7 +1116,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
-	dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
 	return rqstp->rq_arg.len;
 
 error:

From 9b2ef62b1541f176ea1b1f6e13b16df14bb16e99 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 17:24:41 -0500
Subject: [PATCH 083/111] nfsd4: lockt, release_lockowner should renew clients

Fix nfsd4_lockt and release_lockowner to lookup the referenced client,
so that it can renew it, or correctly return "expired", as appropriate.

Also share some code while we're here.

Reported-by: Frank Filz <ffilzlnx@us.ibm.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eff734033437..16e954c1c911 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3132,6 +3132,18 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status)
 		free_generic_stateid(open->op_stp);
 }
 
+static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp)
+{
+	struct nfs4_client *found;
+
+	if (STALE_CLIENTID(clid, nn))
+		return nfserr_stale_clientid;
+	found = find_confirmed_client(clid, session, nn);
+	if (clp)
+		*clp = found;
+	return found ? nfs_ok : nfserr_expired;
+}
+
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    clientid_t *clid)
@@ -3143,16 +3155,9 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	nfs4_lock_state();
 	dprintk("process_renew(%08x/%08x): starting\n", 
 			clid->cl_boot, clid->cl_id);
-	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(clid, nn))
+	status = lookup_clientid(clid, cstate->minorversion, nn, &clp);
+	if (status)
 		goto out;
-	clp = find_confirmed_client(clid, cstate->minorversion, nn);
-	status = nfserr_expired;
-	if (clp == NULL) {
-		/* We assume the client took too long to RENEW. */
-		dprintk("nfsd4_renew: clientid not found!\n");
-		goto out;
-	}
 	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
 			&& clp->cl_cb_state != NFSD4_CB_UP)
@@ -4293,9 +4298,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	nfs4_lock_state();
 
-	status = nfserr_stale_clientid;
-	if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn))
-		goto out;
+	if (!nfsd4_has_session(cstate)) {
+		status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL);
+		if (status)
+			goto out;
+	}
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		goto out;
@@ -4466,14 +4473,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);
 
-	/* XXX check for lease expiration */
-
-	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(clid, nn))
-		return status;
-
 	nfs4_lock_state();
 
+	status = lookup_clientid(clid, cstate->minorversion, nn, NULL);
+	if (status)
+		goto out;
+
 	status = nfserr_locks_held;
 	INIT_LIST_HEAD(&matches);
 

From 9a9c6478a8b6ce8b6da6b6d1e15f365b505895cd Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 4 Dec 2012 14:29:27 +0300
Subject: [PATCH 084/111] nfsd: make NFSv4 recovery client tracking options per
 net

Pointer to client tracking operations - client_tracking_ops - have to be
containerized, because different environment can support different trackers
(for example, legacy tracker currently is not suported in container).

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h       |  2 ++
 fs/nfsd/nfs4recover.c | 48 +++++++++++++++++++++++++------------------
 2 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 964b5542f027..fac4123c918c 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -35,6 +35,7 @@
 #define SESSION_HASH_SIZE	512
 
 struct cld_net;
+struct nfsd4_client_tracking_ops;
 
 struct nfsd_net {
 	struct cld_net *cld_net;
@@ -87,6 +88,7 @@ struct nfsd_net {
 
 	struct file *rec_file;
 	bool in_grace;
+	struct nfsd4_client_tracking_ops *client_tracking_ops;
 
 	time_t nfsd4_lease;
 	time_t nfsd4_grace;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 359793f89493..ba6fdd4a0455 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -63,7 +63,6 @@ struct nfsd4_client_tracking_ops {
 
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
-static struct nfsd4_client_tracking_ops *client_tracking_ops;
 
 static int
 nfs4_save_creds(const struct cred **original_creds)
@@ -1262,17 +1261,18 @@ nfsd4_client_tracking_init(struct net *net)
 {
 	int status;
 	struct path path;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	/* just run the init if it the method is already decided */
-	if (client_tracking_ops)
+	if (nn->client_tracking_ops)
 		goto do_init;
 
 	/*
 	 * First, try a UMH upcall. It should succeed or fail quickly, so
 	 * there's little harm in trying that first.
 	 */
-	client_tracking_ops = &nfsd4_umh_tracking_ops;
-	status = client_tracking_ops->init(net);
+	nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
+	status = nn->client_tracking_ops->init(net);
 	if (!status)
 		return status;
 
@@ -1280,7 +1280,7 @@ nfsd4_client_tracking_init(struct net *net)
 	 * See if the recoverydir exists and is a directory. If it is,
 	 * then use the legacy ops.
 	 */
-	client_tracking_ops = &nfsd4_legacy_tracking_ops;
+	nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
 	status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
 	if (!status) {
 		status = S_ISDIR(path.dentry->d_inode->i_mode);
@@ -1290,16 +1290,16 @@ nfsd4_client_tracking_init(struct net *net)
 	}
 
 	/* Finally, try to use nfsdcld */
-	client_tracking_ops = &nfsd4_cld_tracking_ops;
+	nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
 	printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
 			"removed in 3.10. Please transition to using "
 			"nfsdcltrack.\n");
 do_init:
-	status = client_tracking_ops->init(net);
+	status = nn->client_tracking_ops->init(net);
 	if (status) {
 		printk(KERN_WARNING "NFSD: Unable to initialize client "
 				    "recovery tracking! (%d)\n", status);
-		client_tracking_ops = NULL;
+		nn->client_tracking_ops = NULL;
 	}
 	return status;
 }
@@ -1307,32 +1307,40 @@ nfsd4_client_tracking_init(struct net *net)
 void
 nfsd4_client_tracking_exit(struct net *net)
 {
-	if (client_tracking_ops) {
-		if (client_tracking_ops->exit)
-			client_tracking_ops->exit(net);
-		client_tracking_ops = NULL;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->client_tracking_ops) {
+		if (nn->client_tracking_ops->exit)
+			nn->client_tracking_ops->exit(net);
+		nn->client_tracking_ops = NULL;
 	}
 }
 
 void
 nfsd4_client_record_create(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->create(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->create(clp);
 }
 
 void
 nfsd4_client_record_remove(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->remove(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->remove(clp);
 }
 
 int
 nfsd4_client_record_check(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		return client_tracking_ops->check(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		return nn->client_tracking_ops->check(clp);
 
 	return -EOPNOTSUPP;
 }
@@ -1340,8 +1348,8 @@ nfsd4_client_record_check(struct nfs4_client *clp)
 void
 nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->grace_done(nn, boot_time);
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->grace_done(nn, boot_time);
 }
 
 static int

From 756933ee8a75970ca7d8a10e922f5d44720457a4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 4 Dec 2012 15:18:58 +0300
Subject: [PATCH 085/111] SUNRPC: remove redundant "linux/nsproxy.h" includes

This is a cleanup patch.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/rpcb_clnt.c | 1 -
 net/sunrpc/svc.c       | 1 -
 2 files changed, 2 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a70acae496e4..109a67a37a8c 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -23,7 +23,6 @@
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/nsproxy.h>
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index be301e1c1290..529400d59755 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
-#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>

From 7007c90fb9fef593b4aeaeee57e6a6754276c97c Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Fri, 7 Dec 2012 15:40:55 -0500
Subject: [PATCH 086/111] nfsd: avoid permission checks on EXCLUSIVE_CREATE
 replay

With NFSv4, if we create a file then open it we explicit avoid checking
the permissions on the file during the open because the fact that we
created it ensures we should be allow to open it (the create and the
open should appear to be a single operation).

However if the reply to an EXCLUSIVE create gets lots and the client
resends the create, the current code will perform the permission check -
because it doesn't realise that it did the open already..

This patch should fix this.

Note that I haven't actually seen this cause a problem.  I was just
looking at the code trying to figure out a different EXCLUSIVE open
related issue, and this looked wrong.

(Fix confirmed with pynfs 4.0 test OPEN4--bfields)

Cc: stable@kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
[bfields: use OWNER_OVERRIDE and update for 4.1]
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c |  8 +++++---
 fs/nfsd/vfs.c      | 10 ++++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 87d24e5f3ca4..1a0b1fdb5ad3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -195,6 +195,7 @@ static __be32
 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
 	struct svc_fh *resfh;
+	int accmode;
 	__be32 status;
 
 	resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
@@ -254,9 +255,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 	/* set reply cache */
 	fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
 			&resfh->fh_handle);
-	if (!open->op_created)
-		status = do_open_permission(rqstp, resfh, open,
-					    NFSD_MAY_NOP);
+	accmode = NFSD_MAY_NOP;
+	if (open->op_created)
+		accmode |= NFSD_MAY_OWNER_OVERRIDE;
+	status = do_open_permission(rqstp, resfh, open, accmode);
 	set_change_info(&open->op_cinfo, current_fh);
 	fh_dup2(current_fh, resfh);
 out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b584205b25b4..0ef9b6b410a2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1471,13 +1471,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		case NFS3_CREATE_EXCLUSIVE:
 			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
 			    && dchild->d_inode->i_atime.tv_sec == v_atime
-			    && dchild->d_inode->i_size  == 0 )
+			    && dchild->d_inode->i_size  == 0 ) {
+				if (created)
+					*created = 1;
 				break;
+			}
 		case NFS4_CREATE_EXCLUSIVE4_1:
 			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
 			    && dchild->d_inode->i_atime.tv_sec == v_atime
-			    && dchild->d_inode->i_size  == 0 )
+			    && dchild->d_inode->i_size  == 0 ) {
+				if (created)
+					*created = 1;
 				goto set_attr;
+			}
 			 /* fallthru */
 		case NFS3_CREATE_GUARDED:
 			err = nfserr_exist;

From f7fb86c6e639360ad9c253cec534819ef928a674 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:04 +0300
Subject: [PATCH 087/111] nfsd: use "init_net" for portmapper

There could be a situation, when NFSd was started in one network namespace, but
stopped in another one.
This will trigger kernel panic, because RPCBIND client is stored on per-net
NFSd data, and will be NULL on NFSd shutdown.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b34a67d8ec44..9beace6a868c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/fs_struct.h>
 #include <linux/swap.h>
-#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
@@ -341,7 +340,7 @@ static int nfsd_get_default_max_blksize(void)
 int nfsd_create_serv(void)
 {
 	int error;
-	struct net *net = current->nsproxy->net_ns;
+	struct net *net = &init_net;
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {

From db6e182c17cb1a7069f7f8924721ce58ac05d9a3 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:09 +0300
Subject: [PATCH 088/111] nfsd: pass net to nfsd_init_socks()

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9beace6a868c..9fd8496d5b84 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -182,18 +182,18 @@ int nfsd_nrthreads(void)
 	return rv;
 }
 
-static int nfsd_init_socks(void)
+static int nfsd_init_socks(struct net *net)
 {
 	int error;
 	if (!list_empty(&nfsd_serv->sv_permsocks))
 		return 0;
 
-	error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nfsd_serv, "udp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
 
-	error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
@@ -218,7 +218,7 @@ static int nfsd_startup(int nrservs)
 	ret = nfsd_racache_init(2*nrservs);
 	if (ret)
 		return ret;
-	ret = nfsd_init_socks();
+	ret = nfsd_init_socks(net);
 	if (ret)
 		goto out_racache;
 	ret = lockd_up(net);

From db42d1a76a8dfcaba7a2dc9c591fa4e231db22b3 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:14 +0300
Subject: [PATCH 089/111] nfsd: pass net to nfsd_startup() and nfsd_shutdown()

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9fd8496d5b84..21cba3d7c865 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -203,10 +203,9 @@ static int nfsd_init_socks(struct net *net)
 
 static bool nfsd_up = false;
 
-static int nfsd_startup(int nrservs)
+static int nfsd_startup(int nrservs, struct net *net)
 {
 	int ret;
-	struct net *net = &init_net;
 
 	if (nfsd_up)
 		return 0;
@@ -237,16 +236,14 @@ static int nfsd_startup(int nrservs)
 out_net_state:
 	nfs4_state_shutdown();
 out_lockd:
-	lockd_down(&init_net);
+	lockd_down(net);
 out_racache:
 	nfsd_racache_shutdown();
 	return ret;
 }
 
-static void nfsd_shutdown(void)
+static void nfsd_shutdown(struct net *net)
 {
-	struct net *net = &init_net;
-
 	/*
 	 * write_ports can create the server without actually starting
 	 * any threads--if we get shut down before any threads are
@@ -264,7 +261,7 @@ static void nfsd_shutdown(void)
 
 static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 {
-	nfsd_shutdown();
+	nfsd_shutdown(net);
 
 	svc_rpcb_cleanup(serv, net);
 
@@ -468,7 +465,7 @@ nfsd_svc(int nrservs)
 
 	nfsd_up_before = nfsd_up;
 
-	error = nfsd_startup(nrservs);
+	error = nfsd_startup(nrservs, net);
 	if (error)
 		goto out_destroy;
 	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
@@ -481,7 +478,7 @@ nfsd_svc(int nrservs)
 	error = nfsd_serv->sv_nrthreads - 1;
 out_shutdown:
 	if (error < 0 && !nfsd_up_before)
-		nfsd_shutdown();
+		nfsd_shutdown(net);
 out_destroy:
 	nfsd_destroy(net);		/* Release server */
 out:

From 6777436b0f072fb20a025a73e9b67a35ad8a5451 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:20 +0300
Subject: [PATCH 090/111] nfsd: pass net to nfsd_create_serv()

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsctl.c | 4 ++--
 fs/nfsd/nfsd.h   | 2 +-
 fs/nfsd/nfssvc.c | 5 ++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e13cbddcdbd0..ae1d14313ef8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -664,7 +664,7 @@ static ssize_t __write_ports_addfd(char *buf)
 	if (err != 0 || fd < 0)
 		return -EINVAL;
 
-	err = nfsd_create_serv();
+	err = nfsd_create_serv(net);
 	if (err != 0)
 		return err;
 
@@ -696,7 +696,7 @@ static ssize_t __write_ports_addxprt(char *buf)
 	if (port < 1 || port > USHRT_MAX)
 		return -EINVAL;
 
-	err = nfsd_create_serv();
+	err = nfsd_create_serv(net);
 	if (err != 0)
 		return err;
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 5eea0f5021fd..acddf71abd51 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -103,7 +103,7 @@ enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
 int nfsd_vers(int vers, enum vers_op change);
 int nfsd_minorversion(u32 minorversion, enum vers_op change);
 void nfsd_reset_versions(void);
-int nfsd_create_serv(void);
+int nfsd_create_serv(struct net *net);
 
 extern int nfsd_max_blksize;
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 21cba3d7c865..6448391cde54 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -334,10 +334,9 @@ static int nfsd_get_default_max_blksize(void)
 	return ret;
 }
 
-int nfsd_create_serv(void)
+int nfsd_create_serv(struct net *net)
 {
 	int error;
-	struct net *net = &init_net;
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {
@@ -459,7 +458,7 @@ nfsd_svc(int nrservs)
 	if (nrservs == 0 && nfsd_serv == NULL)
 		goto out;
 
-	error = nfsd_create_serv();
+	error = nfsd_create_serv(net);
 	if (error)
 		goto out;
 

From d41a9417cd89a69f58a26935034b4264a2d882d6 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:25 +0300
Subject: [PATCH 091/111] nfsd: pass net to nfsd_svc()

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsctl.c | 4 +++-
 fs/nfsd/nfsd.h   | 2 +-
 fs/nfsd/nfssvc.c | 3 +--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ae1d14313ef8..68e229cdfd63 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -396,6 +396,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
 	int rv;
+	struct net *net = &init_net;
+
 	if (size > 0) {
 		int newthreads;
 		rv = get_int(&mesg, &newthreads);
@@ -403,7 +405,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 			return rv;
 		if (newthreads < 0)
 			return -EINVAL;
-		rv = nfsd_svc(newthreads);
+		rv = nfsd_svc(newthreads, net);
 		if (rv < 0)
 			return rv;
 	} else
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index acddf71abd51..8226c1b02558 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -65,7 +65,7 @@ extern const struct seq_operations nfs_exports_op;
 /*
  * Function prototypes.
  */
-int		nfsd_svc(int nrservs);
+int		nfsd_svc(int nrservs, struct net *net);
 int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
 
 int		nfsd_nrthreads(void);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6448391cde54..f199b537cc81 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -442,11 +442,10 @@ int nfsd_set_nrthreads(int n, int *nthreads)
  * this is the first time nrservs is nonzero.
  */
 int
-nfsd_svc(int nrservs)
+nfsd_svc(int nrservs, struct net *net)
 {
 	int	error;
 	bool	nfsd_up_before;
-	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");

From 3938a0d5eb5effcc89c6909741403f4e6a37252d Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:30 +0300
Subject: [PATCH 092/111] nfsd: pass net to nfsd_set_nrthreads()

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsctl.c | 3 ++-
 fs/nfsd/nfsd.h   | 2 +-
 fs/nfsd/nfssvc.c | 3 +--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 68e229cdfd63..58f0ae44779d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -447,6 +447,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 	int len;
 	int npools;
 	int *nthreads;
+	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
 	npools = nfsd_nrpools();
@@ -477,7 +478,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 			if (nthreads[i] < 0)
 				goto out_free;
 		}
-		rv = nfsd_set_nrthreads(i, nthreads);
+		rv = nfsd_set_nrthreads(i, nthreads, net);
 		if (rv)
 			goto out_free;
 	}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8226c1b02558..18f999665546 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -71,7 +71,7 @@ int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
 int		nfsd_nrthreads(void);
 int		nfsd_nrpools(void);
 int		nfsd_get_nrthreads(int n, int *);
-int		nfsd_set_nrthreads(int n, int *);
+int		nfsd_set_nrthreads(int n, int *, struct net *);
 int		nfsd_pool_stats_open(struct inode *, struct file *);
 int		nfsd_pool_stats_release(struct inode *, struct file *);
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index f199b537cc81..b144658c49b2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -382,12 +382,11 @@ int nfsd_get_nrthreads(int n, int *nthreads)
 	return 0;
 }
 
-int nfsd_set_nrthreads(int n, int *nthreads)
+int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
 	int tot = 0;
 	int err = 0;
-	struct net *net = &init_net;
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 

From 081603520b25f7b35ef63a363376a17c36ef74ed Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 10 Dec 2012 12:19:35 +0300
Subject: [PATCH 093/111] nfsd: pass net to __write_ports() and down

Precursor patch. Hard-coded "init_net" will be replaced by proper one in
future.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsctl.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 58f0ae44779d..8536100b7fc1 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -657,11 +657,10 @@ static ssize_t __write_ports_names(char *buf)
  * a socket of a supported family/protocol, and we use it as an
  * nfsd listener.
  */
-static ssize_t __write_ports_addfd(char *buf)
+static ssize_t __write_ports_addfd(char *buf, struct net *net)
 {
 	char *mesg = buf;
 	int fd, err;
-	struct net *net = &init_net;
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
@@ -686,12 +685,11 @@ static ssize_t __write_ports_addfd(char *buf)
  * A transport listener is added by writing it's transport name and
  * a port number.
  */
-static ssize_t __write_ports_addxprt(char *buf)
+static ssize_t __write_ports_addxprt(char *buf, struct net *net)
 {
 	char transport[16];
 	struct svc_xprt *xprt;
 	int port, err;
-	struct net *net = &init_net;
 
 	if (sscanf(buf, "%15s %5u", transport, &port) != 2)
 		return -EINVAL;
@@ -727,16 +725,17 @@ static ssize_t __write_ports_addxprt(char *buf)
 	return err;
 }
 
-static ssize_t __write_ports(struct file *file, char *buf, size_t size)
+static ssize_t __write_ports(struct file *file, char *buf, size_t size,
+			     struct net *net)
 {
 	if (size == 0)
 		return __write_ports_names(buf);
 
 	if (isdigit(buf[0]))
-		return __write_ports_addfd(buf);
+		return __write_ports_addfd(buf, net);
 
 	if (isalpha(buf[0]))
-		return __write_ports_addxprt(buf);
+		return __write_ports_addxprt(buf, net);
 
 	return -EINVAL;
 }
@@ -787,9 +786,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
 static ssize_t write_ports(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
+	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
-	rv = __write_ports(file, buf, size);
+	rv = __write_ports(file, buf, size, net);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }

From 6ff50b3dea9a242b50642a703b513986bffb8ce9 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:09 +0300
Subject: [PATCH 094/111] nfsd: move per-net startup code to separated function

NFSd resources are partially per-net and partially globally used.
This patch splits resources init and shutdown and moves per-net code to
separated functions.
Generic and per-net init and shutdown are called sequentially for a while.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 48 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b144658c49b2..038348bc1a09 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -203,6 +203,27 @@ static int nfsd_init_socks(struct net *net)
 
 static bool nfsd_up = false;
 
+static int nfsd_startup_net(struct net *net)
+{
+	int ret;
+
+	ret = nfsd_init_socks(net);
+	if (ret)
+		return ret;
+	ret = lockd_up(net);
+	if (ret)
+		return ret;
+	ret = nfs4_state_start_net(net);
+	if (ret)
+		goto out_lockd;
+
+	return 0;
+
+out_lockd:
+	lockd_down(net);
+	return ret;
+}
+
 static int nfsd_startup(int nrservs, struct net *net)
 {
 	int ret;
@@ -217,31 +238,29 @@ static int nfsd_startup(int nrservs, struct net *net)
 	ret = nfsd_racache_init(2*nrservs);
 	if (ret)
 		return ret;
-	ret = nfsd_init_socks(net);
-	if (ret)
-		goto out_racache;
-	ret = lockd_up(net);
-	if (ret)
-		goto out_racache;
 	ret = nfs4_state_start();
 	if (ret)
-		goto out_lockd;
-
-	ret = nfs4_state_start_net(net);
+		goto out_racache;
+	ret = nfsd_startup_net(net);
 	if (ret)
-		goto out_net_state;
+		goto out_net;
 
 	nfsd_up = true;
 	return 0;
-out_net_state:
+
+out_net:
 	nfs4_state_shutdown();
-out_lockd:
-	lockd_down(net);
 out_racache:
 	nfsd_racache_shutdown();
 	return ret;
 }
 
+static void nfsd_shutdown_net(struct net *net)
+{
+	nfs4_state_shutdown_net(net);
+	lockd_down(net);
+}
+
 static void nfsd_shutdown(struct net *net)
 {
 	/*
@@ -252,9 +271,8 @@ static void nfsd_shutdown(struct net *net)
 	 */
 	if (!nfsd_up)
 		return;
-	nfs4_state_shutdown_net(net);
+	nfsd_shutdown_net(net);
 	nfs4_state_shutdown();
-	lockd_down(net);
 	nfsd_racache_shutdown();
 	nfsd_up = false;
 }

From 2c2fe2909e124c32a34dbbb3ac129112524fc540 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:14 +0300
Subject: [PATCH 095/111] nfsd: per-net NFSd up flag introduced

This patch introduces introduces per-net "nfsd_net_up" boolean flag, which has
the same purpose as general "nfsd_up" flag - skip init or shutdown of per-net
resources in case of they are inited on shutted down respectively.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h  |  2 ++
 fs/nfsd/nfssvc.c | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index fac4123c918c..543ac486fd5d 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -92,6 +92,8 @@ struct nfsd_net {
 
 	time_t nfsd4_lease;
 	time_t nfsd4_grace;
+
+	bool nfsd_net_up;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 038348bc1a09..6e17efdd8afe 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
 #include "nfsd.h"
 #include "cache.h"
 #include "vfs.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -205,8 +206,12 @@ static bool nfsd_up = false;
 
 static int nfsd_startup_net(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
+	if (nn->nfsd_net_up)
+		return 0;
+
 	ret = nfsd_init_socks(net);
 	if (ret)
 		return ret;
@@ -217,6 +222,7 @@ static int nfsd_startup_net(struct net *net)
 	if (ret)
 		goto out_lockd;
 
+	nn->nfsd_net_up = true;
 	return 0;
 
 out_lockd:
@@ -257,8 +263,14 @@ static int nfsd_startup(int nrservs, struct net *net)
 
 static void nfsd_shutdown_net(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (!nn->nfsd_net_up)
+		return;
+
 	nfs4_state_shutdown_net(net);
 	lockd_down(net);
+	nn->nfsd_net_up = false;
 }
 
 static void nfsd_shutdown(struct net *net)

From b9c0ef8571c6ae33465dcf41d496ce2ad783c49d Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:19 +0300
Subject: [PATCH 096/111] nfsd: make NFSd service boot time per-net

This is simple: an NFSd service can be started at different times in
different network environments. So, its "boot time" has to be assigned
per net.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h    |  5 +++++
 fs/nfsd/nfs3xdr.c  | 14 ++++++++++----
 fs/nfsd/nfs4proc.c | 11 ++++++-----
 fs/nfsd/nfsd.h     |  5 -----
 fs/nfsd/nfssvc.c   |  4 ++--
 5 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 543ac486fd5d..3b283eaab10d 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -94,6 +94,11 @@ struct nfsd_net {
 	time_t nfsd4_grace;
 
 	bool nfsd_net_up;
+
+	/*
+	 * Time of server startup
+	 */
+	struct timeval nfssvc_boot;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 43f46cd9edea..2b8618de6c27 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -7,8 +7,10 @@
  */
 
 #include <linux/namei.h>
+#include <linux/sunrpc/svc_xprt.h>
 #include "xdr3.h"
 #include "auth.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
@@ -720,12 +722,14 @@ int
 nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_writeres *resp)
 {
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
-		*p++ = htonl(nfssvc_boot.tv_sec);
-		*p++ = htonl(nfssvc_boot.tv_usec);
+		*p++ = htonl(nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_usec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
@@ -1082,11 +1086,13 @@ int
 nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_commitres *resp)
 {
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	/* Write verifier */
 	if (resp->status == 0) {
-		*p++ = htonl(nfssvc_boot.tv_sec);
-		*p++ = htonl(nfssvc_boot.tv_usec);
+		*p++ = htonl(nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_usec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1a0b1fdb5ad3..bd67f4d6dfc6 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -497,12 +497,13 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			   &access->ac_supported);
 }
 
-static void gen_boot_verifier(nfs4_verifier *verifier)
+static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
 	__be32 verf[2];
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	verf[0] = (__be32)nfssvc_boot.tv_sec;
-	verf[1] = (__be32)nfssvc_boot.tv_usec;
+	verf[0] = (__be32)nn->nfssvc_boot.tv_sec;
+	verf[1] = (__be32)nn->nfssvc_boot.tv_usec;
 	memcpy(verifier->data, verf, sizeof(verifier->data));
 }
 
@@ -510,7 +511,7 @@ static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_commit *commit)
 {
-	gen_boot_verifier(&commit->co_verf);
+	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count);
 }
@@ -930,7 +931,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	cnt = write->wr_buflen;
 	write->wr_how_written = write->wr_stable_how;
-	gen_boot_verifier(&write->wr_verifier);
+	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
 	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 18f999665546..71ba60d36234 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -269,11 +269,6 @@ void		nfsd_lockd_shutdown(void);
 /* Check for dir entries '.' and '..' */
 #define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
 
-/*
- * Time of server startup
- */
-extern struct timeval	nfssvc_boot;
-
 #ifdef CONFIG_NFSD_V4
 
 /* before processing a COMPOUND operation, we have to check that there
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6e17efdd8afe..40992cd5bff9 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -27,7 +27,6 @@
 
 extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
-struct timeval			nfssvc_boot;
 
 /*
  * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
@@ -367,6 +366,7 @@ static int nfsd_get_default_max_blksize(void)
 int nfsd_create_serv(struct net *net)
 {
 	int error;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {
@@ -388,7 +388,7 @@ int nfsd_create_serv(struct net *net)
 	}
 
 	set_max_drc();
-	do_gettimeofday(&nfssvc_boot);		/* record boot time */
+	do_gettimeofday(&nn->nfssvc_boot);		/* record boot time */
 	return 0;
 }
 

From 9dd9845f084cda07ce00cca32a5ba8fbcbbfbcaf Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:24 +0300
Subject: [PATCH 097/111] nfsd: make NFSd service structure allocated per net

This patch makes main step in NFSd containerisation.

There could be different approaches to how to make NFSd able to handle
incoming RPC request from different network namespaces.  The two main
options are:

1) Share NFSd kthreads betwween all network namespaces.
2) Create separated pool of threads for each namespace.

While first approach looks more flexible, second one is simpler and
non-racy.  This patch implements the second option.

To make it possible to allocate separate pools of threads, we have to
make it possible to allocate separate NFSd service structures per net.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/netns.h     |  2 +
 fs/nfsd/nfs4state.c | 14 ++++---
 fs/nfsd/nfsctl.c    | 63 ++++++++++++++++++-------------
 fs/nfsd/nfsd.h      | 18 ++-------
 fs/nfsd/nfssvc.c    | 91 ++++++++++++++++++++++++++++-----------------
 5 files changed, 110 insertions(+), 78 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3b283eaab10d..1051bebff1b0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -99,6 +99,8 @@ struct nfsd_net {
 	 * Time of server startup
 	 */
 	struct timeval nfssvc_boot;
+
+	struct svc_serv *nfsd_serv;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 16e954c1c911..3d27f08e2297 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -743,9 +743,12 @@ static struct nfsd4_session *__alloc_session(int slotsize, int numslots)
 	return NULL;
 }
 
-static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
+static void init_forechannel_attrs(struct nfsd4_channel_attrs *new,
+				   struct nfsd4_channel_attrs *req,
+				   int numslots, int slotsize,
+				   struct nfsd_net *nn)
 {
-	u32 maxrpc = nfsd_serv->sv_max_mesg;
+	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
 
 	new->maxreqs = numslots;
 	new->maxresp_cached = min_t(u32, req->maxresp_cached,
@@ -883,7 +886,8 @@ void nfsd4_put_session(struct nfsd4_session *ses)
 	spin_unlock(&nn->client_lock);
 }
 
-static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
+static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan,
+					   struct nfsd_net *nn)
 {
 	struct nfsd4_session *new;
 	int numslots, slotsize;
@@ -904,7 +908,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
 		nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
 		return NULL;
 	}
-	init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
+	init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn);
 	return new;
 }
 
@@ -1776,7 +1780,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		return nfserr_inval;
 	if (check_forechannel_attrs(cr_ses->fore_channel))
 		return nfserr_toosmall;
-	new = alloc_session(&cr_ses->fore_channel);
+	new = alloc_session(&cr_ses->fore_channel, nn);
 	if (!new)
 		return nfserr_jukebox;
 	status = nfserr_jukebox;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8536100b7fc1..74934284d9a7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -409,7 +409,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 		if (rv < 0)
 			return rv;
 	} else
-		rv = nfsd_nrthreads();
+		rv = nfsd_nrthreads(net);
 
 	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
 }
@@ -450,7 +450,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
-	npools = nfsd_nrpools();
+	npools = nfsd_nrpools(net);
 	if (npools == 0) {
 		/*
 		 * NFS is shut down.  The admin can start it by
@@ -483,7 +483,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 			goto out_free;
 	}
 
-	rv = nfsd_get_nrthreads(npools, nthreads);
+	rv = nfsd_get_nrthreads(npools, nthreads, net);
 	if (rv)
 		goto out_free;
 
@@ -510,11 +510,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 	unsigned minor;
 	ssize_t tlen = 0;
 	char *sep;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	if (size>0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			/* Cannot change versions without updating
-			 * nfsd_serv->sv_xdrsize, and reallocing
+			 * nn->nfsd_serv->sv_xdrsize, and reallocing
 			 * rq_argp and rq_resp
 			 */
 			return -EBUSY;
@@ -645,11 +647,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
  * Zero-length write.  Return a list of NFSD's current listener
  * transports.
  */
-static ssize_t __write_ports_names(char *buf)
+static ssize_t __write_ports_names(char *buf, struct net *net)
 {
-	if (nfsd_serv == NULL)
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->nfsd_serv == NULL)
 		return 0;
-	return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
+	return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
 }
 
 /*
@@ -661,6 +665,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
 {
 	char *mesg = buf;
 	int fd, err;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
@@ -670,14 +675,14 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
 	if (err != 0)
 		return err;
 
-	err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
 	if (err < 0) {
 		nfsd_destroy(net);
 		return err;
 	}
 
 	/* Decrease the count, but don't shut down the service */
-	nfsd_serv->sv_nrthreads--;
+	nn->nfsd_serv->sv_nrthreads--;
 	return err;
 }
 
@@ -690,6 +695,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net)
 	char transport[16];
 	struct svc_xprt *xprt;
 	int port, err;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	if (sscanf(buf, "%15s %5u", transport, &port) != 2)
 		return -EINVAL;
@@ -701,21 +707,21 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net)
 	if (err != 0)
 		return err;
 
-	err = svc_create_xprt(nfsd_serv, transport, net,
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
 				PF_INET, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0)
 		goto out_err;
 
-	err = svc_create_xprt(nfsd_serv, transport, net,
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
 				PF_INET6, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0 && err != -EAFNOSUPPORT)
 		goto out_close;
 
 	/* Decrease the count, but don't shut down the service */
-	nfsd_serv->sv_nrthreads--;
+	nn->nfsd_serv->sv_nrthreads--;
 	return 0;
 out_close:
-	xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port);
+	xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
 	if (xprt != NULL) {
 		svc_close_xprt(xprt);
 		svc_xprt_put(xprt);
@@ -729,7 +735,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
 			     struct net *net)
 {
 	if (size == 0)
-		return __write_ports_names(buf);
+		return __write_ports_names(buf, net);
 
 	if (isdigit(buf[0]))
 		return __write_ports_addfd(buf, net);
@@ -821,6 +827,9 @@ int nfsd_max_blksize;
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	if (size > 0) {
 		int bsize;
 		int rv = get_int(&mesg, &bsize);
@@ -835,7 +844,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 			bsize = NFSSVC_MAXBLKSIZE;
 		bsize &= ~(1024-1);
 		mutex_lock(&nfsd_mutex);
-		if (nfsd_serv) {
+		if (nn->nfsd_serv) {
 			mutex_unlock(&nfsd_mutex);
 			return -EBUSY;
 		}
@@ -848,13 +857,14 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 }
 
 #ifdef CONFIG_NFSD_V4
-static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
+				  time_t *time, struct nfsd_net *nn)
 {
 	char *mesg = buf;
 	int rv, i;
 
 	if (size > 0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			return -EBUSY;
 		rv = get_int(&mesg, &i);
 		if (rv)
@@ -879,12 +889,13 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim
 	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
 }
 
-static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
+				time_t *time, struct nfsd_net *nn)
 {
 	ssize_t rv;
 
 	mutex_lock(&nfsd_mutex);
-	rv = __nfsd4_write_time(file, buf, size, time);
+	rv = __nfsd4_write_time(file, buf, size, time, nn);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -913,7 +924,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
 	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-	return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
 }
 
 /**
@@ -929,17 +940,18 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
 {
 	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
-	return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn);
 }
 
-static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
+				   struct nfsd_net *nn)
 {
 	char *mesg = buf;
 	char *recdir;
 	int len, status;
 
 	if (size > 0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			return -EBUSY;
 		if (size > PATH_MAX || buf[size-1] != '\n')
 			return -EINVAL;
@@ -983,9 +995,10 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	mutex_lock(&nfsd_mutex);
-	rv = __write_recoverydir(file, buf, size);
+	rv = __write_recoverydir(file, buf, size, nn);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 71ba60d36234..de23db255c69 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -55,7 +55,6 @@ extern struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern u32			nfsd_supported_minorversion;
 extern struct mutex		nfsd_mutex;
-extern struct svc_serv		*nfsd_serv;
 extern spinlock_t		nfsd_drc_lock;
 extern unsigned int		nfsd_drc_max_mem;
 extern unsigned int		nfsd_drc_mem_used;
@@ -68,23 +67,14 @@ extern const struct seq_operations nfs_exports_op;
 int		nfsd_svc(int nrservs, struct net *net);
 int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
 
-int		nfsd_nrthreads(void);
-int		nfsd_nrpools(void);
-int		nfsd_get_nrthreads(int n, int *);
+int		nfsd_nrthreads(struct net *);
+int		nfsd_nrpools(struct net *);
+int		nfsd_get_nrthreads(int n, int *, struct net *);
 int		nfsd_set_nrthreads(int n, int *, struct net *);
 int		nfsd_pool_stats_open(struct inode *, struct file *);
 int		nfsd_pool_stats_release(struct inode *, struct file *);
 
-static inline void nfsd_destroy(struct net *net)
-{
-	int destroy = (nfsd_serv->sv_nrthreads == 1);
-
-	if (destroy)
-		svc_shutdown_net(nfsd_serv, net);
-	svc_destroy(nfsd_serv);
-	if (destroy)
-		nfsd_serv = NULL;
-}
+void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 40992cd5bff9..0e8622a4341c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -29,11 +29,11 @@ extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
 
 /*
- * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
  * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
  * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
  *
- * If (out side the lock) nfsd_serv is non-NULL, then it must point to a
+ * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
  * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
  * of nfsd threads must exist and each must listed in ->sp_all_threads in each
  * entry of ->sv_pools[].
@@ -51,7 +51,6 @@ static int			nfsd(void *vrqstp);
  *	nfsd_versions
  */
 DEFINE_MUTEX(nfsd_mutex);
-struct svc_serv 		*nfsd_serv;
 
 /*
  * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
@@ -172,12 +171,14 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
  */
 #define	NFSD_MAXSERVS		8192
 
-int nfsd_nrthreads(void)
+int nfsd_nrthreads(struct net *net)
 {
 	int rv = 0;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	mutex_lock(&nfsd_mutex);
-	if (nfsd_serv)
-		rv = nfsd_serv->sv_nrthreads;
+	if (nn->nfsd_serv)
+		rv = nn->nfsd_serv->sv_nrthreads;
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -185,15 +186,17 @@ int nfsd_nrthreads(void)
 static int nfsd_init_socks(struct net *net)
 {
 	int error;
-	if (!list_empty(&nfsd_serv->sv_permsocks))
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
 		return 0;
 
-	error = svc_create_xprt(nfsd_serv, "udp", net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
 
-	error = svc_create_xprt(nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
@@ -369,21 +372,21 @@ int nfsd_create_serv(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
-	if (nfsd_serv) {
-		svc_get(nfsd_serv);
+	if (nn->nfsd_serv) {
+		svc_get(nn->nfsd_serv);
 		return 0;
 	}
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
 	nfsd_reset_versions();
-	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+	nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
 				      nfsd_last_thread, nfsd, THIS_MODULE);
-	if (nfsd_serv == NULL)
+	if (nn->nfsd_serv == NULL)
 		return -ENOMEM;
 
-	error = svc_bind(nfsd_serv, net);
+	error = svc_bind(nn->nfsd_serv, net);
 	if (error < 0) {
-		svc_destroy(nfsd_serv);
+		svc_destroy(nn->nfsd_serv);
 		return error;
 	}
 
@@ -392,39 +395,55 @@ int nfsd_create_serv(struct net *net)
 	return 0;
 }
 
-int nfsd_nrpools(void)
+int nfsd_nrpools(struct net *net)
 {
-	if (nfsd_serv == NULL)
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->nfsd_serv == NULL)
 		return 0;
 	else
-		return nfsd_serv->sv_nrpools;
+		return nn->nfsd_serv->sv_nrpools;
 }
 
-int nfsd_get_nrthreads(int n, int *nthreads)
+int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	if (nfsd_serv != NULL) {
-		for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
-			nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
+	if (nn->nfsd_serv != NULL) {
+		for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++)
+			nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads;
 	}
 
 	return 0;
 }
 
+void nfsd_destroy(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
+
+	if (destroy)
+		svc_shutdown_net(nn->nfsd_serv, net);
+	svc_destroy(nn->nfsd_serv);
+	if (destroy)
+		nn->nfsd_serv = NULL;
+}
+
 int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
 	int tot = 0;
 	int err = 0;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 
-	if (nfsd_serv == NULL || n <= 0)
+	if (nn->nfsd_serv == NULL || n <= 0)
 		return 0;
 
-	if (n > nfsd_serv->sv_nrpools)
-		n = nfsd_serv->sv_nrpools;
+	if (n > nn->nfsd_serv->sv_nrpools)
+		n = nn->nfsd_serv->sv_nrpools;
 
 	/* enforce a global maximum number of threads */
 	tot = 0;
@@ -454,9 +473,9 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 		nthreads[0] = 1;
 
 	/* apply the new numbers */
-	svc_get(nfsd_serv);
+	svc_get(nn->nfsd_serv);
 	for (i = 0; i < n; i++) {
-		err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
+		err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i],
 				    	  nthreads[i]);
 		if (err)
 			break;
@@ -475,6 +494,7 @@ nfsd_svc(int nrservs, struct net *net)
 {
 	int	error;
 	bool	nfsd_up_before;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
@@ -483,7 +503,7 @@ nfsd_svc(int nrservs, struct net *net)
 	if (nrservs > NFSD_MAXSERVS)
 		nrservs = NFSD_MAXSERVS;
 	error = 0;
-	if (nrservs == 0 && nfsd_serv == NULL)
+	if (nrservs == 0 && nn->nfsd_serv == NULL)
 		goto out;
 
 	error = nfsd_create_serv(net);
@@ -495,14 +515,14 @@ nfsd_svc(int nrservs, struct net *net)
 	error = nfsd_startup(nrservs, net);
 	if (error)
 		goto out_destroy;
-	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
+	error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
 	if (error)
 		goto out_shutdown;
-	/* We are holding a reference to nfsd_serv which
+	/* We are holding a reference to nn->nfsd_serv which
 	 * we don't want to count in the return value,
 	 * so subtract 1
 	 */
-	error = nfsd_serv->sv_nrthreads - 1;
+	error = nn->nfsd_serv->sv_nrthreads - 1;
 out_shutdown:
 	if (error < 0 && !nfsd_up_before)
 		nfsd_shutdown(net);
@@ -681,14 +701,17 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
 	int ret;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	mutex_lock(&nfsd_mutex);
-	if (nfsd_serv == NULL) {
+	if (nn->nfsd_serv == NULL) {
 		mutex_unlock(&nfsd_mutex);
 		return -ENODEV;
 	}
 	/* bump up the psudo refcount while traversing */
-	svc_get(nfsd_serv);
-	ret = svc_pool_stats_open(nfsd_serv, file);
+	svc_get(nn->nfsd_serv);
+	ret = svc_pool_stats_open(nn->nfsd_serv, file);
 	mutex_unlock(&nfsd_mutex);
 	return ret;
 }

From bda9cac1db8ab044e9edbfe5730283016b67d451 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:29 +0300
Subject: [PATCH 098/111] nfsd: introduce helpers for generic resources init
 and shutdown

NFSd have per-net resources and resources, used globally.
Let's move generic resources init and shutdown to separated functions since
they are going to be allocated on first NFSd service start and destroyed after
last NFSd service shutdown.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 50 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0e8622a4341c..f9d147f6dfd4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -206,6 +206,37 @@ static int nfsd_init_socks(struct net *net)
 
 static bool nfsd_up = false;
 
+static int nfsd_startup_generic(int nrservs)
+{
+	int ret;
+
+	if (nfsd_up)
+		return 0;
+
+	/*
+	 * Readahead param cache - will no-op if it already exists.
+	 * (Note therefore results will be suboptimal if number of
+	 * threads is modified after nfsd start.)
+	 */
+	ret = nfsd_racache_init(2*nrservs);
+	if (ret)
+		return ret;
+	ret = nfs4_state_start();
+	if (ret)
+		goto out_racache;
+	return 0;
+
+out_racache:
+	nfsd_racache_shutdown();
+	return ret;
+}
+
+static void nfsd_shutdown_generic(void)
+{
+	nfs4_state_shutdown();
+	nfsd_racache_shutdown();
+}
+
 static int nfsd_startup_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -236,19 +267,9 @@ static int nfsd_startup(int nrservs, struct net *net)
 {
 	int ret;
 
-	if (nfsd_up)
-		return 0;
-	/*
-	 * Readahead param cache - will no-op if it already exists.
-	 * (Note therefore results will be suboptimal if number of
-	 * threads is modified after nfsd start.)
-	 */
-	ret = nfsd_racache_init(2*nrservs);
+	ret = nfsd_startup_generic(nrservs);
 	if (ret)
 		return ret;
-	ret = nfs4_state_start();
-	if (ret)
-		goto out_racache;
 	ret = nfsd_startup_net(net);
 	if (ret)
 		goto out_net;
@@ -257,9 +278,7 @@ static int nfsd_startup(int nrservs, struct net *net)
 	return 0;
 
 out_net:
-	nfs4_state_shutdown();
-out_racache:
-	nfsd_racache_shutdown();
+	nfsd_shutdown_generic();
 	return ret;
 }
 
@@ -286,8 +305,7 @@ static void nfsd_shutdown(struct net *net)
 	if (!nfsd_up)
 		return;
 	nfsd_shutdown_net(net);
-	nfs4_state_shutdown();
-	nfsd_racache_shutdown();
+	nfsd_shutdown_generic();
 	nfsd_up = false;
 }
 

From 903d9bf0edebc9d9f06df125ab2bd57b4aa4e78e Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:34 +0300
Subject: [PATCH 099/111] nfsd: simplify NFSv4 state init and shutdown

This patch moves nfsd_startup_generic() and nfsd_shutdown_generic()
calls to nfsd_startup_net() and nfsd_shutdown_net() respectively, which
allows us to call nfsd_startup_net() instead of nfsd_startup() and makes
the code look clearer.  It also modifies nfsd_svc() and nfsd_shutdown()
to check nn->nfsd_net_up instead of global nfsd_up.  The latter is now
used only for generic resources shutdown and is currently useless.  It
will replaced by NFSd users counter later in this series.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 44 +++++++++++++++-----------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index f9d147f6dfd4..0c87b4e7d1b5 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -235,9 +235,10 @@ static void nfsd_shutdown_generic(void)
 {
 	nfs4_state_shutdown();
 	nfsd_racache_shutdown();
+	nfsd_up = false;
 }
 
-static int nfsd_startup_net(struct net *net)
+static int nfsd_startup_net(int nrservs, struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
@@ -245,39 +246,26 @@ static int nfsd_startup_net(struct net *net)
 	if (nn->nfsd_net_up)
 		return 0;
 
+	ret = nfsd_startup_generic(nrservs);
+	if (ret)
+		return ret;
 	ret = nfsd_init_socks(net);
 	if (ret)
-		return ret;
+		goto out_socks;
 	ret = lockd_up(net);
 	if (ret)
-		return ret;
+		goto out_socks;
 	ret = nfs4_state_start_net(net);
 	if (ret)
 		goto out_lockd;
 
 	nn->nfsd_net_up = true;
+	nfsd_up = true;
 	return 0;
 
 out_lockd:
 	lockd_down(net);
-	return ret;
-}
-
-static int nfsd_startup(int nrservs, struct net *net)
-{
-	int ret;
-
-	ret = nfsd_startup_generic(nrservs);
-	if (ret)
-		return ret;
-	ret = nfsd_startup_net(net);
-	if (ret)
-		goto out_net;
-
-	nfsd_up = true;
-	return 0;
-
-out_net:
+out_socks:
 	nfsd_shutdown_generic();
 	return ret;
 }
@@ -286,27 +274,25 @@ static void nfsd_shutdown_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	if (!nn->nfsd_net_up)
-		return;
-
 	nfs4_state_shutdown_net(net);
 	lockd_down(net);
 	nn->nfsd_net_up = false;
+	nfsd_shutdown_generic();
 }
 
 static void nfsd_shutdown(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	/*
 	 * write_ports can create the server without actually starting
 	 * any threads--if we get shut down before any threads are
 	 * started, then nfsd_last_thread will be run before any of this
 	 * other initialization has been done.
 	 */
-	if (!nfsd_up)
+	if (!nn->nfsd_net_up)
 		return;
 	nfsd_shutdown_net(net);
-	nfsd_shutdown_generic();
-	nfsd_up = false;
 }
 
 static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
@@ -528,9 +514,9 @@ nfsd_svc(int nrservs, struct net *net)
 	if (error)
 		goto out;
 
-	nfsd_up_before = nfsd_up;
+	nfsd_up_before = nn->nfsd_net_up;
 
-	error = nfsd_startup(nrservs, net);
+	error = nfsd_startup_net(nrservs, net);
 	if (error)
 		goto out_destroy;
 	error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);

From 4539f14981ce02d48b212786a41c8bcfb62851b4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:39 +0300
Subject: [PATCH 100/111] nfsd: replace boolean nfsd_up flag by users counter

Since we have generic NFSd resurces, we have to introduce some way how to
allocate and destroy those resources on first per-net NFSd start and on
last per-net NFSd stop respectively.
This patch replaces global boolean nfsd_up flag (which is unused now) by users
counter and use it to determine either we need to allocate generic resources
or destroy them.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0c87b4e7d1b5..5bb4a33211c7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -204,13 +204,13 @@ static int nfsd_init_socks(struct net *net)
 	return 0;
 }
 
-static bool nfsd_up = false;
+static int nfsd_users = 0;
 
 static int nfsd_startup_generic(int nrservs)
 {
 	int ret;
 
-	if (nfsd_up)
+	if (nfsd_users++)
 		return 0;
 
 	/*
@@ -233,9 +233,11 @@ static int nfsd_startup_generic(int nrservs)
 
 static void nfsd_shutdown_generic(void)
 {
+	if (--nfsd_users)
+		return;
+
 	nfs4_state_shutdown();
 	nfsd_racache_shutdown();
-	nfsd_up = false;
 }
 
 static int nfsd_startup_net(int nrservs, struct net *net)
@@ -260,7 +262,6 @@ static int nfsd_startup_net(int nrservs, struct net *net)
 		goto out_lockd;
 
 	nn->nfsd_net_up = true;
-	nfsd_up = true;
 	return 0;
 
 out_lockd:

From 541e864f00d0062c98c1e743265b0a60cada3755 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 14:23:44 +0300
Subject: [PATCH 101/111] nfsd: simplify service shutdown

Function nfsd_shutdown is called from two places: nfsd_last_thread (when last
kernel thread is exiting) and nfsd_svc (in case of kthreads starting error).
When calling from nfsd_svc(), we can be sure that per-net resources are
allocated, so we don't need to check per-net nfsd_net_up boolean flag.
This allows us to remove nfsd_shutdown function at all and move check for
per-net nfsd_net_up boolean flag to nfsd_last_thread.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5bb4a33211c7..2cfd9c69503e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -281,7 +281,7 @@ static void nfsd_shutdown_net(struct net *net)
 	nfsd_shutdown_generic();
 }
 
-static void nfsd_shutdown(struct net *net)
+static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
@@ -294,11 +294,6 @@ static void nfsd_shutdown(struct net *net)
 	if (!nn->nfsd_net_up)
 		return;
 	nfsd_shutdown_net(net);
-}
-
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
-{
-	nfsd_shutdown(net);
 
 	svc_rpcb_cleanup(serv, net);
 
@@ -530,7 +525,7 @@ nfsd_svc(int nrservs, struct net *net)
 	error = nn->nfsd_serv->sv_nrthreads - 1;
 out_shutdown:
 	if (error < 0 && !nfsd_up_before)
-		nfsd_shutdown(net);
+		nfsd_shutdown_net(net);
 out_destroy:
 	nfsd_destroy(net);		/* Release server */
 out:

From 88c47666171989ed4c5b1a5687df09511e8c5e35 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 6 Dec 2012 18:34:42 +0300
Subject: [PATCH 102/111] nfsd: pass proper net to nfsd_destroy() from NFSd
 kthreads

Since NFSd service is per-net now, we have to pass proper network
context in nfsd_shutdown() from NFSd kthreads.

The simplest way I found is to get proper net from one of transports
with permanent sockets.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfssvc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2cfd9c69503e..cee62ab9d4a3 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -541,6 +541,8 @@ static int
 nfsd(void *vrqstp)
 {
 	struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
+	struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
+	struct net *net = perm_sock->xpt_net;
 	int err;
 
 	/* Lock module and set up kernel thread */
@@ -605,7 +607,7 @@ nfsd(void *vrqstp)
 	/* Release the thread */
 	svc_exit_thread(rqstp);
 
-	nfsd_destroy(&init_net);
+	nfsd_destroy(net);
 
 	/* Release module */
 	mutex_unlock(&nfsd_mutex);

From 0a5c33e23c4d781ecc815002c54f1f91012c703d Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Fri, 7 Dec 2012 16:17:28 -0500
Subject: [PATCH 103/111] NFSD: Pass correct buffer size to rpc_ntop

I honestly have no idea where I got 129 from, but it's a much bigger
value than the actual buffer size (INET6_ADDRSTRLEN).

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 2 +-
 fs/nfsd/nfs4state.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 96ffdf55dcec..7a7b079fbdb1 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -79,7 +79,7 @@ static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
 	clp = nfsd_find_client(addr, addr_size);
 	if (clp) {
 		count = op->forget(clp, 0);
-		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
 		printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
 	}
 	nfs4_unlock_state();
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3d27f08e2297..8e2555112966 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4623,7 +4623,7 @@ u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
 u64 nfsd_print_client(struct nfs4_client *clp, u64 num)
 {
 	char buf[INET6_ADDRSTRLEN];
-	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
 	printk(KERN_INFO "NFS Client: %s\n", buf);
 	return 1;
 }
@@ -4632,7 +4632,7 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
 			     const char *type)
 {
 	char buf[INET6_ADDRSTRLEN];
-	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, 129);
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
 	printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
 }
 

From 18d9a2ca2ea1aa963a077fb49e7efcc3b0237a9b Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Fri, 7 Dec 2012 16:17:29 -0500
Subject: [PATCH 104/111] NFSD: Correct the size calculation in
 fault_inject_write

If len == 0 we end up with size = (0 - 1), which could cause bad things
to happen in copy_from_user().

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/fault_inject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 7a7b079fbdb1..e761ee95617f 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -122,7 +122,7 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
 				  size_t len, loff_t *ppos)
 {
 	char write_buf[INET6_ADDRSTRLEN];
-	size_t size = min(sizeof(write_buf), len) - 1;
+	size_t size = min(sizeof(write_buf) - 1, len);
 	struct net *net = current->nsproxy->net_ns;
 	struct sockaddr_storage sa;
 	u64 val;

From 3a28e331115be5130fe061e6b4183b29385116a6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 10 Dec 2012 18:15:07 -0500
Subject: [PATCH 105/111] svcrpc: fix some printks

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 38ec968ca1c6..d8e5adfeac30 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -952,8 +952,8 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
 		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
 							serv->sv_max_mesg) {
-			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					       (unsigned long)svsk->sk_reclen);
+			net_notice_ratelimited("RPC: fragment too large: %d\n",
+					svc_sock_reclen(svsk));
 			goto err_delete;
 		}
 	}
@@ -1080,8 +1080,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len == want)
 			svc_tcp_fragment_received(svsk);
 		else
-			dprintk("svc: incomplete TCP record (%ld of %d)\n",
-				svsk->sk_tcplen - sizeof(rpc_fraghdr),
+			dprintk("svc: incomplete TCP record (%d of %d)\n",
+				(int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
 				svc_sock_reclen(svsk));
 		goto err_noclose;
 	}

From 9b3234b9220aae5387b60bc35a424ab6748b2b59 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 4 Dec 2012 18:03:46 -0500
Subject: [PATCH 106/111] nfsd4: disable zero-copy on non-final read ops

To ensure ordering of read data with any following operations, turn off
zero copy if the read is not the final operation in the compound.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c  | 11 +++++++++++
 fs/nfsd/nfs4state.c |  8 --------
 fs/nfsd/xdr4.h      |  8 ++++++++
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bd67f4d6dfc6..2a2d9b06a413 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -692,6 +692,17 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
+	/*
+	 * If we do a zero copy read, then a client will see read data
+	 * that reflects the state of the file *after* performing the
+	 * following compound.
+	 *
+	 * To ensure proper ordering, we therefore turn off zero copy if
+	 * the client wants us to do more in this compound:
+	 */
+	if (!nfsd4_last_compound_op(rqstp))
+		rqstp->rq_splice_ok = false;
+
 	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8e2555112966..8e127b39d323 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1855,14 +1855,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	goto out;
 }
 
-static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
-{
-	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
-
-	return argp->opcnt == resp->opcnt;
-}
-
 static __be32 nfsd4_map_bcts_dir(u32 *dir)
 {
 	switch (*dir) {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 331f8a3277ab..0889bfb43dc9 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -528,6 +528,14 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
 		|| nfsd4_is_solo_sequence(resp);
 }
 
+static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
+{
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
+	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+	return argp->opcnt == resp->opcnt;
+}
+
 #define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
 
 static inline void

From d5f50b0c290431c65377c4afa1c764e2c3fe5305 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 4 Dec 2012 18:25:10 -0500
Subject: [PATCH 107/111] nfsd4: fix oops on unusual readlike compound

If the argument and reply together exceed the maximum payload size, then
a reply with a read-like operation can overlow the rq_pages array.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3bf8a9d7f217..d7a3be5ab777 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2925,11 +2925,16 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	len = maxcount;
 	v = 0;
 	while (len > 0) {
-		pn = resp->rqstp->rq_resused++;
+		pn = resp->rqstp->rq_resused;
+		if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */
+			maxcount -= len;
+			break;
+		}
 		resp->rqstp->rq_vec[v].iov_base =
 			page_address(resp->rqstp->rq_respages[pn]);
 		resp->rqstp->rq_vec[v].iov_len =
 			len < PAGE_SIZE ? len : PAGE_SIZE;
+		resp->rqstp->rq_resused++;
 		v++;
 		len -= PAGE_SIZE;
 	}
@@ -2975,6 +2980,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
+	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+		return nfserr_resource;
 
 	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
 
@@ -3024,6 +3031,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
+	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+		return nfserr_resource;
 
 	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
 	savep = p;

From 79f77bf9a4e3dd5ead006b8f17e7c4ff07d8374e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 10 Dec 2012 18:04:53 -0500
Subject: [PATCH 108/111] nfsd: warn on odd reply state in nfsd_vfs_read

As far as I can tell this shouldn't currently happen--or if it does,
something is wrong and data is going to be corrupted.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0ef9b6b410a2..b31e46eeb026 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -936,6 +936,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 			.u.data		= rqstp,
 		};
 
+		WARN_ON_ONCE(rqstp->rq_resused != 1);
 		rqstp->rq_resused = 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {

From afc59400d6c65bad66d4ad0b2daf879cbff8e23e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 10 Dec 2012 18:01:37 -0500
Subject: [PATCH 109/111] nfsd4: cleanup: replace rq_resused count by
 rq_next_page pointer

It may be a matter of personal taste, but I find this makes the code
clearer.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs2acl.c                       |  2 +-
 fs/nfsd/nfs3acl.c                       |  2 +-
 fs/nfsd/nfs3proc.c                      |  6 ++---
 fs/nfsd/nfs3xdr.c                       | 33 ++++++++++++-------------
 fs/nfsd/nfs4xdr.c                       | 24 +++++++++---------
 fs/nfsd/nfsxdr.c                        | 11 +++++----
 fs/nfsd/vfs.c                           | 18 ++++++--------
 include/linux/sunrpc/svc.h              |  6 ++---
 net/sunrpc/svc.c                        |  2 +-
 net/sunrpc/svcsock.c                    |  2 ++
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 ++++----
 net/sunrpc/xprtrdma/svc_rdma_sendto.c   |  4 ++-
 12 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index b314888825d5..9170861c804a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -253,7 +253,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	while (w > 0) {
-		if (!rqstp->rq_respages[rqstp->rq_resused++])
+		if (!*(rqstp->rq_next_page++))
 			return 0;
 		w -= PAGE_SIZE;
 	}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index a596e9d987e4..9cbc1a841f87 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -184,7 +184,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
-			if (!rqstp->rq_respages[rqstp->rq_resused++])
+			if (!*(rqstp->rq_next_page++))
 				return 0;
 			w -= PAGE_SIZE;
 		}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 97d90d1c8608..1fc02dfdc5c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -460,7 +460,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
-	int	i;
+	struct page **p;
 	caddr_t	page_addr = NULL;
 
 	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
@@ -484,8 +484,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 				     &resp->common,
 				     nfs3svc_encode_entry_plus);
 	memcpy(resp->verf, argp->verf, 8);
-	for (i=1; i<rqstp->rq_resused ; i++) {
-		page_addr = page_address(rqstp->rq_respages[i]);
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
 
 		if (((caddr_t)resp->buffer >= page_addr) &&
 		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2b8618de6c27..324c0baf7cda 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -325,7 +325,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -340,8 +340,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	/* set up the kvec */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -463,8 +464,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	len = ntohl(*p++);
 	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
 		return 0;
-	args->tname = new =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->tname = new = page_address(*(rqstp->rq_next_page++));
 	args->tlen = len;
 	/* first copy and check from the first page */
 	old = (char*)p;
@@ -535,8 +535,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -567,8 +566,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -577,7 +575,7 @@ int
 nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
-	int len, pn;
+	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -592,9 +590,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->count = len;
 
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
+		struct page *p = *(rqstp->rq_next_page++);
 		if (!args->buffer)
-			args->buffer = page_address(rqstp->rq_respages[pn]);
+			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
 
@@ -880,7 +878,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		       					common);
 	__be32		*p = cd->buffer;
 	caddr_t		curr_page_addr = NULL;
-	int		pn;		/* current page number */
+	struct page **	page;
 	int		slen;		/* string (name) length */
 	int		elen;		/* estimated entry length in words */
 	int		num_entry_words = 0;	/* actual number of words */
@@ -917,8 +915,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 	}
 
 	/* determine which page in rq_respages[] we are currently filling */
-	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
-		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+	for (page = cd->rqstp->rq_respages + 1;
+				page < cd->rqstp->rq_next_page; page++) {
+		curr_page_addr = page_address(*page);
 
 		if (((caddr_t)cd->buffer >= curr_page_addr) &&
 		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
@@ -933,14 +932,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		if (plus)
 			p = encode_entryplus_baggage(cd, p, name, namlen);
 		num_entry_words = p - cd->buffer;
-	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+	} else if (*(page+1) != NULL) {
 		/* temporarily encode entry into next page, then move back to
 		 * current and next page in rq_respages[] */
 		__be32 *p1, *tmp;
 		int len1, len2;
 
 		/* grab next page for temporary storage of entry */
-		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+		p1 = tmp = page_address(*(page+1));
 
 		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d7a3be5ab777..0dc11586682f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2906,7 +2906,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_read *read)
 {
 	u32 eof;
-	int v, pn;
+	int v;
+	struct page *page;
 	unsigned long maxcount; 
 	long len;
 	__be32 *p;
@@ -2925,16 +2926,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	len = maxcount;
 	v = 0;
 	while (len > 0) {
-		pn = resp->rqstp->rq_resused;
-		if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */
+		page = *(resp->rqstp->rq_next_page);
+		if (!page) { /* ran out of pages */
 			maxcount -= len;
 			break;
 		}
-		resp->rqstp->rq_vec[v].iov_base =
-			page_address(resp->rqstp->rq_respages[pn]);
+		resp->rqstp->rq_vec[v].iov_base = page_address(page);
 		resp->rqstp->rq_vec[v].iov_len =
 			len < PAGE_SIZE ? len : PAGE_SIZE;
-		resp->rqstp->rq_resused++;
+		resp->rqstp->rq_next_page++;
 		v++;
 		len -= PAGE_SIZE;
 	}
@@ -2980,10 +2980,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 
 	maxcount = PAGE_SIZE;
 	RESERVE_SPACE(4);
@@ -3031,7 +3031,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
 	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
@@ -3059,7 +3059,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		goto err_no_verf;
 	}
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 	readdir->common.err = 0;
 	readdir->buflen = maxcount;
 	readdir->buffer = page;
@@ -3082,8 +3082,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	p = readdir->buffer;
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xbuf->page_len = ((char*)p) - (char*)page_address(
-		resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->xbuf->page_len = ((char*)p) -
+		(char*)page_address(*(resp->rqstp->rq_next_page-1));
 
 	/* Use rest of head for padding and remaining ops: */
 	resp->xbuf->tail[0].iov_base = tailbase;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 65ec595e2226..979b42106979 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -246,7 +246,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
 
@@ -262,8 +262,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	 */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -355,7 +356,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -396,7 +397,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b31e46eeb026..f0a6d88d7fff 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -886,7 +886,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		  struct splice_desc *sd)
 {
 	struct svc_rqst *rqstp = sd->u.data;
-	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+	struct page **pp = rqstp->rq_next_page;
 	struct page *page = buf->page;
 	size_t size;
 
@@ -894,17 +894,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 
 	if (rqstp->rq_res.page_len == 0) {
 		get_page(page);
-		put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_base = buf->offset;
 		rqstp->rq_res.page_len = size;
 	} else if (page != pp[-1]) {
 		get_page(page);
-		if (*pp)
-			put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		if (*rqstp->rq_next_page)
+			put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_len += size;
 	} else
 		rqstp->rq_res.page_len += size;
@@ -936,8 +934,8 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 			.u.data		= rqstp,
 		};
 
-		WARN_ON_ONCE(rqstp->rq_resused != 1);
-		rqstp->rq_resused = 1;
+		WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1);
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d83db800fe02..676ddf53b3ee 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -243,6 +243,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	int			rq_resused;	/* number of pages used for result */
+	struct page *		*rq_next_page; /* next reply page to use */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
 
 static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 {
-	while (rqstp->rq_resused) {
-		struct page **pp = (rqstp->rq_respages +
-				    --rqstp->rq_resused);
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
 		if (*pp) {
 			put_page(*pp);
 			*pp = NULL;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 529400d59755..c6abf1a6ba95 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1297,7 +1297,7 @@ svc_process(struct svc_rqst *rqstp)
 	 * Setup response xdr_buf.
 	 * Initially it has just one page
 	 */
-	rqstp->rq_resused = 1;
+	rqstp->rq_next_page = &rqstp->rq_respages[1];
 	resv->iov_base = page_address(rqstp->rq_respages[0]);
 	resv->iov_len = 0;
 	rqstp->rq_res.pages = rqstp->rq_respages + 1;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d8e5adfeac30..dcd5669c5154 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -601,6 +601,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		rqstp->rq_respages = rqstp->rq_pages + 1 +
 			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 	}
+	rqstp->rq_next_page = rqstp->rq_respages+1;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -1066,6 +1067,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 						svsk->sk_datalen + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 41cb63b623df..0ce75524ed21 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -521,11 +521,11 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 		rqstp->rq_pages[ch_no] = NULL;
 
 	/*
-	 * Detach res pages. svc_release must see a resused count of
-	 * zero or it will attempt to put them.
+	 * Detach res pages. If svc_release sees any it will attempt to
+	 * put them.
 	 */
-	while (rqstp->rq_resused)
-		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
+	while (rqstp->rq_next_page != rqstp->rq_respages)
+		*(--rqstp->rq_next_page) = NULL;
 
 	return err;
 }
@@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_resused = 0;
+	rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
 
 	/* Rebuild rq_arg head and tail. */
 	rqstp->rq_arg.head[0] = head->arg.head[0];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 42eb7ba0b903..c1d124dc772b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	int sge_no;
 	int sge_bytes;
 	int page_no;
+	int pages;
 	int ret;
 
 	/* Post a recv buffer to handle another request. */
@@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	 * respages array. They are our pages until the I/O
 	 * completes.
 	 */
-	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
+	pages = rqstp->rq_next_page - rqstp->rq_respages;
+	for (page_no = 0; page_no < pages; page_no++) {
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;

From a1dc6955829f20ad80c1d6a411ecbcf538bb1410 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 17 Dec 2012 18:17:13 -0500
Subject: [PATCH 110/111] nfsd4: free_stateid can use the current stateid

Cc: Tigran Mkrtchyan <kofemann@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2a2d9b06a413..9d1c5dba2bbb 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1766,6 +1766,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
+		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 };

From 24ffb93872f7363a01ad639e3c8a9889b46c3f0a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 12 Dec 2012 15:24:12 -0500
Subject: [PATCH 111/111] nfsd4: don't leave freed stateid hashed

Note the stateid is hashed early on in init_stid(), but isn't currently
being unhashed on error paths.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8e127b39d323..ac8ed96c4199 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2983,6 +2983,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
 	}
 	return;
 out_free:
+	unhash_stid(&dp->dl_stid);
 	nfs4_put_delegation(dp);
 out_no_deleg:
 	flag = NFS4_OPEN_DELEGATE_NONE;