From c5f9d9db83d9f84d2b4aae5a1b29d9b582ccff2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 May 2020 16:12:55 +0100 Subject: [PATCH 01/12] cachefiles: Fix corruption of the return value in cachefiles_read_or_alloc_pages() The patch which changed cachefiles from calling ->bmap() to using the bmap() wrapper overwrote the running return value with the result of calling bmap(). This causes an assertion failure elsewhere in the code. Fix this by using ret2 rather than ret to hold the return value. The oops looks like: kernel BUG at fs/nfs/fscache.c:468! ... RIP: 0010:__nfs_readpages_from_fscache+0x18b/0x190 [nfs] ... Call Trace: nfs_readpages+0xbf/0x1c0 [nfs] ? __alloc_pages_nodemask+0x16c/0x320 read_pages+0x67/0x1a0 __do_page_cache_readahead+0x1cf/0x1f0 ondemand_readahead+0x172/0x2b0 page_cache_async_readahead+0xaa/0xe0 generic_file_buffered_read+0x852/0xd50 ? mem_cgroup_commit_charge+0x6e/0x140 ? nfs4_have_delegation+0x19/0x30 [nfsv4] generic_file_read_iter+0x100/0x140 ? nfs_revalidate_mapping+0x176/0x2b0 [nfs] nfs_file_read+0x6d/0xc0 [nfs] new_sync_read+0x11a/0x1c0 __vfs_read+0x29/0x40 vfs_read+0x8e/0x140 ksys_read+0x61/0xd0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f5d148267e0 Fixes: 10d83e11a582 ("cachefiles: drop direct usage of ->bmap method.") Reported-by: David Wysochanski Signed-off-by: David Howells Tested-by: David Wysochanski cc: Carlos Maiolino --- fs/cachefiles/rdwr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1dc97f2d6201..d3d78176b23c 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -398,7 +398,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, struct inode *inode; sector_t block; unsigned shift; - int ret; + int ret, ret2; object = container_of(op->op.object, struct cachefiles_object, fscache); @@ -430,8 +430,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, block = page->index; block <<= shift; - ret = bmap(inode, &block); - ASSERT(ret < 0); + ret2 = bmap(inode, &block); + ASSERT(ret2 == 0); _debug("%llx -> %llx", (unsigned long long) (page->index << shift), @@ -739,8 +739,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, block = page->index; block <<= shift; - ret = bmap(inode, &block); - ASSERT(!ret); + ret2 = bmap(inode, &block); + ASSERT(ret2 == 0); _debug("%llx -> %llx", (unsigned long long) (page->index << shift), From d9bfced1fbcb35b28d8fbed4e785d2807055ed2b Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 15 Apr 2020 16:14:41 -0400 Subject: [PATCH 02/12] NFS: Fix fscache super_cookie index_key from changing after umount Commit 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") added the index_key and index_key_len parameters to fscache_acquire_cookie(), and updated the callers in the NFS client. One of the callers was inside nfs_fscache_get_super_cookie() and was changed to use the full struct nfs_fscache_key as the index_key. However, a couple members of this structure contain pointers and thus will change each time the same NFS share is remounted. Since index_key is used for fscache_cookie->key_hash and this subsequently is used to compare cookies, the effectiveness of fscache with NFS is reduced to the point at which a umount occurs. Any subsequent remount of the same share will cause a unique NFS super_block index_key and key_hash to be generated for the same data, rendering any prior fscache data unable to be found. A simple reproducer demonstrates the problem. 1. Mount share with 'fsc', create a file, drop page cache systemctl start cachefilesd mount -o vers=3,fsc 127.0.0.1:/export /mnt dd if=/dev/zero of=/mnt/file1.bin bs=4096 count=1 echo 3 > /proc/sys/vm/drop_caches 2. Read file into page cache and fscache, then unmount dd if=/mnt/file1.bin of=/dev/null bs=4096 count=1 umount /mnt 3. Remount and re-read which should come from fscache mount -o vers=3,fsc 127.0.0.1:/export /mnt echo 3 > /proc/sys/vm/drop_caches dd if=/mnt/file1.bin of=/dev/null bs=4096 count=1 4. Check for READ ops in mountstats - there should be none grep READ: /proc/self/mountstats Looking at the history and the removed function, nfs_super_get_key(), we should only use nfs_fscache_key.key plus any uniquifier, for the fscache index_key. Fixes: 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 1abf126c2df4..8eff1fd806b1 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -188,7 +188,8 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - key, sizeof(*key) + ulen, + &key->key, + sizeof(key->key) + ulen, NULL, 0, nfss, 0, true); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", From 15751612734ca0c419ac43ce986c9badcb5e2829 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 15 Apr 2020 16:14:42 -0400 Subject: [PATCH 03/12] NFS: Fix fscache super_cookie allocation Commit f2aedb713c28 ("NFS: Add fs_context support.") reworked NFS mount code paths for fs_context support which included super_block initialization. In the process there was an extra return left in the code and so we never call nfs_fscache_get_super_cookie even if 'fsc' is given on as mount option. In addition, there is an extra check inside nfs_fscache_get_super_cookie for the NFS_OPTION_FSCACHE which is unnecessary since the only caller nfs_get_cache_cookie checks this flag. Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 2 -- fs/nfs/super.c | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 8eff1fd806b1..f51718415606 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -118,8 +118,6 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int nfss->fscache_key = NULL; nfss->fscache = NULL; - if (!(nfss->options & NFS_OPTION_FSCACHE)) - return; if (!uniq) { uniq = ""; ulen = 1; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 59ef3b13ccca..cc34aa3a8ba4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1189,7 +1189,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, uniq = ctx->fscache_uniq; ulen = strlen(ctx->fscache_uniq); } - return; } nfs_fscache_get_super_cookie(sb, uniq, ulen); From 50eaa652b54df1e2b48dc398d9e6114c9ed080eb Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 16 Apr 2020 06:06:08 -0400 Subject: [PATCH 04/12] NFSv4: Fix fscache cookie aux_data to ensure change_attr is included Commit 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") added the aux_data and aux_data_len to parameters to fscache_acquire_cookie(), and updated the callers in the NFS client. In the process it modified the aux_data to include the change_attr, but missed adding change_attr to a couple places where aux_data was used. Specifically, when opening a file and the change_attr is not added, the following attempt to lookup an object will fail inside cachefiles_check_object_xattr() = -116 due to nfs_fscache_inode_check_aux() failing memcmp on auxdata and returning FSCACHE_CHECKAUX_OBSOLETE. Fix this by adding nfs_fscache_update_auxdata() to set the auxdata from all relevant fields in the inode, including the change_attr. Fixes: 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index f51718415606..a60df88efc40 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -225,6 +225,19 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) } } +static void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata, + struct nfs_inode *nfsi) +{ + memset(auxdata, 0, sizeof(*auxdata)); + auxdata->mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata->mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata->ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata->ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata->change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); +} + /* * Initialise the per-inode cache cookie pointer for an NFS inode. */ @@ -238,14 +251,7 @@ void nfs_fscache_init_inode(struct inode *inode) if (!(nfss->fscache && S_ISREG(inode->i_mode))) return; - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; - - if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); + nfs_fscache_update_auxdata(&auxdata, nfsi); nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, &nfs_fscache_inode_object_def, @@ -265,11 +271,7 @@ void nfs_fscache_clear_inode(struct inode *inode) dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + nfs_fscache_update_auxdata(&auxdata, nfsi); fscache_relinquish_cookie(cookie, &auxdata, false); nfsi->fscache = NULL; } @@ -309,11 +311,7 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) if (!fscache_cookie_valid(cookie)) return; - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + nfs_fscache_update_auxdata(&auxdata, nfsi); if (inode_is_open_for_write(inode)) { dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); From 7bb0c5338436dae953622470d52689265867f032 Mon Sep 17 00:00:00 2001 From: Lei Xue Date: Thu, 7 May 2020 08:50:22 -0400 Subject: [PATCH 05/12] cachefiles: Fix race between read_waiter and read_copier involving op->to_do There is a potential race in fscache operation enqueuing for reading and copying multiple pages from cachefiles to netfs. The problem can be seen easily on a heavy loaded system (for example many processes reading files continually on an NFS share covered by fscache triggered this problem within a few minutes). The race is due to cachefiles_read_waiter() adding the op to the monitor to_do list and then then drop the object->work_lock spinlock before completing fscache_enqueue_operation(). Once the lock is dropped, cachefiles_read_copier() grabs the op, completes processing it, and makes it through fscache_retrieval_complete() which sets the op->state to the final state of FSCACHE_OP_ST_COMPLETE(4). When cachefiles_read_waiter() finally gets through the remainder of fscache_enqueue_operation() it sees the invalid state, and hits the ASSERTCMP and the following oops is seen: [ 2259.612361] FS-Cache: [ 2259.614785] FS-Cache: Assertion failed [ 2259.618639] FS-Cache: 4 == 5 is false [ 2259.622456] ------------[ cut here ]------------ [ 2259.627190] kernel BUG at fs/fscache/operation.c:70! ... [ 2259.791675] RIP: 0010:[] [] fscache_enqueue_operation+0xff/0x170 [fscache] [ 2259.802059] RSP: 0000:ffffa0263d543be0 EFLAGS: 00010046 [ 2259.807521] RAX: 0000000000000019 RBX: ffffa01a4d390480 RCX: 0000000000000006 [ 2259.814847] RDX: 0000000000000000 RSI: 0000000000000046 RDI: ffffa0263d553890 [ 2259.822176] RBP: ffffa0263d543be8 R08: 0000000000000000 R09: ffffa0263c2d8708 [ 2259.829502] R10: 0000000000001e7f R11: 0000000000000000 R12: ffffa01a4d390480 [ 2259.844483] R13: ffff9fa9546c5920 R14: ffffa0263d543c80 R15: ffffa0293ff9bf10 [ 2259.859554] FS: 00007f4b6efbd700(0000) GS:ffffa0263d540000(0000) knlGS:0000000000000000 [ 2259.875571] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2259.889117] CR2: 00007f49e1624ff0 CR3: 0000012b38b38000 CR4: 00000000007607e0 [ 2259.904015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2259.918764] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2259.933449] PKRU: 55555554 [ 2259.943654] Call Trace: [ 2259.953592] [ 2259.955577] [] cachefiles_read_waiter+0x92/0xf0 [cachefiles] [ 2259.978039] [] __wake_up_common+0x82/0x120 [ 2259.991392] [] __wake_up_common_lock+0x83/0xc0 [ 2260.004930] [] ? task_rq_unlock+0x20/0x20 [ 2260.017863] [] __wake_up+0x13/0x20 [ 2260.030230] [] __wake_up_bit+0x50/0x70 [ 2260.042535] [] unlock_page+0x2b/0x30 [ 2260.054495] [] page_endio+0x29/0x90 [ 2260.066184] [] mpage_end_io+0x51/0x80 CPU1 cachefiles_read_waiter() 20 static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, 21 int sync, void *_key) 22 { ... 61 spin_lock(&object->work_lock); 62 list_add_tail(&monitor->op_link, &op->to_do); 63 spin_unlock(&object->work_lock); 64 65 fscache_enqueue_retrieval(op); 182 static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) 183 { 184 fscache_enqueue_operation(&op->op); 185 } 58 void fscache_enqueue_operation(struct fscache_operation *op) 59 { 60 struct fscache_cookie *cookie = op->object->cookie; 61 62 _enter("{OBJ%x OP%x,%u}", 63 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 64 65 ASSERT(list_empty(&op->pend_link)); 66 ASSERT(op->processor != NULL); 67 ASSERT(fscache_object_is_available(op->object)); 68 ASSERTCMP(atomic_read(&op->usage), >, 0); CPU2 cachefiles_read_copier() 168 while (!list_empty(&op->to_do)) { ... 202 fscache_end_io(op, monitor->netfs_page, error); 203 put_page(monitor->netfs_page); 204 fscache_retrieval_complete(op, 1); CPU1 58 void fscache_enqueue_operation(struct fscache_operation *op) 59 { ... 69 ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, 70 op->state, ==, FSCACHE_OP_ST_CANCELLED); Signed-off-by: Lei Xue Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/cachefiles/rdwr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index d3d78176b23c..e7726f5f1241 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -60,9 +60,9 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); list_add_tail(&monitor->op_link, &op->to_do); + fscache_enqueue_retrieval(op); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(op); fscache_put_retrieval(op); return 0; } From 31e9a7f353526bbe53165a292f8ea9695ead3168 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 9 May 2020 07:25:11 +1000 Subject: [PATCH 06/12] SUNRPC: fix use-after-free in rpc_free_client_work() Parts of rpc_free_client() were recently moved to a separate rpc_free_clent_work(). This introduced a use-after-free as rpc_clnt_remove_pipedir() calls rpc_net_ns(), and that uses clnt->cl_xprt which has already been freed. So move the call to xprt_put() after the call to rpc_clnt_remove_pipedir(). Reported-by: syzbot+22b5ef302c7c40d94ea8@syzkaller.appspotmail.com Fixes: 7c4310ff5642 ("SUNRPC: defer slow parts of rpc_free_client() to a workqueue.") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8350d3a2e9a7..8d3972ea688b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -890,6 +890,7 @@ static void rpc_free_client_work(struct work_struct *work) */ rpc_clnt_debugfs_unregister(clnt); rpc_clnt_remove_pipedir(clnt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); kfree(clnt); rpciod_down(); @@ -907,7 +908,6 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); rpc_free_clid(clnt); From 29fe839976266bc7c55b927360a1daae57477723 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 May 2020 10:02:48 -0400 Subject: [PATCH 07/12] nfs: fix NULL deference in nfs4_get_valid_delegation We add the new state to the nfsi->open_states list, making it potentially visible to other threads, before we've finished initializing it. That wasn't a problem when all the readers were also taking the i_lock (as we do here), but since we switched to RCU, there's now a possibility that a reader could see the partially initialized state. Symptoms observed were a crash when another thread called nfs4_get_valid_delegation() on a NULL inode, resulting in an oops like: BUG: unable to handle page fault for address: ffffffffffffffb0 ... RIP: 0010:nfs4_get_valid_delegation+0x6/0x30 [nfsv4] ... Call Trace: nfs4_open_prepare+0x80/0x1c0 [nfsv4] __rpc_execute+0x75/0x390 [sunrpc] ? finish_task_switch+0x75/0x260 rpc_async_schedule+0x29/0x40 [sunrpc] process_one_work+0x1ad/0x370 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x10c/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x22/0x30 Fixes: 9ae075fdd190 "NFSv4: Convert open state lookup to use RCU" Reviewed-by: Seiichi Ikarashi Tested-by: Daisuke Matsuda Tested-by: Masayoshi Mizuma Signed-off-by: J. Bruce Fields Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ac93715c05a4..a8dc25ce48bb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -734,9 +734,9 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state = new; state->owner = owner; atomic_inc(&owner->so_count); - list_add_rcu(&state->inode_states, &nfsi->open_states); ihold(inode); state->inode = inode; + list_add_rcu(&state->inode_states, &nfsi->open_states); spin_unlock(&inode->i_lock); /* Note: The reclaim code dictates that we add stateless * and read-only stateids to the end of the list */ From ce99aa62e1eb793e259d023c7f6ccb7c4879917b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 9 May 2020 14:07:13 -0400 Subject: [PATCH 08/12] SUNRPC: Signalled ASYNC tasks need to exit Ensure that signalled ASYNC rpc_tasks exit immediately instead of spinning until a timeout (or forever). To avoid checking for the signal flag on every scheduler iteration, the check is instead introduced in the client's finite state machine. Signed-off-by: Chuck Lever Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d3972ea688b..c74bc402f8c7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + if (RPC_SIGNALLED(task)) { + rpc_call_rpcerror(task, -ERESTARTSYS); + return; + } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; From f304a809a9f1eec23e023f3aae6c768a08495244 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 11 May 2020 10:42:04 -0400 Subject: [PATCH 09/12] NFS: Don't use RPC_TASK_CRED_NOREF with delegreturn We are not guaranteed that the credential will remain pinned. Fixes: 612965072020 ("NFSv4: Avoid referencing the cred unnecessarily during NFSv4 I/O") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0c1e653a935..9056f3dd380e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6347,7 +6347,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_delegreturn_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; int status = 0; From 4fa7ef69e2918eb89c95e57978d839c0a31f332c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 May 2020 09:55:36 -0400 Subject: [PATCH 10/12] NFS/pnfs: Don't use RPC_TASK_CRED_NOREF with pnfs When we're doing pnfs then the credential being used for the RPC call is not necessarily the same as the one used in the open context, so don't use RPC_TASK_CRED_NOREF. Fixes: 612965072020 ("NFSv4: Avoid referencing the cred unnecessarily during NFSv4 I/O") Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 +++-- fs/nfs/pnfs_nfs.c | 3 ++- fs/nfs/write.c | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f61f96603df7..6ca421cbe19c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -752,7 +752,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, .callback_ops = call_ops, .callback_data = hdr, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | flags, + .flags = RPC_TASK_ASYNC | flags, }; hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); @@ -950,7 +950,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) hdr->cred, NFS_PROTO(hdr->inode), desc->pg_rpc_callops, - desc->pg_ioflags, 0); + desc->pg_ioflags, + RPC_TASK_CRED_NOREF); return ret; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index e7ddbce48321..679767ac258d 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -536,7 +536,8 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_init_commit(data, NULL, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(data->inode), - data->mds_ops, how, 0); + data->mds_ops, how, + RPC_TASK_CRED_NOREF); } else { nfs_init_commit(data, NULL, data->lseg, cinfo); initiate_commit(data, how); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index df4b87c30ac9..1e767f779c49 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1695,7 +1695,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | flags, + .flags = RPC_TASK_ASYNC | flags, .priority = priority, }; /* Set up the initial task struct. */ @@ -1813,7 +1813,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), - data->mds_ops, how, 0); + data->mds_ops, how, RPC_TASK_CRED_NOREF); } /* From 933496e9cc02cda16ca8f0c38e56204376d6b0b2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 May 2020 18:38:40 -0400 Subject: [PATCH 11/12] SUNRPC: 'Directory with parent 'rpc_clnt' already present!' Each rpc_client has a cl_clid which is allocated from a global ida, and a debugfs directory which is named after cl_clid. We're releasing the cl_clid before we free the debugfs directory named after it. As soon as the cl_clid is released, that value is available for another newly created client. That leaves a window where another client may attempt to create a new debugfs directory with the same name as the not-yet-deleted debugfs directory from the dying client. Symptoms are log messages like Directory 4 with parent 'rpc_clnt' already present! Fixes: 7c4310ff5642 "SUNRPC: defer slow parts of rpc_free_client() to a workqueue." Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c74bc402f8c7..61b21dafd7c0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -889,6 +889,7 @@ static void rpc_free_client_work(struct work_struct *work) * here. */ rpc_clnt_debugfs_unregister(clnt); + rpc_free_clid(clnt); rpc_clnt_remove_pipedir(clnt); xprt_put(rcu_dereference_raw(clnt->cl_xprt)); @@ -910,7 +911,6 @@ rpc_free_client(struct rpc_clnt *clnt) clnt->cl_metrics = NULL; xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); - rpc_free_clid(clnt); INIT_WORK(&clnt->cl_work, rpc_free_client_work); schedule_work(&clnt->cl_work); From 8eed292bc8cbf737e46fb1c119d4c8f6dcb00650 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 14 May 2020 16:09:40 -0400 Subject: [PATCH 12/12] NFSv3: fix rpc receive buffer size for MOUNT call Prior to commit e3d3ab64dd66 ("SUNRPC: Use au_rslack when computing reply buffer size"), there was enough slack in the reply buffer to commodate filehandles of size 60bytes. However, the real problem was that the reply buffer size for the MOUNT operation was not correctly calculated. Received buffer size used the filehandle size for NFSv2 (32bytes) which is much smaller than the allowed filehandle size for the v3 mounts. Fix the reply buffer size (decode arguments size) for the MNT command. Fixes: 2c94b8eca1a2 ("SUNRPC: Use au_rslack when computing reply buffer size") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 35c8cb2d7637..dda5c3e65d8d 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -30,6 +30,7 @@ #define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) #define MNT_status_sz (1) #define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) +#define MNT_fhandlev3_sz XDR_QUADLEN(NFS3_FHSIZE) #define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) /* @@ -37,7 +38,7 @@ */ #define MNT_enc_dirpath_sz encode_dirpath_sz #define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz) -#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle_sz + \ +#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandlev3_sz + \ MNT_authflav3_sz) /*