Miscellaneous NFS server fixes. Probably the most visible bug is one

that could artificially limit NFSv4.1 performance by limiting the number
 of oustanding rpcs from a single client.  Neil Brown also gets a special
 mention for fixing a 14.5-year-old memory-corruption bug in the encoding
 of NFSv3 readdir responses.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJch/d7AAoJECebzXlCjuG+VzkP/0AlSz0+vC/65oyqxAtvsn4z
 xxp6p+PcjuP7NhbImA9zO0d27rpkVFiY0vIArbcwI4R/PsqjRu8kW+dYzK2QAkY2
 pzlV2aXxB9es16Vit5M+rvM4nwSsiYdnmN7kd4ntr8DKxtzePoCSL0/kGnbVE8tt
 bnI5VhbWdd2kekR8xjRFhbHEQm2X6238K4Rv0wRc2o117GSBZCaHh7sgAdZ8BfFt
 lJET9iOGagrusVWP0Cy6+Jomg0SzyPjcwn6WuBM7MRyn9Viso+ZEOkvnxVtlX+uZ
 Z4GjbfAxC+7PKajovEW+/zIOjFgHYwNpQEsqGWAALsXXRj9vLgb0+a1DRWFCAj5n
 KkWxPMcqqxpinhluLjw+r8U90KaA3DqewJdmmSol0IjM+mvVjhUuzhvCaMOhTxJM
 WG3YZKhZzurQyp+q79xl2MycZ1QWDEDfEf7LD7MAfL88MSVbmYTM/744B4A61LZ4
 Ap9p8Jy5LeT6SgSWUh9QBxMIrab4dY0L4xUs0e0s9VE62oS3R62BojTzXSXcztNI
 UPwabKhQHnsNMAZoPlMoG5UjtCFcTHM24+NDYveWyi55dBwDq8q0FnAlKSevE1pi
 BC3WtWtnLeL1YgAMH7TLjpTUcl6jVZLHuvELW/2Lz5o0zyo8qlgF3sC7eOtGNB4j
 LQYmnf67aG+dGvZ91YPc
 =ntUa
 -----END PGP SIGNATURE-----

Merge tag 'nfsd-5.1' of git://linux-nfs.org/~bfields/linux

Pull NFS server updates from Bruce Fields:
 "Miscellaneous NFS server fixes.

  Probably the most visible bug is one that could artificially limit
  NFSv4.1 performance by limiting the number of oustanding rpcs from a
  single client.

  Neil Brown also gets a special mention for fixing a 14.5-year-old
  memory-corruption bug in the encoding of NFSv3 readdir responses"

* tag 'nfsd-5.1' of git://linux-nfs.org/~bfields/linux:
  nfsd: allow nfsv3 readdir request to be larger.
  nfsd: fix wrong check in write_v4_end_grace()
  nfsd: fix memory corruption caused by readdir
  nfsd: fix performance-limiting session calculation
  svcrpc: fix UDP on servers with lots of threads
  svcrdma: Remove syslog warnings in work completion handlers
  svcrdma: Squelch compiler warning when SUNRPC_DEBUG is disabled
  svcrdma: Use struct_size() in kmalloc()
  svcrpc: fix unlikely races preventing queueing of sockets
  svcrpc: svc_xprt_has_something_to_do seems a little long
  SUNRPC: Don't allow compiler optimisation of svc_xprt_release_slot()
  nfsd: fix an IS_ERR() vs NULL check
This commit is contained in:
Linus Torvalds 2019-03-12 15:06:54 -07:00
commit ebc551f2b8
11 changed files with 65 additions and 58 deletions

View File

@ -463,8 +463,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
&resp->common, nfs3svc_encode_entry); &resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8); memcpy(resp->verf, argp->verf, 8);
resp->count = resp->buffer - argp->buffer; resp->count = resp->buffer - argp->buffer;
if (resp->offset) if (resp->offset) {
xdr_encode_hyper(resp->offset, argp->cookie); loff_t offset = argp->cookie;
if (unlikely(resp->offset1)) {
/* we ended up with offset on a page boundary */
*resp->offset = htonl(offset >> 32);
*resp->offset1 = htonl(offset & 0xffffffff);
resp->offset1 = NULL;
} else {
xdr_encode_hyper(resp->offset, offset);
}
resp->offset = NULL;
}
RETURN_STATUS(nfserr); RETURN_STATUS(nfserr);
} }
@ -533,6 +544,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
} else { } else {
xdr_encode_hyper(resp->offset, offset); xdr_encode_hyper(resp->offset, offset);
} }
resp->offset = NULL;
} }
RETURN_STATUS(nfserr); RETURN_STATUS(nfserr);
@ -576,7 +588,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
resp->f_wtmax = max_blocksize; resp->f_wtmax = max_blocksize;
resp->f_wtpref = max_blocksize; resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE; resp->f_wtmult = PAGE_SIZE;
resp->f_dtpref = PAGE_SIZE; resp->f_dtpref = max_blocksize;
resp->f_maxfilesize = ~(u32) 0; resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT; resp->f_properties = NFS3_FSF_DEFAULT;

View File

@ -573,6 +573,8 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{ {
struct nfsd3_readdirargs *args = rqstp->rq_argp; struct nfsd3_readdirargs *args = rqstp->rq_argp;
u32 max_blocksize = svc_max_payload(rqstp);
p = decode_fh(p, &args->fh); p = decode_fh(p, &args->fh);
if (!p) if (!p)
return 0; return 0;
@ -580,7 +582,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
args->verf = p; p += 2; args->verf = p; p += 2;
args->dircount = ~0; args->dircount = ~0;
args->count = ntohl(*p++); args->count = ntohl(*p++);
args->count = min_t(u32, args->count, PAGE_SIZE); args->count = min_t(u32, args->count, max_blocksize);
args->buffer = page_address(*(rqstp->rq_next_page++)); args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
@ -921,6 +923,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
} else { } else {
xdr_encode_hyper(cd->offset, offset64); xdr_encode_hyper(cd->offset, offset64);
} }
cd->offset = NULL;
} }
/* /*

View File

@ -900,9 +900,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
return PTR_ERR(client); return PTR_ERR(client);
} }
cred = get_backchannel_cred(clp, client, ses); cred = get_backchannel_cred(clp, client, ses);
if (IS_ERR(cred)) { if (!cred) {
rpc_shutdown_client(client); rpc_shutdown_client(client);
return PTR_ERR(cred); return -ENOMEM;
} }
clp->cl_cb_client = client; clp->cl_cb_client = client;
clp->cl_cb_cred = cred; clp->cl_cb_cred = cred;

View File

@ -1544,16 +1544,16 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
{ {
u32 slotsize = slot_bytes(ca); u32 slotsize = slot_bytes(ca);
u32 num = ca->maxreqs; u32 num = ca->maxreqs;
int avail; unsigned long avail, total_avail;
spin_lock(&nfsd_drc_lock); spin_lock(&nfsd_drc_lock);
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
nfsd_drc_max_mem - nfsd_drc_mem_used); avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/* /*
* Never use more than a third of the remaining memory, * Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot: * unless it's the only way to give this client a slot:
*/ */
avail = clamp_t(int, avail, slotsize, avail/3); avail = clamp_t(int, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize); num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize; nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock); spin_unlock(&nfsd_drc_lock);

View File

@ -1126,7 +1126,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y': case 'Y':
case 'y': case 'y':
case '1': case '1':
if (nn->nfsd_serv) if (!nn->nfsd_serv)
return -EBUSY; return -EBUSY;
nfsd4_end_grace(nn); nfsd4_end_grace(nn);
break; break;

View File

@ -357,15 +357,29 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp)
struct svc_xprt *xprt = rqstp->rq_xprt; struct svc_xprt *xprt = rqstp->rq_xprt;
if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) { if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
atomic_dec(&xprt->xpt_nr_rqsts); atomic_dec(&xprt->xpt_nr_rqsts);
smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt); svc_xprt_enqueue(xprt);
} }
} }
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) static bool svc_xprt_ready(struct svc_xprt *xprt)
{ {
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE))) unsigned long xpt_flags;
/*
* If another cpu has recently updated xpt_flags,
* sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
* know about it; otherwise it's possible that both that cpu and
* this one could call svc_xprt_enqueue() without either
* svc_xprt_enqueue() recognizing that the conditions below
* are satisfied, and we could stall indefinitely:
*/
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true; return true;
if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) { if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) && if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
svc_xprt_slots_in_range(xprt)) svc_xprt_slots_in_range(xprt))
return true; return true;
@ -381,7 +395,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_rqst *rqstp = NULL; struct svc_rqst *rqstp = NULL;
int cpu; int cpu;
if (!svc_xprt_has_something_to_do(xprt)) if (!svc_xprt_ready(xprt))
return; return;
/* Mark transport as busy. It will remain in this state until /* Mark transport as busy. It will remain in this state until
@ -475,7 +489,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
if (xprt && space < rqstp->rq_reserved) { if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space; rqstp->rq_reserved = space;
smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt); svc_xprt_enqueue(xprt);
} }
} }

View File

@ -349,12 +349,16 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
/* /*
* Set socket snd and rcv buffer lengths * Set socket snd and rcv buffer lengths
*/ */
static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
unsigned int rcv)
{ {
unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
struct socket *sock = svsk->sk_sock;
nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
lock_sock(sock->sk); lock_sock(sock->sk);
sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
sock->sk->sk_write_space(sock->sk); sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk); release_sock(sock->sk);
} }
@ -516,9 +520,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* provides an upper bound on the number of threads * provides an upper bound on the number of threads
* which will access the socket. * which will access the socket.
*/ */
svc_sock_setbufsize(svsk->sk_sock, svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
(serv->sv_nrthreads+3) * serv->sv_max_mesg,
(serv->sv_nrthreads+3) * serv->sv_max_mesg);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL; skb = NULL;
@ -681,9 +683,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
* receive and respond to one request. * receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary * svc_udp_recvfrom will re-adjust if necessary
*/ */
svc_sock_setbufsize(svsk->sk_sock, svc_sock_setbufsize(svsk, 3);
3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
/* data might have come in before data_ready set up */ /* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);

View File

@ -272,11 +272,8 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
return false; return false;
ctxt->rc_temp = true; ctxt->rc_temp = true;
ret = __svc_rdma_post_recv(rdma, ctxt); ret = __svc_rdma_post_recv(rdma, ctxt);
if (ret) { if (ret)
pr_err("svcrdma: failure posting recv buffers: %d\n",
ret);
return false; return false;
}
} }
return true; return true;
} }
@ -314,17 +311,14 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_lock(&rdma->sc_rq_dto_lock); spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
spin_unlock(&rdma->sc_rq_dto_lock); /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt); svc_xprt_enqueue(&rdma->sc_xprt);
goto out; goto out;
flushed: flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
post_err: post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt); svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);

View File

@ -64,8 +64,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
spin_unlock(&rdma->sc_rw_ctxt_lock); spin_unlock(&rdma->sc_rw_ctxt_lock);
} else { } else {
spin_unlock(&rdma->sc_rw_ctxt_lock); spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(sizeof(*ctxt) + ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
SG_CHUNK_SIZE * sizeof(struct scatterlist),
GFP_KERNEL); GFP_KERNEL);
if (!ctxt) if (!ctxt)
goto out; goto out;
@ -213,13 +212,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait); wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS)) { if (unlikely(wc->status != IB_WC_SUCCESS))
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
}
svc_rdma_write_info_free(info); svc_rdma_write_info_free(info);
} }
@ -278,18 +272,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) { if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt); svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else { } else {
spin_lock(&rdma->sc_rq_dto_lock); spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&info->ri_readctxt->rc_list, list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q); &rdma->sc_read_complete_q);
/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock); spin_unlock(&rdma->sc_rq_dto_lock);
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt); svc_xprt_enqueue(&rdma->sc_xprt);
} }

View File

@ -272,10 +272,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) { if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt); svc_xprt_enqueue(&rdma->sc_xprt);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
} }
svc_xprt_put(&rdma->sc_xprt); svc_xprt_put(&rdma->sc_xprt);

View File

@ -390,8 +390,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr; struct ib_qp_init_attr qp_attr;
unsigned int ctxts, rq_depth; unsigned int ctxts, rq_depth;
struct ib_device *dev; struct ib_device *dev;
struct sockaddr *sap;
int ret = 0; int ret = 0;
RPC_IFDEBUG(struct sockaddr *sap);
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags); clear_bit(XPT_CONN, &xprt->xpt_flags);
@ -525,6 +525,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (ret) if (ret)
goto errout; goto errout;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt); dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
@ -535,6 +536,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", conn_param.initiator_depth); dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif
trace_svcrdma_xprt_accept(&newxprt->sc_xprt); trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt; return &newxprt->sc_xprt;
@ -588,11 +590,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp); ib_drain_qp(rdma->sc_qp);
/* We should only be called from kref_put */
if (kref_read(&xprt->xpt_ref) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
kref_read(&xprt->xpt_ref));
svc_rdma_flush_recv_queues(rdma); svc_rdma_flush_recv_queues(rdma);
/* Final put of backchannel client transport */ /* Final put of backchannel client transport */