nfsd4: allow large readdirs

Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
This commit is contained in:
J. Bruce Fields 2014-01-20 16:37:11 -05:00
parent 32aaa62ede
commit 561f0ed498
3 changed files with 82 additions and 69 deletions

View File

@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount; u32 rlen = op->u.readdir.rd_maxcount;
if (rlen > PAGE_SIZE) if (rlen > maxcount)
rlen = PAGE_SIZE; rlen = maxcount;
return (op_encode_hdr_size + op_encode_verifier_maxsz) return (op_encode_hdr_size + op_encode_verifier_maxsz +
* sizeof(__be32) + rlen; XDR_QUADLEN(rlen)) * sizeof(__be32);
} }
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)

View File

@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
} }
static __be32 static __be32
nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
const char *name, int namlen, __be32 **p, int buflen) const char *name, int namlen)
{ {
struct svc_export *exp = cd->rd_fhp->fh_export; struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry; struct dentry *dentry;
@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
} }
out_encode: out_encode:
nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
cd->rd_bmval,
cd->rd_rqstp, ignore_crossmnt); cd->rd_rqstp, ignore_crossmnt);
out_put: out_put:
dput(dentry); dput(dentry);
@ -2638,9 +2637,12 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
} }
static __be32 * static __be32 *
nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{ {
if (buflen < 6) __be32 *p;
p = xdr_reserve_space(xdr, 6);
if (!p)
return NULL; return NULL;
*p++ = htonl(2); *p++ = htonl(2);
*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
{ {
struct readdir_cd *ccd = ccdv; struct readdir_cd *ccd = ccdv;
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
int buflen; struct xdr_stream *xdr = cd->xdr;
__be32 *p = cd->buffer; int start_offset = xdr->buf->len;
__be32 *cookiep; int cookie_offset;
int entry_bytes;
__be32 nfserr = nfserr_toosmall; __be32 nfserr = nfserr_toosmall;
__be64 wire_offset;
__be32 *p;
/* In nfsv4, "." and ".." never make it onto the wire.. */ /* In nfsv4, "." and ".." never make it onto the wire.. */
if (name && isdotent(name, namlen)) { if (name && isdotent(name, namlen)) {
@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return 0; return 0;
} }
if (cd->offset) if (cd->cookie_offset) {
xdr_encode_hyper(cd->offset, (u64) offset); wire_offset = cpu_to_be64(offset);
write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
&wire_offset, 8);
}
buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); p = xdr_reserve_space(xdr, 4);
if (buflen < 0) if (!p)
goto fail; goto fail;
*p++ = xdr_one; /* mark entry present */ *p++ = xdr_one; /* mark entry present */
cookiep = p; cookie_offset = xdr->buf->len;
p = xdr_reserve_space(xdr, 3*4 + namlen);
if (!p)
goto fail;
p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
p = xdr_encode_array(p, name, namlen); /* name length & name */ p = xdr_encode_array(p, name, namlen); /* name length & name */
nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
switch (nfserr) { switch (nfserr) {
case nfs_ok: case nfs_ok:
break; break;
@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
*/ */
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
goto fail; goto fail;
p = nfsd4_encode_rdattr_error(p, buflen, nfserr); p = nfsd4_encode_rdattr_error(xdr, nfserr);
if (p == NULL) { if (p == NULL) {
nfserr = nfserr_toosmall; nfserr = nfserr_toosmall;
goto fail; goto fail;
} }
} }
cd->buflen -= (p - cd->buffer); nfserr = nfserr_toosmall;
cd->buffer = p; entry_bytes = xdr->buf->len - start_offset;
cd->offset = cookiep; if (entry_bytes > cd->rd_maxcount)
goto fail;
cd->rd_maxcount -= entry_bytes;
cd->cookie_offset = cookie_offset;
skip_entry: skip_entry:
cd->common.err = nfs_ok; cd->common.err = nfs_ok;
return 0; return 0;
fail: fail:
xdr_truncate_encode(xdr, start_offset);
cd->common.err = nfserr; cd->common.err = nfserr;
return -EINVAL; return -EINVAL;
} }
@ -3206,10 +3220,11 @@ static __be32
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
{ {
int maxcount; int maxcount;
int bytes_left;
loff_t offset; loff_t offset;
__be64 wire_offset;
struct xdr_stream *xdr = &resp->xdr; struct xdr_stream *xdr = &resp->xdr;
int starting_len = xdr->buf->len; int starting_len = xdr->buf->len;
__be32 *page, *tailbase;
__be32 *p; __be32 *p;
if (nfserr) if (nfserr)
@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (!p) if (!p)
return nfserr_resource; return nfserr_resource;
if (resp->xdr.buf->page_len)
return nfserr_resource;
if (!*resp->rqstp->rq_next_page)
return nfserr_resource;
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
WRITE32(0); WRITE32(0);
WRITE32(0); WRITE32(0);
resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
- (char *)resp->xdr.buf->head[0].iov_base; - (char *)resp->xdr.buf->head[0].iov_base;
tailbase = p;
maxcount = PAGE_SIZE;
if (maxcount > readdir->rd_maxcount)
maxcount = readdir->rd_maxcount;
/* /*
* Convert from bytes to words, account for the two words already * Number of bytes left for directory entries allowing for the
* written, make sure to leave two words at the end for the next * final 8 bytes of the readdir and a following failed op:
* pointer and eof field.
*/ */
maxcount = (maxcount >> 2) - 4; bytes_left = xdr->buf->buflen - xdr->buf->len
if (maxcount < 0) { - COMPOUND_ERR_SLACK_SPACE - 8;
if (bytes_left < 0) {
nfserr = nfserr_resource;
goto err_no_verf;
}
maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
/*
* Note the rfc defines rd_maxcount as the size of the
* READDIR4resok structure, which includes the verifier above
* and the 8 bytes encoded at the end of this function:
*/
if (maxcount < 16) {
nfserr = nfserr_toosmall; nfserr = nfserr_toosmall;
goto err_no_verf; goto err_no_verf;
} }
maxcount = min_t(int, maxcount-16, bytes_left);
page = page_address(*(resp->rqstp->rq_next_page++)); readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
readdir->common.err = 0; readdir->common.err = 0;
readdir->buflen = maxcount; readdir->cookie_offset = 0;
readdir->buffer = page;
readdir->offset = NULL;
offset = readdir->rd_cookie; offset = readdir->rd_cookie;
nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
&readdir->common, nfsd4_encode_dirent); &readdir->common, nfsd4_encode_dirent);
if (nfserr == nfs_ok && if (nfserr == nfs_ok &&
readdir->common.err == nfserr_toosmall && readdir->common.err == nfserr_toosmall &&
readdir->buffer == page) xdr->buf->len == starting_len + 8) {
/* nothing encoded; which limit did we hit?: */
if (maxcount - 16 < bytes_left)
/* It was the fault of rd_maxcount: */
nfserr = nfserr_toosmall; nfserr = nfserr_toosmall;
else
/* We ran out of buffer space: */
nfserr = nfserr_resource;
}
if (nfserr) if (nfserr)
goto err_no_verf; goto err_no_verf;
if (readdir->offset) if (readdir->cookie_offset) {
xdr_encode_hyper(readdir->offset, offset); wire_offset = cpu_to_be64(offset);
write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
&wire_offset, 8);
}
p = readdir->buffer; p = xdr_reserve_space(xdr, 8);
if (!p) {
WARN_ON_ONCE(1);
goto err_no_verf;
}
*p++ = 0; /* no more entries */ *p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof); *p++ = htonl(readdir->common.err == nfserr_eof);
resp->xdr.buf->page_len = ((char *)p) -
(char*)page_address(*(resp->rqstp->rq_next_page-1));
xdr->buf->len += xdr->buf->page_len;
xdr->iov = xdr->buf->tail;
xdr->page_ptr++;
xdr->buf->buflen -= PAGE_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
resp->xdr.buf->tail[0].iov_base = tailbase;
resp->xdr.buf->tail[0].iov_len = 0;
resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
resp->xdr.end = resp->xdr.p +
(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
return 0; return 0;
err_no_verf: err_no_verf:

View File

@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */ struct svc_fh * rd_fhp; /* response */
struct readdir_cd common; struct readdir_cd common;
__be32 * buffer; struct xdr_stream *xdr;
int buflen; int cookie_offset;
__be32 * offset;
}; };
struct nfsd4_release_lockowner { struct nfsd4_release_lockowner {